In [70]:
import py3Dmol

# Path of the PDB model to display, relative to the notebook's working directory.
pdb_file = "input/AF-O06917-F1-model_v4.pdb"

# Read the whole file as text; the raw contents are handed to the viewer below.
with open(pdb_file, "r") as f:
    pdb_data = f.read()

# Create an 800x600 py3Dmol viewer.
view = py3Dmol.view(width=800, height=600)

# Load the file contents into the viewer, declaring the format as "pdb".
view.addModel(pdb_data, "pdb")

# Apply a cartoon style using the "spectrum" colour setting, then zoom to the model.
view.setStyle({"cartoon": {"color": "spectrum"}})
view.zoomTo()

# Render the viewer in the cell output.
view.show()

In [69]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import numpy as np
import plotly.graph_objects as go

def parse_pdb(file_path):
    """Read the Cartesian coordinates of every ``ATOM`` record in a PDB file.

    Only lines that start with ``ATOM`` are used (``HETATM`` and all other
    record types are skipped). Coordinates are taken from the fixed character
    slices ``[30:38]``, ``[38:46]`` and ``[46:54]`` of each line.

    Parameters
    ----------
    file_path : str
        Path of the PDB file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_atoms, 3)`` holding x, y, z per row. When
        the file contains no ``ATOM`` records the result has shape ``(0, 3)``
        so that column indexing and stacking keep working.

    Raises
    ------
    ValueError
        If a coordinate field of an ``ATOM`` line cannot be parsed as a float.
    """
    coordinates = []
    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith('ATOM'):
                coordinates.append(
                    [float(line[30:38]), float(line[38:46]), float(line[46:54])]
                )
    # reshape(-1, 3) keeps the result two-dimensional even when no atom was found.
    return np.array(coordinates, dtype=float).reshape(-1, 3)


def generate_views(coordinates, seed=None):
    """Build three variants ("views") of a coordinate array.

    Parameters
    ----------
    coordinates : numpy.ndarray
        Two-dimensional array with one point per row.
    seed : int or numpy.random.Generator, optional
        When given, the row shuffle of the second view is drawn from
        ``numpy.random.default_rng(seed)`` and is therefore reproducible.
        When omitted, NumPy's global random state is used, so the result
        changes between runs unless that state was seeded by the caller.

    Returns
    -------
    tuple
        ``(view1, view2, view3)`` where

        * ``view1`` is ``coordinates`` itself (no copy),
        * ``view2`` is a copy with the rows in random order,
        * ``view3`` has the column order reversed (``np.flip`` along axis 1,
          i.e. columns ``0, 1, 2`` become ``2, 1, 0``).
    """
    view1 = coordinates
    if seed is None:
        view2 = np.random.permutation(coordinates)
    else:
        # A dedicated generator makes the shuffle repeatable without touching
        # NumPy's global random state.
        view2 = np.random.default_rng(seed).permutation(coordinates)
    view3 = np.flip(coordinates, axis=1)
    return view1, view2, view3

def train_model(input_data, output_data, epochs=200, batch_size=32, learning_rate=0.001):
    """Fit a fully connected regression network mapping inputs to 3 outputs.

    The data is split 80/20 into train and test sets (``random_state=42``),
    the inputs are min-max scaled using statistics of the training split
    only, and a network of three hidden ``Dense`` layers (512, 512, 256 units,
    each followed by batch normalisation and 30 % dropout) is trained with
    mean squared error. The test-set loss is printed after training.

    Parameters
    ----------
    input_data : array-like
        Two-dimensional feature matrix, one sample per row.
    output_data : array-like
        Targets with one row per sample; the final layer has 3 linear units.
    epochs : int, optional
        Number of training epochs (default 200).
    batch_size : int, optional
        Mini-batch size (default 32).
    learning_rate : float, optional
        Adam learning rate (default 0.001).

    Returns
    -------
    tuple
        ``(model, scaler)``: the trained Keras model and the fitted
        ``MinMaxScaler`` that must be applied to inputs before prediction.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        input_data, output_data, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only so no test statistics leak in.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = Sequential()
    model.add(Dense(512, input_dim=X_train_scaled.shape[1], activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(3, activation='linear'))

    # `lr` is the deprecated spelling and is rejected by current Keras
    # optimizers; `learning_rate` is the supported keyword.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    model.fit(
        X_train_scaled,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
    )

    loss = model.evaluate(X_test_scaled, y_test)
    print("Mean Squared Error on Test Data:", loss)

    return model, scaler

def reconstruct_structure(model, scaler, views):
    """Run every view through the scaler and the model.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(array)``.
    scaler : object
        Anything exposing ``transform(array)``; applied before prediction.
    views : iterable
        The arrays to process, in order.

    Returns
    -------
    numpy.ndarray
        The per-view predictions collected into a single array, in the same
        order as ``views``.
    """
    predictions = [model.predict(scaler.transform(single_view)) for single_view in views]
    return np.array(predictions)

def save_pdb(output_path, coordinates):
    """Write coordinates as fixed-column PDB ``ATOM`` records.

    Every point is written as a ``CA`` atom of residue ``ALA`` number 1 in
    chain ``A`` with occupancy 1.00, temperature factor 0.00 and element
    ``C``; only the serial number and the coordinates differ between lines.

    The record follows the PDB column layout (serial in columns 7-11,
    x/y/z as three adjacent ``8.3f`` fields in columns 31-54), so values
    such as ``-100.123`` that need all eight characters no longer push the
    following fields out of position.

    Parameters
    ----------
    output_path : str
        Destination file. Missing parent directories are created.
    coordinates : iterable
        Sequence of points; the first three entries of each are used as
        x, y, z.

    Notes
    -----
    Serial numbers above 99999 or coordinates outside ``-999.999..9999.999``
    do not fit their fields and would still widen the line.
    """
    import os  # local import: the notebook's import cell does not provide it

    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Columns: 1-6 record, 7-11 serial, 13-16 atom name, 18-20 residue name,
    # 22 chain, 23-26 residue number, 31-54 x/y/z, 55-60 occupancy,
    # 61-66 temperature factor, 77-78 element (right-justified).
    record = (
        "ATOM  {serial:5d}  CA  ALA A{res_seq:4d}    "
        "{x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{temp_factor:6.2f}"
        + " " * 10
        + "{element:>2s}\n"
    )

    with open(output_path, 'w') as pdb_file:
        for i, coord in enumerate(coordinates):
            pdb_file.write(
                record.format(
                    serial=i + 1,
                    res_seq=1,
                    x=float(coord[0]),
                    y=float(coord[1]),
                    z=float(coord[2]),
                    occupancy=1.0,
                    temp_factor=0.0,
                    element="C",
                )
            )

def visualize_structure(coordinates, color_variable):
    """Show the points as an interactive 3D scatter plot.

    Parameters
    ----------
    coordinates : numpy.ndarray
        Two-dimensional array; columns 0, 1 and 2 are plotted as x, y and z.
    color_variable : array-like
        Per-point values mapped onto the ``Viridis`` colour scale.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()`` and not returned.
    """
    xs = coordinates[:, 0]
    ys = coordinates[:, 1]
    zs = coordinates[:, 2]

    marker_style = {
        'size': 5,
        'color': color_variable,
        'colorscale': 'Viridis',
        'opacity': 0.8,
    }
    points_trace = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode='markers',
        marker=marker_style,
        name='Protein Structure',
    )

    axis_titles = {'xaxis_title': 'X', 'yaxis_title': 'Y', 'zaxis_title': 'Z'}

    figure = go.Figure()
    figure.add_trace(points_trace)
    figure.update_layout(scene=axis_titles)
    figure.show()

def main():
    """Run the full pipeline: parse, build views, train, reconstruct, plot, save.

    Reads the input PDB, derives three views of its coordinates, trains the
    network on the stacked views against the tiled original coordinates,
    predicts each view, averages the predictions across views, then plots
    the averaged coordinates and writes them to the output PDB path.
    """
    import os  # local import: the notebook's import cell does not provide it

    input_pdb_path = "input/AF-O06917-F1-model_v4.pdb"
    output_pdb_path = 'output/reconstructed_protein.pdb'

    # Create the output folder before the long training run so a missing
    # directory cannot discard the result at the very last step.
    output_dir = os.path.dirname(output_pdb_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    original_coordinates = parse_pdb(input_pdb_path)

    view1, view2, view3 = generate_views(original_coordinates)

    # NOTE(review): view2 has its rows shuffled while the targets below stay
    # in the original order, so those rows are paired with unrelated targets
    # and the later per-row average mixes differently ordered views. Confirm
    # whether this is intended.
    input_data = np.vstack([view1, view2, view3])
    output_data = np.tile(original_coordinates, (3, 1))
    model, scaler = train_model(input_data, output_data)

    reconstructed_views = reconstruct_structure(model, scaler, [view1, view2, view3])

    # Average across the view axis once and reuse it for plotting and saving.
    mean_structure = np.mean(reconstructed_views, axis=0)

    color_variable = np.arange(len(mean_structure))
    visualize_structure(mean_structure, color_variable)

    save_pdb(output_pdb_path, mean_structure)

if __name__ == "__main__":
    main()




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78