In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive at /content/drive so the dataset and model files referenced
# by later cells are reachable. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd


In [None]:
# Load the 9-mer dataset. Later cells read three columns from it:
# 'protein_sequences', 'phi_psi_angles' and '3d_coordinates'.
df = pd.read_parquet('/content/drive/MyDrive/9mers.parquet')


In [None]:
# Sanity check: (rows, columns) of the loaded frame.
df.shape

(112977, 3)

In [None]:
# Targets: one entry per row of `df`. Despite the `X_` prefix, this array is the
# `y` argument handed to train_test_split further down.
sequence_rows = df['protein_sequences'].tolist()
X_protein = np.array(sequence_rows)


In [None]:
def preprocess_phi_psi(angles):
    """Flatten one row of nested angle values into a float array of shape (n, 2).

    Args:
        angles: An iterable of iterables of numeric values (one nesting level is
            flattened).

    Returns:
        A float ``numpy.ndarray`` of shape ``(n, 2)``. If anything goes wrong
        (non-iterable input, non-numeric values, an odd number of values, ...)
        the error is printed and an empty ``(0, 2)`` array is returned instead.
    """
    try:
        values = []
        for group in angles:
            values.extend(group)
        # Re-pair the flat value list two at a time.
        result = np.array(values, dtype=float).reshape(-1, 2)
    except Exception as e:
        print(f"Error processing phi-psi angles: {e}")
        result = np.array([]).reshape(-1, 2)
    return result

# Parse every row's angle list into an (n, 2) float array; rows that fail to parse
# come back as empty arrays (see preprocess_phi_psi).
phi_psi_per_row = df['phi_psi_angles'].apply(preprocess_phi_psi)
phi_psi_features = np.array([f for f in phi_psi_per_row if f.size > 0])

# Dropping unparsable rows would leave this array shorter than the targets and the
# coordinate features built from the same `df`, and the later concatenate / split
# would then fail with an opaque shape error. Fail here with an actionable message.
n_dropped = len(df) - len(phi_psi_features)
if n_dropped:
    raise ValueError(
        f"{n_dropped} row(s) of 'phi_psi_angles' could not be parsed; drop or fix "
        "those rows in `df` before building features so all arrays stay aligned."
    )

In [None]:
# Standardise the two angle columns across the whole dataset, then restore the
# original array shape.
# NOTE(review): the scaler is fit on all rows before the train/test split, so test
# statistics leak into training — consider fitting on the training rows only.
scaler_phi_psi = StandardScaler()
phi_psi_original_shape = phi_psi_features.shape
phi_psi_pairs = phi_psi_features.reshape(-1, 2)
phi_psi_scaled = scaler_phi_psi.fit_transform(phi_psi_pairs)
phi_psi_features = phi_psi_scaled.reshape(phi_psi_original_shape)


In [None]:
def preprocess_3d_coords(coord_data):
    """Min-max scale every coordinate vector of every structure independently.

    Args:
        coord_data: Iterable of structures; each structure is an iterable of
            coordinate vectors (anything ``np.array`` accepts).

    Returns:
        A list with one ``numpy.ndarray`` per structure. Each array stacks that
        structure's vectors after each one has been flattened and rescaled on its
        own with a ``MinMaxScaler`` using default settings.

    NOTE(review): because every vector is rescaled separately, relative magnitudes
    between vectors of the same structure are not preserved — confirm intended.
    """
    scaler = MinMaxScaler()

    def _scale_vector(coords):
        # The scaler works column-wise, so present the vector as a single column.
        column = np.array(coords).reshape(-1, 1)
        return scaler.fit_transform(column).flatten()

    return [
        np.array([_scale_vector(coords) for coords in structure])
        for structure in coord_data
    ]

In [None]:
# Scale the raw coordinates, then append a trailing singleton axis.
# NOTE(review): `coords_features_reshaped` is reassigned from `coords_features`
# in the following cell, so the value computed here is not what reaches the model.
raw_coordinates = df['3d_coordinates'].tolist()
coords_features = preprocess_3d_coords(raw_coordinates)
coords_features_reshaped = np.asarray(coords_features)[..., np.newaxis]

In [None]:
# Stack the per-structure arrays; if the result is 4-D, merge the last two axes so
# the array is 3-D (samples, rows, features).
coords_features_reshaped = np.array(coords_features)
if coords_features_reshaped.ndim == 4:
    n_structures, n_rows = coords_features_reshaped.shape[:2]
    coords_features_reshaped = coords_features_reshaped.reshape(n_structures, n_rows, -1)


In [None]:
# Copy of the angle features; a 3-D array is collapsed to (samples, 1, features).
# NOTE(review): no later cell shown here reads `phi_psi_features_reshaped` — the
# concatenation below uses `phi_psi_features` directly.
phi_psi_features_reshaped = np.array(phi_psi_features)
if phi_psi_features_reshaped.ndim == 3:
    n_phi_psi_samples = phi_psi_features_reshaped.shape[0]
    phi_psi_features_reshaped = phi_psi_features_reshaped.reshape(n_phi_psi_samples, 1, -1)


In [None]:
# Join coordinates and angles along the last axis; the final two columns of the
# result are the angle pair (the fit/evaluate cells split on `[..., -2:]`).
feature_blocks = (coords_features_reshaped, phi_psi_features)
combined_features = np.concatenate(feature_blocks, axis=-1)


In [None]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    combined_features,
    X_protein,
    random_state=42,
    test_size=0.3,
)

In [None]:
import tensorflow as tf
# Two model inputs: a single-channel 9x9 grid for the coordinate features and a
# (9, 2) tensor for the torsion-angle pairs.
input_3d = Input(shape=(9, 9, 1), name='3d_coordinates_input')
input_torsion = Input(shape=(9, 2), name='torsion_angles_input')

In [None]:
# Repeat the single-channel input three times so the tensor passed to the ResNet50
# backbone below has three channels.
input_3d_3_channels = tf.keras.layers.Concatenate()([input_3d, input_3d, input_3d])

In [None]:
# ResNet50 backbone with ImageNet weights and no classification head, wired to the
# 3-channel coordinate tensor; its output feature map is flattened to a vector.
# NOTE(review): the backbone is left trainable (nothing here freezes it) — confirm
# that fine-tuning all ResNet weights is intended.
resnet_base = ResNet50(weights='imagenet', include_top=False, input_tensor=input_3d_3_channels)
x_3d = Flatten()(resnet_base.output)

In [None]:
# Torsion branch: a 64-unit ReLU projection of the angle input, then flattened to
# a single vector per sample.
x_torsion = Dense(64, activation='relu')(input_torsion)
x_torsion = Flatten()(x_torsion)

In [None]:
# Fuse the flattened ResNet features with the flattened torsion features.
combined = Concatenate()([x_3d, x_torsion])

In [None]:
# Fully connected head: 512 -> 256 -> 128 ReLU units, each followed by 30% dropout.
x = Dense(512, activation='relu')(combined)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)

In [None]:
# Output layer: 9 linear units, i.e. one unbounded real value per output position.
output = Dense(9, activation='linear')(x)
model = Model(inputs=[input_3d, input_torsion], outputs=output)
# Trained as a regression with mean absolute error.
# NOTE(review): 'accuracy' is a classification metric; on this linear regression
# head it is unlikely to be meaningful (the logged values stay near 0.11) —
# consider a regression metric or a classification formulation instead.
model.compile(optimizer='adam', loss='mae', metrics=['accuracy'])


In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()


In [None]:
# Split the combined features back into the two model inputs: everything except
# the last two columns feeds the coordinate branch, the last two the torsion branch.
train_coords = X_train[..., :-2]
train_torsion = X_train[..., -2:]

# Train for 50 epochs, holding out 20% of the training data for validation.
history = model.fit(
    [train_coords, train_torsion],
    y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)


Epoch 1/50
[1m1978/1978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 43ms/step - accuracy: 0.1144 - loss: 4.8121 - val_accuracy: 0.1245 - val_loss: 66.2359
Epoch 2/50
[1m1978/1978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 27ms/step - accuracy: 0.1165 - loss: 4.5292 - val_accuracy: 0.1314 - val_loss: 5.0150
Epoch 3/50
[1m1978/1978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 28ms/step - accuracy: 0.1189 - loss: 4.4682 - val_accuracy: 0.1131 - val_loss: 4.3914
Epoch 4/50
[1m1978/1978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 28ms/step - accuracy: 0.1141 - loss: 4.4519 - val_accuracy: 0.1151 - val_loss: 4.3807
Epoch 5/50
[1m1978/1978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 28ms/step - accuracy: 0.1125 - loss: 4.4274 - val_accuracy: 0.1106 - val_loss: 4.3915
Epoch 6/50
[1m1978/1978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 27ms/step - accuracy: 0.1123 - loss: 4.4277 - val_accuracy: 0.1132 - val_loss: 4.3880
Ep

In [None]:
# Evaluate on the held-out split, using the same column split as in training.
test_inputs = [X_test[..., :-2], X_test[..., -2:]]
test_loss, test_accuracy = model.evaluate(test_inputs, y_test)


[1m1060/1060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.1111 - loss: 4.3451


In [None]:
# Persist the trained model to Drive as an HDF5 (.h5) file.
model.save('/content/drive/MyDrive/9mers_dense/my_model.h5')


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanAbsoluteError
import numpy as np

# Number of samples in the synthetic inference batch built in the next cell.
batch_size=1

# Reload the model written by the earlier `model.save(...)` cell. The path used
# before ('9mers_resnet.h5') is never written by this notebook and raised
# FileNotFoundError, so point at the file that actually exists.
# The model is loaded without its compile state and recompiled explicitly
# (presumably to avoid deserialising the saved loss/metric strings — confirm).
model = load_model('/content/drive/MyDrive/9mers_dense/my_model.h5', compile=False)
model.compile(optimizer='adam', loss=MeanAbsoluteError(), metrics=['accuracy'])

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = '/content/drive/MyDrive/9mers_dense/9mers_resnet.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
import numpy as np
# Synthetic uniform-random inputs shaped like the model's two inputs, used only to
# smoke-test inference.
# NOTE(review): these assignments overwrite the real feature arrays of the same
# names, and no seed is set, so the values differ on every run.
coords_features_reshaped = np.random.random(size=(batch_size, 9, 9, 1))
phi_psi_features = np.random.random(size=(batch_size, 9, 2))

In [None]:
# Dump both synthetic input arrays for inspection.
for synthetic_input in (coords_features_reshaped, phi_psi_features):
    print(synthetic_input)

[[[[6.36767762e-01]
   [6.63538881e-01]
   [6.83189496e-01]
   [9.91244226e-02]
   [8.76291579e-01]
   [4.42906613e-01]
   [8.02631389e-01]
   [5.55219533e-01]
   [9.58787414e-01]]

  [[3.80701041e-01]
   [3.71203889e-02]
   [7.59986738e-01]
   [3.55566809e-02]
   [9.21596049e-02]
   [4.54235682e-01]
   [8.64489122e-01]
   [3.14122982e-01]
   [6.10560561e-01]]

  [[7.87796942e-01]
   [4.68994164e-01]
   [9.40595176e-01]
   [8.73630820e-01]
   [4.41148553e-02]
   [2.50941770e-01]
   [3.51097233e-01]
   [5.27805114e-01]
   [7.80324418e-01]]

  [[5.51518071e-01]
   [4.66539214e-01]
   [5.25337339e-01]
   [1.75208997e-01]
   [6.52401510e-02]
   [3.78626052e-01]
   [3.79151396e-02]
   [8.08535221e-01]
   [2.07542365e-01]]

  [[9.46418848e-02]
   [8.73559662e-01]
   [3.04332330e-01]
   [1.09326159e-02]
   [4.81955574e-01]
   [5.41666518e-01]
   [3.60944202e-01]
   [9.68609909e-01]
   [5.35436684e-01]]

  [[6.32875841e-01]
   [7.93894780e-01]
   [9.35285548e-01]
   [7.69946086e-01]
   [7.6539

In [None]:
# Run inference on the two-input batch and show the raw real-valued outputs
# together with their shape.
inference_inputs = [coords_features_reshaped, phi_psi_features]
predicted_protein_sequence = model.predict(inference_inputs)
print("Predicted Protein Sequence:", predicted_protein_sequence)
print("Predicted Protein Sequence Shape:", predicted_protein_sequence.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Predicted Protein Sequence: [[ 6.461937   6.621698   8.6538315  6.7519717  6.0028462  7.168259
   8.338591   9.1311035 11.583356 ]]
Predicted Protein Sequence Shape: (1, 9)


In [None]:
# Round the continuous model outputs to the nearest integer residue index.
rounded_predictions = np.round(predicted_protein_sequence).astype(int)
print("Rounded predictions:", rounded_predictions)

# Index -> one-letter code for the 20 standard amino acids in alphabetical order.
# The previous table stopped at index 11 ('N'), so any prediction rounding to
# 12-19 was decoded as the unknown residue 'X'. The first twelve entries are unchanged.
# NOTE(review): assumes the training labels use this alphabetical 0-19 encoding —
# confirm against the code that produced 'protein_sequences'.
index_to_amino_acid = dict(enumerate("ACDEFGHIKLMNPQRSTVWY"))

# Indices outside the table (negative or >= 20) still fall back to 'X'.
mapped_sequence = ''.join([index_to_amino_acid.get(idx, 'X') for idx in rounded_predictions[0]])
print("Predicted Protein Sequence:", mapped_sequence)

In [None]:
# Record the TensorFlow version this notebook was run with.
import tensorflow as tf
print(tf.__version__)


2.17.0


In [None]:
# Save the current in-memory `model` to Drive under a second HDF5 file name.
model.save('/content/drive/MyDrive/9mers_dense/9mers_resnet_mae.h5')


