In [19]:
import os as os

import numpy as np

import tensorflow.keras as keras
import tensorflow.train as train
import tensorflow.nn as nn

from sklearn.model_selection import train_test_split

In [20]:
def convertLabel(aminoAcid):
    """Map a three-letter amino-acid code to its integer class index.

    Parameters
    ----------
    aminoAcid : str
        Residue code such as "ALA". Surrounding whitespace (for example the
        newline left on the last field of a line obtained with
        ``readline().split(",")``) is ignored.

    Returns
    -------
    int or None
        Position of the code in the 22-entry table below (0 for "ALA" up to
        21 for "VAL"), or None when the code is not in the table.
    """
    aminoacidList = ("ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE",
                     "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SEL", "SER", "THR", "TRP", "TYR", "VAL")

    # Anything that is not a string can never match a code; keep the
    # "unknown input -> None" contract instead of failing on .strip().
    if not isinstance(aminoAcid, str):
        return None

    code = aminoAcid.strip()
    if code in aminoacidList:
        return aminoacidList.index(code)
    return None

In [21]:
filename = 'matrices1.csv'
def load_data(filename):
    """Read a samples file and return its feature and label arrays.

    File layout, as consumed below:
      * line 1: the number of samples;
      * line 2: comma-separated sizes, of which the second value is the
        number of distance features per sample;
      * then, for every sample, three lines: a comma-separated line of
        residue names, a comma-separated line of distances, and one line
        that is skipped (expected to be empty).

    On the residue line, fields 0-5 are stored as the previous residues and
    field 7 is the label; all of them are encoded with ``convertLabel``.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    input_shape : int
        Number of distance features per sample.
    distancesList : numpy.ndarray, shape (nbOfSamples, input_shape)
    previousResiduesList : numpy.ndarray, shape (nbOfSamples, 6)
    labelList : numpy.ndarray, shape (nbOfSamples,)

    Raises
    ------
    ValueError
        If a residue name is not recognised by ``convertLabel``, or if a
        count or distance cannot be parsed as a number.
    IndexError
        If a residue line has fewer than 8 fields.
    """
    with open(filename) as f:

        nbOfSamples = int(f.readline())

        shapes = f.readline() # length of sequence + nb of features
        input_shape = int(shapes.split(",")[1])

        # inputs
        distancesList = np.zeros((nbOfSamples, input_shape))
        previousResiduesList = np.zeros((nbOfSamples, 6))

        # outputs
        labelList = np.zeros(nbOfSamples)

        for i in range(nbOfSamples):
            # Extracting labels; strip each field so a trailing newline or
            # stray blank cannot make a valid name look unknown.
            sequence = [name.strip() for name in f.readline().split(",")]
            residueIds = [convertLabel(sequence[k]) for k in (0, 1, 2, 3, 4, 5, 7)]

            # convertLabel returns None for names it does not know; report
            # that here rather than storing an invalid value in the arrays.
            if None in residueIds:
                raise ValueError(
                    "unknown residue name in sample {} of {}: {!r}".format(
                        i, filename, sequence[:8]))

            previousResiduesList[i, :] = residueIds[:6]
            labelList[i] = residueIds[6]

            # Extracting distances
            distancesString = f.readline()
            distancesList[i, :] = np.array([float(s) for s in distancesString.split(",")])

            # empty line
            f.readline()

    return input_shape, distancesList, previousResiduesList, labelList

In [22]:
def create_model(nbOfDistances=105, nbOfResidues=6, nbOfClasses=22):
    """Build and compile the two-branch residue classifier.

    Architecture:
      * distances branch: six Dense layers with 105, 210, 210, 100, 75 and
        53 units;
      * residues branch: a single Dense layer with 6 units;
      * the two branch outputs are concatenated and passed through a Dense
        layer with 30 units, then a softmax Dense layer with
        ``nbOfClasses`` units.

    All hidden layers use a leaky ReLU with slope 0.01 for negative inputs.
    The model is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss and the accuracy metric.

    Parameters
    ----------
    nbOfDistances : int, optional
        Number of distance features per sample (input size of the distances
        branch). Defaults to 105.
    nbOfResidues : int, optional
        Number of previous-residue features per sample (input size of the
        residues branch). Defaults to 6.
    nbOfClasses : int, optional
        Number of output classes. Defaults to 22.

    Returns
    -------
    keras.models.Model
        Compiled model taking ``[distances, residues]`` as inputs.
    """
    # activation function: leaky ReLU
    # A named function gives the activation a real __name__ instead of
    # "<lambda>".
    # NOTE(review): reloading the saved .h5 model presumably requires passing
    # this function through custom_objects - confirm.
    def leakyrelu(x):
        return keras.activations.relu(x, alpha=0.01, max_value=None, threshold=0)

    # distances branch
    distancesBranch = keras.models.Sequential([
        keras.layers.Dense(units = 105, activation = leakyrelu, input_shape = (nbOfDistances,)),
        keras.layers.Dense(units = 210, activation = leakyrelu),
        keras.layers.Dense(units = 210, activation = leakyrelu),
        keras.layers.Dense(units = 100, activation = leakyrelu),
        keras.layers.Dense(units = 75, activation = leakyrelu),
        keras.layers.Dense(units = 53, activation = leakyrelu),
    ])

    # residues branch
    residuesBranch = keras.models.Sequential([
        keras.layers.Dense(units = 6, activation = leakyrelu, input_shape = (nbOfResidues,))
    ])

    # concatenation
    combinedInput = keras.layers.concatenate([distancesBranch.output, residuesBranch.output])

    # No input_shape here: the layer is applied to an existing tensor, which
    # already determines its input size.
    x = keras.layers.Dense(units = 30, activation = leakyrelu)(combinedInput)
    x = keras.layers.Dense(units = nbOfClasses, activation = nn.softmax)(x)

    model = keras.models.Model(inputs = [distancesBranch.input, residuesBranch.input], outputs = x)

    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [23]:
# Data processing
input_shape, distances, previousResidues, output = load_data(filename)

# Ordered 80/20 split: with shuffle=False the samples keep their file order,
# the leading 80% going to training and the trailing 20% to testing.
# random_state is not passed because nothing is randomised in this mode.
distances_train, distances_test, residues_train, residues_test, y_train, y_test = train_test_split(
    distances, previousResidues, output, test_size=0.2, shuffle = False)

In [24]:
# Create checkpoints

checkpoint_path = "checkpoints/checkpoint.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Make sure the target directory exists before anything tries to write to it.
os.makedirs(checkpoint_dir, exist_ok=True)

cp_callback = keras.callbacks.ModelCheckpoint(
    checkpoint_path, save_weights_only=True, verbose=1,
    # save weights only, once every 5 epochs
    period=5)

# Most recent checkpoint found in checkpoint_dir, if any; used to resume training.
latest = train.latest_checkpoint(checkpoint_dir)

In [25]:
# Build a fresh model; when an earlier checkpoint was found, start from its weights.

model = create_model()

if latest:
    model.load_weights(latest)

In [26]:
# training
# cp_callback is configured with period=5, so weights are written every 5 epochs.
model.fit(
    [distances_train, residues_train],
    y_train,
    epochs = 170,
    callbacks = [cp_callback],
)

# Persist the full trained model.
model.save('my_model.h5')

# evaluate() yields the loss followed by the compiled metric (accuracy).
a, b = model.evaluate(
    [distances_test, residues_test],
    y_test,
)
print(a, b)

Epoch 1/170
Epoch 2/170
Epoch 3/170
Epoch 4/170
Epoch 5/170
Epoch 00005: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 6/170
Epoch 7/170
Epoch 8/170
Epoch 9/170
Epoch 10/170
Epoch 00010: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 11/170
Epoch 12/170
Epoch 13/170
Epoch 14/170
Epoch 15/170
Epoch 00015: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 16/170
Epoch 17/170
Epoch 18/170
Epoch 19/170
Epoch 20/170
Epoch 00020: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 21/170
Epoch 22/170
Epoch 23/170
Epoch 24/170
Epoch 25/170
Epoch 00025: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 26/170
Epoch 27/170
Epoch 28/170
Epoch 29/170
Epoch 30/170
Epoch 00030: saving model to checkpoints/checkpoint.ck

Epoch 41/170
Epoch 42/170
Epoch 43/170
Epoch 44/170
Epoch 45/170
Epoch 00045: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 46/170
Epoch 47/170
Epoch 48/170
Epoch 49/170
Epoch 50/170
Epoch 00050: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 51/170
Epoch 52/170
Epoch 53/170
Epoch 54/170
Epoch 55/170
Epoch 00055: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 56/170
Epoch 57/170
Epoch 58/170
Epoch 59/170
Epoch 60/170
Epoch 00060: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 61/170
Epoch 62/170
Epoch 63/170
Epoch 64/170
Epoch 65/170
Epoch 00065: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 66/170
Epoch 67/170
Epoch 68/170
Epoch 69/170
Epoch 70/170
Epoch 00070: saving model to checkpoints/chec

Epoch 81/170
Epoch 82/170
Epoch 83/170
Epoch 84/170
Epoch 85/170
Epoch 00085: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 86/170
Epoch 87/170
Epoch 88/170
Epoch 89/170
Epoch 90/170
Epoch 00090: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 91/170
Epoch 92/170
Epoch 93/170
Epoch 94/170
Epoch 95/170
Epoch 00095: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 96/170
Epoch 97/170
Epoch 98/170
Epoch 99/170
Epoch 100/170
Epoch 00100: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 101/170
Epoch 102/170
Epoch 103/170
Epoch 104/170
Epoch 105/170
Epoch 00105: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 106/170
Epoch 107/170
Epoch 108/170
Epoch 109/170
Epoch 110/170
Epoch 00110: saving model to check

Epoch 121/170
Epoch 122/170
Epoch 123/170
Epoch 124/170
Epoch 125/170
Epoch 00125: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 126/170
Epoch 127/170
Epoch 128/170
Epoch 129/170
Epoch 130/170
Epoch 00130: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 131/170
Epoch 132/170
Epoch 133/170
Epoch 134/170
Epoch 135/170
Epoch 00135: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 136/170
Epoch 137/170
Epoch 138/170
Epoch 139/170
Epoch 140/170
Epoch 00140: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 141/170
Epoch 142/170
Epoch 143/170
Epoch 144/170
Epoch 145/170
Epoch 00145: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 146/170
Epoch 147/170
Epoch 148/170
Epoch 149/170
Epoch 150/170
Epoch 00150: sa

Epoch 161/170
Epoch 162/170
Epoch 163/170
Epoch 164/170
Epoch 165/170
Epoch 00165: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 166/170
Epoch 167/170
Epoch 168/170
Epoch 169/170
Epoch 170/170
Epoch 00170: saving model to checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
0.40098974888832106 0.8870056491562651


In [27]:
# Second data file, read with the same loader.
filename1 = "matrices.csv"
input_shape1, distances1, previousResidues1, output1 = load_data(filename1)

# Shuffled 80/20 split with a fixed seed.
(distances_train1, distances_test1,
 residues_train1, residues_test1,
 y_train1, y_test1) = train_test_split(
    distances1, previousResidues1, output1,
    test_size=0.2, shuffle = True, random_state=42)

In [28]:
# Score the trained model on every sample of the second file.
# NOTE(review): this uses the full arrays, not the *_test1 split made in the
# previous cell - confirm that is intended.
a1, b1 = model.evaluate(
    [distances1, previousResidues1],
    output1,
)
print(a1, b1)

0.3791608504835062 0.8886158886647049
