In [56]:
import os as os

import tensorflow as tf

import numpy as np

import shutil as shutil

from sklearn.model_selection import train_test_split

## Data processing

In [13]:
def convertLabel(label): #gives a nb to the aminoacid
    aminoacidList = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE",
                    "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SEL", "SER", "THR", "TRP", "TYR", "VAL"]
    for i in range (0, 22):
        if label == aminoacidList[i]:
            return i
    print("ERROR")

In [40]:
def load_data(filename):


    labelList = []

    with open(filename) as f:

        nbOfSamples = int(f.readline())
        i = 0

        shapes = f.readline() # length of sequence + nb of features
        input_shape = int(shapes.split(",")[1])

        # inputs
        distancesList = np.zeros((nbOfSamples, input_shape))
        previousResidues = np.zeros((nbOfSamples, 7, 22))
        
        # outputs
        labelList = np.zeros(nbOfSamples)


        while i < nbOfSamples:
            # Extracting labels
            sequence = f.readline().split(",")
            labelList[i] = convertLabel(sequence[7])
            for residue in range(0, 7):
                previousResidues[i, residue, convertLabel(sequence[residue])] = 1

            # Extracting distances
            distancesString = f.readline()
            distancesArray = np.array([float(s) for s in distancesString.split(",")])
            distancesList[i, :] = distancesArray

            # empty line
            f.readline()

            i += 1
    return input_shape, distancesList, previousResidues, labelList

## Creating models

### Single input model

In [15]:
def single_input_model():
    # activation function: leaky ReLU
    leakyrelu = lambda x: tf.keras.activations.relu(x, alpha=0.01, max_value=None, threshold=0)

    # creation du reseau de neurones
    model = tf.keras.models.Sequential([

        # hidden layer
        tf.keras.layers.Dense(units = 105, activation = leakyrelu, input_shape = (105,)),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units = 210, activation = leakyrelu),
        tf.keras.layers.Dense(units = 400, activation = leakyrelu),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units = 210, activation = leakyrelu),
        tf.keras.layers.Dense(units = 100, activation = leakyrelu),
        tf.keras.layers.Dense(units = 75, activation = leakyrelu),
        tf.keras.layers.Dense(units = 53, activation = leakyrelu),

        # final layer
        tf.keras.layers.Dense(units = 22, activation = tf.nn.softmax),
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [16]:
# Define checkppoints
single_input_checkpoint_path = "Single input model/checkpoints/checkpoint.ckpt"
single_input_checkpoint_dir = os.path.dirname(single_input_checkpoint_path)

single_input_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    single_input_checkpoint_path, save_weights_only=True, verbose=1,
    # save weights, every 10-epoch
    period=10)

single_input_latest = tf.train.latest_checkpoint(single_input_checkpoint_dir)

### Double input model

In [47]:
def double_input_model():
    # activation function: leaky ReLU
    leakyrelu = lambda x: tf.keras.activations.relu(x, alpha=0.01, max_value=None, threshold=0)

    # distances branch
    distancesBranch = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units = 105, activation = leakyrelu, input_shape = (105,)),
        tf.keras.layers.Dense(units = 210, activation = leakyrelu),
        tf.keras.layers.Dense(units = 420, activation = leakyrelu),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units = 210, activation = leakyrelu),
        tf.keras.layers.Dense(units = 105, activation = leakyrelu),
        tf.keras.layers.Dropout(rate=0.07),
        tf.keras.layers.Dense(units = 50, activation = leakyrelu),
    ])
    
    
    # residues branch   
    residuesBranch = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units = 7, activation = leakyrelu, input_shape = (7,22)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units = 50, activation = leakyrelu)
        
    ])
    
    # concatenation
    combinedInput = tf.keras.layers.concatenate([distancesBranch.output, residuesBranch.output])

    x = tf.keras.layers.Dense(units = 30, activation = leakyrelu, input_shape = (100,))(combinedInput)
    x = tf.keras.layers.Dense(units = 22, activation = tf.nn.softmax)(x)
    
    model = tf.keras.models.Model(inputs = [distancesBranch.input, residuesBranch.input], outputs = x)

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [48]:
# Define checkppoints
double_input_checkpoint_path = "Double input model/checkpoints/checkpoint.ckpt"
double_input_checkpoint_dir = os.path.dirname(double_input_checkpoint_path)

double_input_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    double_input_checkpoint_path, save_weights_only=True, verbose=1,
    # save weights, every 50-epoch
    period=50)

double_input_latest = tf.train.latest_checkpoint(double_input_checkpoint_dir)

## Training

### Importing data

In [41]:
# Data processing
input_shape, distances, previousResidues, output = load_data("Data/fullmatrices.csv")


distances_train, distances_test, residues_train, residues_test, y_train, y_test = train_test_split(
    distances, previousResidues, output, test_size=0.2, shuffle = True, random_state=2)

### Training the single input model

#### Creation

In [51]:
single_input_model = single_input_model()

if single_input_latest: single_input_model.load_weights(single_input_latest)

#### Training

In [None]:
single_input_model.fit(distances_train, y_train,
                       validation_data = (distances_test, y_test),
                       shuffle = True,
                       batch_size = 1024,
                       callbacks = [single_input_cp_callback], epochs = 150)

Train on 3264 samples, validate on 817 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 00010: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 00020: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 00030: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 00040: saving model to Single input mode

Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 00090: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 00100: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 00110: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 00120: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 121

Epoch 129/500
Epoch 130/500
Epoch 00130: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 00140: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 00150: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 00160: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 161/500
Epoch 162/

Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 00180: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 00190: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 00200: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 00210: saving model to Single input mo

Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 00220: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 00230: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 00240: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 00250: saving model to Single input model/checkpoints/checkpoint.c

Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 00300: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 00310: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 00320: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 00330: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 331/

Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 00390: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 00400: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 00410: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 00420: saving model to Single input mo

#### Plot loss function

In [None]:
print(history.history.keys())  
   
 plt.figure(1)  
   
 # summarize history for accuracy  
   
 plt.subplot(211)  
 plt.plot(history.history['accuracy'])  
 plt.plot(history.history['val_accuracy'])  
 plt.title('model accuracy')  
 plt.ylabel('accuracy')  
 plt.xlabel('epoch')  
 plt.legend(['train', 'test'], loc='upper left')


### Training the double input model

#### Creation

In [57]:
double_input_model = double_input_model()

if double_input_latest: double_input_model.load_weights(double_input_latest)

TypeError: __call__() missing 1 required positional argument: 'inputs'

#### Training

In [50]:
history=double_input_model.fit([distances_train, residues_train], y_train,
                       shuffle = True,
                       batch_size = 1024,
                       validation_data = ([distances_test, residues_test], y_test),
                       callbacks = [double_input_cp_callback], epochs = 150)

Train on 3264 samples, validate on 817 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 00050: saving model to Double input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/50

Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 00150: saving model to Double input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500


Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 00200: saving model to Double input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 201/500
Epoch 202/500
Epoch 203/500
 224/3264 [=>............................] - ETA: 0s - loss: 5.3409e-05 - acc: 1.0000

KeyboardInterrupt: 

#### Plot loss function

In [None]:
print(history.history.keys())  
   
 plt.figure(1)  
   
 # summarize history for accuracy  
   
 plt.subplot(211)  
 plt.plot(history.history['accuracy'])  
 plt.plot(history.history['val_accuracy'])  
 plt.title('model accuracy')  
 plt.ylabel('accuracy')  
 plt.xlabel('epoch')  
 plt.legend(['train', 'test'], loc='upper left')
