In [18]:
import os as os

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

## Data processing

In [66]:
def convertLabel(label): #gives a nb to the aminoacid
    """Map a three-letter residue code to its integer class index.

    The index is the position of the code in the fixed 22-entry table below,
    so "ALA" maps to 0 and "VAL" maps to 21. Surrounding whitespace is
    ignored, so a code read as the last field of a line (with its trailing
    newline) still resolves.

    @param label Residue code such as "ALA".
    @return The index of the code in the table, or None (after printing
            "ERROR") when the code is not in the table.
    """
    aminoacidList = ("ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE",
                     "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SEL", "SER", "THR", "TRP", "TYR", "VAL")
    # Only strings can be stripped; anything else goes to the lookup unchanged.
    code = label.strip() if isinstance(label, str) else label
    try:
        return aminoacidList.index(code)
    except ValueError:
        # Unknown code: keep the original best-effort contract (message + None).
        print("ERROR")
        return None

In [67]:
def load_data(filename):
    """Parse the sample file into NumPy arrays.

    Layout of the file, as consumed by the code below:
      * line 1: the number of samples;
      * line 2: comma-separated sizes; the second field is used as the number
        of distance values per sample;
      * then three lines per sample:
          - comma-separated residue codes: fields 0-6 fill the
            "previous residues" row and field 8 is the label (field 7 is not
            read);
          - comma-separated floats: the distance values of the sample;
          - one separator line, which is skipped.

    Residue codes are turned into integers by convertLabel.

    @param filename Path of the file to read.
    @return A tuple (input_shape, distancesList, previousResiduesList,
            labelList) where input_shape is the number of distance values per
            sample and the three arrays have one row / entry per sample.
    """
    with open(filename) as f:
        nbOfSamples = int(f.readline())

        shapes = f.readline()  # length of sequence + nb of features
        input_shape = int(shapes.split(",")[1])

        # inputs
        distancesList = np.zeros((nbOfSamples, input_shape))
        previousResiduesList = np.zeros((nbOfSamples, 7))

        # outputs
        labelList = np.zeros(nbOfSamples)

        for sample in range(nbOfSamples):
            # Extracting labels
            sequence = f.readline().split(",")
            labelList[sample] = convertLabel(sequence[8])
            # The comprehension uses its own variable name so it can never
            # overwrite the sample counter (it would under Python 2 scoping).
            previousResiduesList[sample, :] = [convertLabel(code) for code in sequence[:7]]

            # Extracting distances
            distancesList[sample, :] = [float(value) for value in f.readline().split(",")]

            # empty line
            f.readline()

    return input_shape, distancesList, previousResiduesList, labelList

## Creating models

### Single input model

In [68]:
def single_input_model(input_dim=105, num_classes=22):
    """Build and compile the fully connected classifier fed with distances only.

    The network is a stack of Dense layers with leaky-ReLU activations
    (105, 210, 210, 210, 100, 75 and 53 units) followed by a softmax layer
    with one unit per class. It is compiled with the Adam optimizer, sparse
    categorical cross-entropy loss and the accuracy metric.

    Called without arguments it builds exactly the original architecture.

    @param input_dim Number of distance values per sample (default 105).
    @param num_classes Number of output classes (default 22).
    @return The compiled tf.keras Sequential model.
    """
    # activation function: leaky ReLU (slope 0.01 for negative inputs)
    leakyrelu = lambda x: tf.keras.activations.relu(x, alpha=0.01, max_value=None, threshold=0)

    # Widths of the hidden layers, in order. Dropout (rate 0.2) between the
    # layers was tried in earlier versions and is left disabled.
    hidden_units = (105, 210, 210, 210, 100, 75, 53)

    # building the neural network
    layers = [tf.keras.layers.Dense(units=hidden_units[0], activation=leakyrelu,
                                    input_shape=(input_dim,))]
    layers += [tf.keras.layers.Dense(units=width, activation=leakyrelu)
               for width in hidden_units[1:]]

    # final layer
    layers.append(tf.keras.layers.Dense(units=num_classes, activation=tf.nn.softmax))

    model = tf.keras.models.Sequential(layers)

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [69]:
# Define checkpoints
single_input_checkpoint_path = "Single input model/checkpoints/checkpoint.ckpt"
single_input_checkpoint_dir = os.path.dirname(single_input_checkpoint_path)

# Callback that writes the weights only to the path above, every 10 epochs
single_input_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=single_input_checkpoint_path,
    save_weights_only=True,
    period=10,
    verbose=1,
)

# Checkpoint to resume from; tested for truthiness before weights are loaded
single_input_latest = tf.train.latest_checkpoint(single_input_checkpoint_dir)

### Double input model

In [82]:
def double_input_model(distances_dim=105, num_classes=22):
    """Build and compile the two-branch classifier (distances + previous residues).

    * The distances branch is a stack of Dense layers with leaky-ReLU
      activations (105, 210, 210, 100, 75 and 53 units).
    * The residues branch is a single Dense layer of 7 units on a 7-value input.
    * Both branch outputs are concatenated (53 + 7 = 60 values) and passed
      through a Dense layer of 30 units and a softmax layer with one unit per
      class.

    The model is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss and the accuracy metric. Called without arguments it
    builds exactly the original architecture.

    @param distances_dim Number of distance values per sample (default 105).
    @param num_classes Number of output classes (default 22).
    @return The compiled tf.keras Model taking [distances, residues] as inputs.
    """
    # activation function: leaky ReLU (slope 0.01 for negative inputs)
    leakyrelu = lambda x: tf.keras.activations.relu(x, alpha=0.01, max_value=None, threshold=0)

    # distances branch (dropout with rate 0.2 between layers is left disabled)
    distancesBranch = tf.keras.models.Sequential(
        [tf.keras.layers.Dense(units=105, activation=leakyrelu, input_shape=(distances_dim,))]
        + [tf.keras.layers.Dense(units=width, activation=leakyrelu)
           for width in (210, 210, 100, 75, 53)]
    )

    # residues branch
    residuesBranch = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units=7, activation=leakyrelu, input_shape=(7,))
    ])

    # concatenation
    combinedInput = tf.keras.layers.concatenate([distancesBranch.output, residuesBranch.output])

    # No input_shape here: these layers are applied to an existing tensor, so
    # their input width comes from that tensor (the former (59,) hint did not
    # match the 60-wide concatenation).
    x = tf.keras.layers.Dense(units=30, activation=leakyrelu)(combinedInput)
    x = tf.keras.layers.Dense(units=num_classes, activation=tf.nn.softmax)(x)

    model = tf.keras.models.Model(inputs=[distancesBranch.input, residuesBranch.input], outputs=x)

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [71]:
# Define checkpoints
double_input_checkpoint_path = "Double input model/checkpoints/checkpoint.ckpt"
double_input_checkpoint_dir = os.path.dirname(double_input_checkpoint_path)

# Callback that writes the weights only to the path above, every 10 epochs
double_input_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=double_input_checkpoint_path,
    save_weights_only=True,
    period=10,
    verbose=1,
)

# Checkpoint to resume from; tested for truthiness before weights are loaded
double_input_latest = tf.train.latest_checkpoint(double_input_checkpoint_dir)

## Saving models

In [72]:
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    The names of all global variables are appended to the requested outputs
    before conversion. The list passed as output_names is not modified.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    graph = session.graph
    with graph.as_default():
        # Collect the variable names once; they are needed twice below.
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))

        # Build a new list rather than using `+=`, which would extend the
        # caller's list in place.
        output_names = list(output_names or []) + variable_names

        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ''

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph

## Training

### Importing data

In [73]:
# Data processing: parse the sample file into feature and label arrays
input_shape, distances, previousResidues, output = load_data("Data/matrices.csv")

# Hold out 20% of the samples for validation, without shuffling
(distances_train, distances_test,
 residues_train, residues_test,
 y_train, y_test) = train_test_split(
    distances,
    previousResidues,
    output,
    test_size=0.2,
    shuffle=False,
    random_state=2,
)

### Training the single input model

#### Creation

In [60]:
# Create a session for the single-input model and register it with Keras.
# (`tf.keras` and `single_input_sess` are the names that actually exist here;
# bare `keras` and `sess` are never defined in this notebook.)
single_input_sess = tf.Session()
tf.keras.backend.set_session(single_input_sess)

# NOTE: this rebinds `single_input_model` from the builder function to the
# built model, so re-running the cell that defines the function turns the
# name back into a function until this cell is run again.
single_input_model = single_input_model()

# Resume from the latest checkpoint when one was found
if single_input_latest:
    single_input_model.load_weights(single_input_latest)

#### Training

In [61]:
# Train on the distances only; weights are checkpointed through the callback
single_input_model.fit(
    x=distances_train,
    y=y_train,
    epochs=20,
    validation_data=(distances_test, y_test),
    callbacks=[single_input_cp_callback],
)

Train on 976 samples, validate on 245 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 00010: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.



Consider using a TensorFlow optimizer from `tf.train`.


Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 00020: saving model to Single input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.



Consider using a TensorFlow optimizer from `tf.train`.




<tensorflow.python.keras.callbacks.History at 0x12a8b2be0>

#### Saving

In [85]:
# Write the trained single-input model to disk.
# NOTE(review): the AttributeError recorded below this cell is consistent with
# `single_input_model` being bound to the builder function when the cell ran
# (the execution counts show the definition cell was run after the creation
# cell). Run the creation cell again before saving.
single_input_model_file = "Single input model/single_input_model.h5"
single_input_model.save(single_input_model_file)

AttributeError: 'function' object has no attribute 'save'

### Training the double input model

#### Creation

In [83]:


# NOTE: this rebinds `double_input_model` from the builder function to the
# built model, so re-running the cell that defines the function turns the
# name back into a function until this cell is run again.
double_input_model = double_input_model()

# Resume from the latest checkpoint when one was found
if double_input_latest:
    double_input_model.load_weights(double_input_latest)

#### Training

In [84]:
# Train on both inputs (distances, previous residues); weights are
# checkpointed through the callback
double_input_model.fit(
    x=[distances_train, residues_train],
    y=y_train,
    epochs=20,
    validation_data=([distances_test, residues_test], y_test),
    callbacks=[double_input_cp_callback],
)

Train on 976 samples, validate on 245 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 00010: saving model to Double input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.



Consider using a TensorFlow optimizer from `tf.train`.


Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 00020: saving model to Double input model/checkpoints/checkpoint.ckpt

Consider using a TensorFlow optimizer from `tf.train`.



Consider using a TensorFlow optimizer from `tf.train`.




<tensorflow.python.keras.callbacks.History at 0x12c3c5400>

#### Saving

In [None]:
# Freeze the current Keras session, using the double-input model's outputs as
# the graph outputs, and write the result as a binary graph file.
# (`double_input_model` replaces `model`, which is never defined in this notebook.)
frozen_graph = freeze_session(
    tf.keras.backend.get_session(),
    output_names=[out.op.name for out in double_input_model.outputs])
tf.train.write_graph(frozen_graph, './', "Double input model/double_input_model.pb", as_text=False)