In [4]:
import os as os

import numpy as np

import tensorflow as tf

from sklearn.model_selection import train_test_split

In [5]:
def convertLabel(sequence):
    """Map the amino-acid code of a sample to its integer class index.

    The code is read from position 7 of ``sequence`` (a list of strings, as
    produced by splitting one CSV line on commas). Surrounding whitespace,
    including a trailing newline left over from ``readline()``, is ignored so
    that the label is still recognised when it is the last field on the line.

    @param sequence Indexable of strings with at least 8 entries.
    @return The position of the code in the list below (0..21), or None when
            the code is not one of the known three-letter names.
    @raise IndexError if ``sequence`` has fewer than 8 entries.
    """
    # The order of this tuple defines the class indices; do not reorder it.
    aminoacidList = ("ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE",
                     "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SEL", "SER", "THR", "TRP", "TYR", "VAL")
    label = sequence[7].strip()
    try:
        return aminoacidList.index(label)
    except ValueError:
        # Unknown code: keep the historical "no result" answer, but explicitly.
        return None

In [6]:
filename = "matrices1.csv"


def load_data(path=None):
    """Read the sample file and return the feature matrix and labels.

    File layout, as consumed below:
      * line 1: the number of samples;
      * line 2: comma-separated sizes, whose second field is the number of
        features per sample;
      * then, for every sample, three lines: a comma-separated description
        line (passed to ``convertLabel``), a comma-separated line of float
        distances, and one separator line that is skipped.

    @param path File to read; defaults to the module-level ``filename``.
    @return ``(input_shape, distancesList, labelList)`` where ``input_shape``
            is the number of features (int), ``distancesList`` is a float
            array of shape ``(nbOfSamples, input_shape)`` and ``labelList``
            is a float array of length ``nbOfSamples`` holding class indices.
    @raise ValueError if a label is not recognised by ``convertLabel`` or a
           distance row does not contain exactly ``input_shape`` values.
    """
    if path is None:
        path = filename

    with open(path) as f:
        nbOfSamples = int(f.readline())

        shapes = f.readline()  # length of sequence + nb of features
        input_shape = int(shapes.split(",")[1])

        distancesList = np.zeros((nbOfSamples, input_shape))
        labelList = np.zeros(nbOfSamples)

        for i in range(nbOfSamples):
            # Extracting labels
            sequence = f.readline().split(",")
            label = convertLabel(sequence)
            if label is None:
                # Fail loudly instead of letting a None leak into the label array.
                raise ValueError(
                    "unknown amino-acid label in sample %d of %s" % (i, path))
            labelList[i] = label

            # Extracting distances
            distancesArray = np.array([float(s) for s in f.readline().split(",")])
            if distancesArray.shape != (input_shape,):
                # A one-value row would otherwise be broadcast over the whole row.
                raise ValueError(
                    "sample %d of %s has %d distances, expected %d"
                    % (i, path, distancesArray.size, input_shape))
            distancesList[i, :] = distancesArray

            # empty line separating two samples
            f.readline()

    return input_shape, distancesList, labelList

In [16]:
def create_model(input_shape=105, num_classes=22):
    """Build and compile the fully-connected classifier.

    The network is a stack of seven hidden ``Dense`` layers with a leaky-ReLU
    activation followed by a softmax output layer. It is compiled with the
    Adam optimizer, sparse categorical cross-entropy loss and the accuracy
    metric.

    @param input_shape Number of input features (int), or an already-built
                       shape tuple. Defaults to 105, the value that was
                       previously hard-coded; pass the ``input_shape``
                       returned by ``load_data`` to follow the data file.
    @param num_classes Number of units of the softmax output layer
                       (default 22).
    @return The compiled ``tf.keras`` Sequential model.
    """
    # Accept both a plain feature count and a shape tuple.
    try:
        input_dims = tuple(input_shape)
    except TypeError:
        input_dims = (int(input_shape),)

    # activation function: leaky ReLU (slope 0.01 for negative inputs)
    def leakyrelu(x):
        return tf.keras.activations.relu(x, alpha=0.01, max_value=None, threshold=0)

    # Widths of the hidden layers, in order. Dropout (rate 0.2) between the
    # layers was tried in an earlier version of this cell and is disabled.
    hidden_units = (105, 210, 210, 210, 100, 75, 53)

    # creation of the neural network
    layers = []
    for position, units in enumerate(hidden_units):
        layer_kwargs = {"units": units, "activation": leakyrelu}
        if position == 0:
            # Only the first layer needs to know the input dimensionality.
            layer_kwargs["input_shape"] = input_dims
        layers.append(tf.keras.layers.Dense(**layer_kwargs))

    # final layer: one probability per class
    layers.append(tf.keras.layers.Dense(units=num_classes, activation=tf.nn.softmax))

    model = tf.keras.models.Sequential(layers)

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [17]:
# Data processing: read every sample from disk, then hold out 20% of them
# (shuffled with a fixed seed, so the split is reproducible) for evaluation.
input_shape, distances, labels = load_data()

x_train, x_test, y_train, y_test = train_test_split(
    distances,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=2,
)

In [18]:
# Create checkpoints

# All checkpoints are written to (and restored from) this single path.
checkpoint_path = "checkpoints/checkpoint.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that stores the weights only (not the full model), every 5 epochs.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
    period=5,
)

# Most recent checkpoint found in the directory, or None when there is none.
latest = tf.train.latest_checkpoint(checkpoint_dir)

In [19]:
# Build a fresh model instance and, when an earlier checkpoint was found,
# resume from its weights instead of starting from the random initialisation.
model = create_model()

if latest:
    model.load_weights(latest)

In [21]:
# Training: one epoch over the training split with the checkpoint callback attached.
# NOTE(review): cp_callback is configured with period=5, so a single 1-epoch
# run of this cell is not expected to write a checkpoint — confirm this is intended.
model.fit(
    x_train,
    y_train,
    callbacks=[cp_callback],
    epochs=1,
)



<tensorflow.python.keras.callbacks.History at 0x128bd7be0>

In [23]:
# saving the model

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    The names of all global variables are appended to the requested outputs
    before conversion. The list passed as ``output_names`` is left untouched:
    a new list is built instead of extending the caller's one in place.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    graph = session.graph
    with graph.as_default():
        # Query the variables once; both lists below are derived from it.
        variable_names = [v.op.name for v in tf.compat.v1.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))
        # Copy before extending so the caller's list is never mutated.
        all_output_names = list(output_names or []) + variable_names
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ''
        # compat.v1 endpoint, as recommended by TensorFlow's deprecation notice
        # for tf.graph_util.convert_variables_to_constants.
        frozen_graph = tf.compat.v1.graph_util.convert_variables_to_constants(
            session, input_graph_def, all_output_names, freeze_var_names)
        return frozen_graph

    
# Freeze the graph behind the trained Keras model and write it, as a binary
# protobuf, into the current directory.
keras_session = tf.keras.backend.get_session()
output_node_names = [out.op.name for out in model.outputs]
frozen_graph = freeze_session(keras_session, output_names=output_node_names)
tf.train.write_graph(frozen_graph, './', 'single_input_model.pb', as_text=False)

Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 85 variables.
INFO:tensorflow:Converted 85 variables to const ops.


'./single_input_model.pb'

In [9]:
# Evaluate the model on the held-out split produced by train_test_split;
# despite the `val_` prefix these are the loss and accuracy on the test set.
# NOTE(review): the execution counts of this and the following cells (In [9]..)
# are lower than the training cell's (In [21]), so the outputs shown may come
# from an earlier training run — re-run top to bottom to confirm.
val_loss, val_acc = model.evaluate(x_test, y_test)
print(x_test, val_acc)

[[ 3.78681  7.10552 10.4359  ...  3.81986  6.62413  3.79397]
 [ 3.77235  7.05716 10.2457  ...  3.79955  7.03971  3.79758]
 [ 3.85262  7.44246  8.57222 ...  3.79685  5.32892  3.79353]
 ...
 [ 3.80673  6.76573 10.1454  ...  3.77637  6.56263  3.77654]
 [ 3.81168  6.17607  9.75549 ...  3.80904  5.39576  3.80871]
 [ 3.79908  6.36971 10.0598  ...  3.78432  6.93845  3.7861 ]] 0.8436911495616432


In [10]:
# Run the model on every test sample; each row of Y is the softmax output
# (one score per class) for one sample.
Y = model.predict(x_test)
# Print the whole prediction array, then the row at index 1 on its own.
print(Y, Y[1, :])

[[5.3969073e-01 6.7140195e-06 6.2557554e-04 ... 1.8102363e-04
  6.8735884e-05 5.2358825e-02]
 [9.9583793e-01 1.4883305e-03 3.1931300e-08 ... 1.0190373e-08
  1.4338251e-05 1.3657890e-05]
 [2.6435481e-07 4.2474437e-11 7.2808199e-09 ... 6.5229406e-12
  2.4000888e-06 6.0425198e-09]
 ...
 [3.4980474e-15 4.0880919e-13 4.5192414e-03 ... 4.3407101e-05
  2.1701706e-10 1.1670590e-09]
 [1.2702364e-07 9.9999452e-01 1.9789296e-27 ... 1.0001491e-26
  3.9817233e-10 1.2133797e-18]
 [5.4997608e-06 4.6032314e-06 2.4186775e-10 ... 6.9361796e-09
  9.9953015e-09 1.6927938e-11]] [9.9583793e-01 1.4883305e-03 3.1931300e-08 7.7083848e-07 6.0922861e-10
 2.1184855e-03 2.5168242e-06 6.3595849e-05 4.4813729e-04 5.3293110e-08
 5.8160530e-09 9.3065555e-06 2.9085532e-12 8.1820787e-08 2.4497677e-07
 5.7204244e-17 8.9311928e-17 2.7664664e-06 1.7836626e-08 1.0190373e-08
 1.4338251e-05 1.3657890e-05]


In [11]:
def max_index(A):
    """Return the position of the largest element of ``A``.

    When the maximum occurs several times, the first occurrence wins because
    only a strictly greater value replaces the current best. ``A`` must be
    non-empty (its first element is read unconditionally).
    """
    best_value = A[0]
    best_position = 0
    for position, candidate in enumerate(A):
        if not candidate > best_value:
            continue
        best_position, best_value = position, candidate
    return best_position

In [12]:
# Predicted class for every test sample: the index of the largest score in
# the corresponding row of Y.
Y1 = [max_index(Y[row, :]) for row in range(len(Y))]

In [13]:
# Print the predicted class indices, the true labels, and their element-wise
# difference (an entry is 0 exactly where prediction and label agree).
print(Y1, y_test, Y1-y_test)

[0, 0, 6, 2, 7, 5, 17, 7, 20, 19, 21, 5, 20, 1, 3, 1, 18, 6, 10, 1, 2, 18, 11, 0, 9, 7, 18, 19, 20, 0, 7, 21, 19, 6, 1, 0, 12, 7, 21, 0, 7, 6, 17, 0, 11, 18, 20, 19, 0, 17, 7, 7, 21, 0, 11, 0, 7, 19, 17, 2, 9, 7, 0, 21, 0, 2, 13, 5, 7, 0, 9, 0, 18, 6, 2, 7, 17, 7, 2, 7, 11, 11, 20, 9, 0, 10, 6, 1, 17, 7, 9, 17, 21, 12, 17, 12, 0, 21, 2, 2, 1, 6, 7, 17, 3, 13, 0, 1, 21, 2, 21, 2, 10, 0, 10, 13, 13, 7, 7, 7, 7, 19, 3, 10, 0, 12, 18, 0, 9, 3, 13, 6, 5, 17, 2, 7, 20, 2, 6, 7, 3, 3, 0, 12, 18, 11, 21, 2, 6, 17, 7, 21, 11, 5, 21, 12, 21, 2, 9, 0, 6, 21, 0, 11, 17, 7, 7, 7, 20, 7, 2, 7, 8, 20, 5, 5, 21, 17, 20, 1, 7, 2, 17, 7, 21, 17, 3, 5, 19, 21, 18, 10, 17, 1, 18, 2, 7, 21, 17, 3, 14, 2, 2, 9, 1, 3, 11, 19, 21, 9, 6, 2, 2, 9, 9, 20, 6, 1, 3, 11, 18, 17, 7, 5, 17, 9, 10, 5, 7, 3, 17, 6, 5, 18, 6, 10, 11, 17, 13, 7, 2, 17, 5, 18, 9, 21, 14, 6, 12, 3, 7, 2, 9, 1, 3, 18, 20, 9, 13, 3, 14, 13, 0, 20, 13, 3, 1, 2, 21, 21, 17, 18, 17, 5, 7, 13, 2, 18, 1, 17, 1, 7, 3, 9, 3, 5, 7, 0, 10, 0, 18, 0, 