In [1]:
import numpy as np
import tensorflow as tf

from getConfig import getConfig
from splitData import splitData
from buildModel import buildModel
from loadData import loadData
from prepData import prepData
from hyperParms import getParms
import utils

In [2]:
def setLabels(enzymeLength, non_enzymeLength):
    ''' Build the one-hot labels: enzyme samples first, then non-enzyme samples.

    Class 1 means "enzyme"; class 0 means "not an enzyme".

    enzymeLength:     number of enzyme samples (labelled 1)
    non_enzymeLength: number of non-enzyme samples (labelled 0)

    Returns the two-class one-hot encoding produced by
    tf.keras.utils.to_categorical, one row per sample.
    '''
    enzymeIds = np.ones(enzymeLength)
    nonEnzymeIds = np.zeros(non_enzymeLength)
    # Flat vector of class ids; the order must match the row order of the features
    classIds = np.concatenate((enzymeIds, nonEnzymeIds))
    return tf.keras.utils.to_categorical(classIds, num_classes=2)

In [3]:
def loadParms(x):
    ''' Map one positional hyperparameter combination onto named keys.

    x: indexable holding at least nine values, in the order listed in
       `names` below; anything past index 8 is ignored.

    Returns a new dict keyed by hyperparameter name.
    Raises IndexError when x holds fewer than nine values.
    '''
    names = ('l1Size', 'l2Size', 'l1Activation', 'l2Activation',
             'batchSize', 'lr', 'std', 'dropout', 'optimizer')
    # Index explicitly (rather than zip) so a short x still fails loudly
    return {name: x[pos] for pos, name in enumerate(names)}

In [4]:
def printResults(model, results):
    ''' Print a loss/accuracy table with one row per hyperparameter run.

    model:   not used; kept so existing callers keep working
    results: sequence whose entries hold the loss at index 1 and the
             accuracy at index 2 (index 0 is not printed); the row number
             shown is the entry's position in the sequence
    '''
    # Header widths (8, 10) mirror the row widths so each title sits over its column
    print("{:<8}{:<10}{}".format("parm#", "Loss", "Accuracy"))
    for rowNum, row in enumerate(results):
        # '.3' with no type code: three significant digits for float values
        print("{:<8}{:<10.3}{:<10.3}".format(rowNum, row[1], row[2]))

In [5]:
# Load the run configuration; it is handed to loadData and splitData in later cells.
config = getConfig()

In [6]:
# Load the three input files through the project's loadData helper.
# NOTE(review): the .pickle names suggest loadData unpickles these files — confirm,
# and only point config at trusted data, since unpickling can run arbitrary code.
modelNames = loadData(config, 'Pfam_model_names_list.pickle')
enzyme_features = loadData(config, 'Pfam_name_list_new_data.pickle')
non_enzyme_features = loadData(config, 'Pfam_name_list_non_enzyme.pickle')

In [7]:
#utils.confirmNames(enzyme_features, modelNames)
#utils.getRandom(modelNames)
#utils.confirmNames(non_enzyme_features, modelNames)
#utils.getRandom(enzyme_features)

In [8]:
# Convert both datasets with prepData, using the shared modelNames list.
# NOTE(review): this rebinds the raw names, so re-running the cell would feed
# already-prepared data back into prepData — re-run the load cell first.
enzyme_features = prepData(enzyme_features, modelNames)
non_enzyme_features = prepData(non_enzyme_features, modelNames)

# This will be the input data with zeros and ones in various columns
# Each column corresponds to an enzyme or protein
# Enzyme rows are stacked first, matching setLabels, which emits the enzyme labels first.
features = np.concatenate([enzyme_features, non_enzyme_features], axis=0)

labels = setLabels(len(enzyme_features), len(non_enzyme_features))
# Guard against features and labels getting out of step.
assert (len(labels) == len(features)), "length mismatch between features and labels"

In [9]:
# Unbind the intermediates; the later cells use only `features` and `labels`
del modelNames, enzyme_features, non_enzyme_features

In [10]:
# Split features/labels into train and test parts. The split policy lives inside
# splitData — presumably driven by config (ratio, shuffling, seed); TODO confirm.
x_train, x_test, y_train, y_test = splitData(features, labels, config)

In [11]:
# Hyperparameter combinations to try; each entry is unpacked by loadParms in the training loop.
parms = getParms()

# Number of training epochs used for every combination.
epochs = 2
# Input width handed to buildModel: the number of columns in the feature matrix.
numFeatures = features.shape[1]

In [None]:
# Train and evaluate one model per hyperparameter combination, collecting
# (combination, test loss, test accuracy) tuples in `results`.
results = []
count = 1  # NOTE(review): never incremented or read in this cell
model = None  # stays None only when parms is empty, so printResults below never sees an unbound name

print("\n{} parameter combinations".format(len(parms)))
print("\n{:<10}{}".format("Test loss","Test accuracy"))

for x in parms:
    # Building many models in one process accumulates Keras global state;
    # reset it before each build so memory use does not grow with every run.
    tf.keras.backend.clear_session()

    parmDict = loadParms(x)
    model = buildModel(numFeatures, parmDict)
    # NOTE(review): compile always uses Adam with its default settings, so
    # parmDict['optimizer'] and parmDict['lr'] have no effect here — confirm
    # whether they were meant to be applied at this point.
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    # NOTE(review): the test split doubles as validation data and is also what the
    # combinations are ranked on below — consider a separate validation split.
    history = model.fit(x_train, y_train,
                        batch_size=parmDict['batchSize'],
                        epochs=epochs,
                        verbose=0,
                        validation_data=(x_test, y_test))
    # With metrics=['accuracy'], score is [loss, accuracy]
    score = model.evaluate(x_test, y_test, verbose=0)
    # One row per finished run under the header printed above (doubles as progress output)
    print("{:<10.3}{:<10.3}".format(score[0], score[1]))
    tup = (x, score[0], score[1])
    results.append(tup)

printResults(model, results)


8 parameter combinations

Test loss Test accuracy


In [None]:
#parms[6]