In [1]:
import sys
import json
import numpy as np
import pandas as pd

# Progress message; end=' ' keeps the cursor on the same line.
print ("Importing modules...", end=' ')
# Helper module (presumably project-local — confirm); later cells call
# modules.FOFE(...) from it.
import modules
# NOTE(review): the matching completion message is disabled, so the
# "Importing modules..." line is never followed by "Done".
# print ("Done")

##################################################

print ("Reading data from disk...", end=' ')
sys.stdout.flush()  # push the progress text out before the read starts

# Load the parquet table, then copy three of its columns into NumPy arrays.
# All three arrays come from the same frame, so they share one row index.
df = pd.read_parquet('./datasets/Metal_all_20180601.parquet')
seqs, metals, fingerprints = (
    np.array(df[column])
    for column in ('sequence', 'ligandId', 'fingerprint')
)

print ("Done")

##################################################

# NOTE(review): the message mentions the FOFE encoder, but this section only
# loads the metal dictionary from disk — confirm the wording is intended.
print ("Using FOFE encoder...", end=' ')
sys.stdout.flush()

# Start from an empty mapping, then replace it with the parsed JSON file.
metal_dict = {}
with open("./dictionaries/metal_dict", 'r') as fp:
    metal_dict = json.loads(fp.read())

# Reverse lookup: each value of metal_dict becomes a key pointing back to its
# original key (used later to turn a predicted index into a metal name).
num_to_metal = dict(zip(metal_dict.values(), metal_dict.keys()))
print ("Done")

##################################################
print("Loading metal_predictor...", end=' ')
sys.stdout.flush()  # same flush-after-progress pattern as the sections above

from keras.models import model_from_json

# Read the serialized model description. The with-block guarantees the file
# handle is closed even if read() raises.
with open('./models/metal_predict.json', 'r') as json_file:
    loaded_model_json = json_file.read()

# Rebuild the model from its JSON description, then load the saved weights
# into it from the companion .h5 file.
metal_predictor = model_from_json(loaded_model_json)
metal_predictor.load_weights("./models/metal_predict.h5")
print ("Done")

##################################################
# Number of standard deviations above the row mean used as the cut-off when
# this value is passed to threshold_func.
factor = 2.33


def threshold_func(y_in, factor):
    """Binarize each row of ``y_in`` against a per-row cut-off.

    For every index along the first axis, the cut-off is
    ``mean(row) + factor * std(row)``; entries strictly greater than the
    cut-off become 1 and all others become 0.

    Args:
        y_in: NumPy array; rows are taken along axis 0.
        factor: multiplier applied to the row's standard deviation.

    Returns:
        Array with the same shape and dtype as ``y_in`` holding 0/1 flags.
    """
    y_out = np.zeros_like(y_in)
    n_rows = y_in.shape[0]
    for row_idx in range(n_rows):
        row = y_in[row_idx]
        cutoff = np.mean(row) + factor * np.std(row)
        # The boolean mask is cast to y_out's dtype on assignment.
        y_out[row_idx] = row > cutoff
    return y_out


print ("Threshold factor set to " + str(factor))
print ("##################################################")

Importing modules... 

Using TensorFlow backend.


Reading data from disk... Done
Using FOFE encoder... Done
Loading metal_predictor... Done
Threshold factor set to 2.33
##################################################


In [16]:
# Draw one random row index in [0, len(seqs)). Bounding by the loaded array
# (rather than a hard-coded row count) keeps the index valid for whatever
# dataset was read. No seed is set in the visible cells, so every run picks
# a different sample.
choice = np.random.randint(len(seqs))
print ("Choose index [" + str(choice) + "]")

Choose index [46049]


In [18]:
# NOTE(review): "seuqnce" and "binded" below are typos in the printed text;
# they are left unchanged here so the messages match the recorded output.
print ("##################################################")
print ("The seuqnce is [" + seqs[choice][:15] + "...]\n")

# Encode the chosen sequence once and feed the same input to both predictors.
# NOTE(review): assumes modules.FOFE is deterministic for a given sequence
# and that predict() does not modify its input — confirm.
fofe_input = modules.FOFE(seqs[choice])

# Stage 1: predict which metal the sample binds.
metal_out = metal_predictor.predict(fofe_input)

# np.argmax works on the flattened output, so this is the class index only
# when predict() returns a single row — assumes a batch of one, TODO confirm.
max_index = np.argmax(metal_out)
metal = num_to_metal[max_index]

print ("This sample is binded to [" + metal + "]")
print ("            Ground truth [" + metals[choice] + "]\n")

# Stage 2: load the predictor stored under the predicted metal's name.
# The with-block closes the file even if read() raises.
with open('./models/' + metal + '.json', 'r') as json_file:
    loaded_model_json = json_file.read()

MBS_predictor = model_from_json(loaded_model_json)
# load weights into new model
MBS_predictor.load_weights('./models/' + metal + '.h5')

MBS_out = MBS_predictor.predict(fofe_input)
# Keep only the outputs that exceed the per-row mean + factor * std cut-off.
MBS_OneHot = threshold_func(MBS_out, factor)
# Indices flagged with 1 in the first (only expected) row of the result.
MBS = np.where(MBS_OneHot[0] == 1)[0]
print ("This sample has [", end="")
print (*MBS, sep=",", end="")
print ("] binding sites")
print ("   Ground truth [", end="")
# fingerprints[choice] is unpacked with *, so it must be iterable.
print (*fingerprints[choice], sep=",", end="")
print ("]")

##################################################
The seuqnce is [MGGSHHHHHHRSEST...]

This sample is binded to [MN]
            Ground truth [MN]

This sample has [570,571] binding sites
   Ground truth [570,571,962]
