# Imports

In [1]:
import numpy as np
import pandas as pd
import pickle
from IPython.display import display, clear_output
import re
# from tqdm import tqdm
import tensorflow as tf

# tf.enable_eager_execution()
# tf.executing_eagerly()
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()


import string
import random
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

#import matplotlib.pyplot as plt
#import seaborn as sns
#!pip install scikit-optimize
# import skopt
# from skopt import BayesSearchCV
# from skopt import gp_minimize, forest_minimize
# from skopt.space import Real, Categorical, Integer
# from skopt.plots import plot_convergence
# from skopt.plots import plot_objective, plot_evaluations
# from skopt.plots import plot_histogram, plot_objective_2D
# from skopt.utils import use_named_args

In [2]:
# Character-to-integer lookup: 'A'..'Z' map to 1..26 and the space maps to 27.
# Numbering starts at 1, so 0 is never assigned to a character.
encode_dict = {
    char: code
    for code, char in enumerate(string.ascii_uppercase + " ", start=1)
}

In [3]:
# Maximum sequence length, padding included: the length cut-off used when
# dropping over-long rows and the default target length for padding.
MAXLEN = 65

# Functions

In [4]:
# TEST_TXT = "Eirmod horrida ingénii pariant secundum? Cognitionem compositis conséquat dicantur exercitus, intellegitur invenire negat oportet sapientium suam. Ceteris diu erat fecerit, impéndéré intelleges máerores malorum mei re reprehendunt? Constringendos intus mentitum quale urna! Convenire cotidie dixit malé vigiliae?"

In [5]:
def preprocessInput(filename: str) -> pd.DataFrame:
    '''Load a CSV of product names, drop over-long rows and uppercase every cell.

    Every column is expected to hold strings. Columns are visited in order and a
    row is removed as soon as its string in the visited column is longer than the
    module-level MAXLEN. Progress messages and the ``df.info()`` summaries are
    printed before and after processing.

    Parameters
    ----------
    filename : str
        File name or path of the CSV file to read.

    Returns
    -------
    df : Pandas DataFrame
        The remaining rows, with every cell uppercased.
    '''
    df = pd.read_csv(filename)
    print(df.info())

    print("Processing file: ---------------------------------------")
    rows_before = df.shape[0]
    print("Dropping sequences longer than the maxlen:")
    for column in df.columns:
        lengths = df[column].apply(len)
        overlong_index = lengths[lengths.gt(MAXLEN)].index
        df.drop(overlong_index, inplace=True)
    rows_after = df.shape[0]
    print("\tDropped", rows_before - rows_after, "that exceeded the maximum sequence length.")
    # Uppercase all values
    print("\tUppercasing string sequences.")
    df = df.applymap(str.upper)
    print("Done processing: --------------------------------------")
    print(df.info())
    return df

In [6]:
def add_noise(w: str, percent: float = 0.1) -> str:
    '''Add a specified proportion of random character-level noise to a string.

    ``int(percent * len(w))`` modifications are applied one after another. For each
    modification a number r is drawn from a uniform distribution on [0, 1]:
    If r <= 1/3, one position is overwritten with a random uppercase letter.
    If 1/3 < r <= 2/3, one position is deleted.
    Otherwise one random uppercase letter is inserted (possibly at the very end).

    The position is drawn from the string as it currently stands, so earlier
    deletions or insertions can never make a later position fall outside the string.

    Parameters
    ----------
    w : str
        The string to add noise to.

    percent: float, defaults to 0.1 (10%) if not specified
        Fraction of the string length that determines how many modifications are made.

    Returns
    -------
    w : str
        Modified string with noise added.
    '''
    n_changes = int(percent * len(w))
    print("Adding noise to", n_changes, "character(s) of the string")
    for _ in range(n_changes):
        r = random.uniform(0, 1)

        # Editing and deleting need at least one character; an empty string can only grow.
        if w and r <= 1 / 3:  # edit
            p = random.randrange(len(w))
            w = w[:p] + random.choice(string.ascii_uppercase) + w[p + 1:]
        elif w and r <= 2 / 3:  # delete
            p = random.randrange(len(w))
            w = w[:p] + w[p + 1:]
        else:  # add
            p = random.randint(0, len(w))
            w = w[:p] + random.choice(string.ascii_uppercase) + w[p:]
    return w

In [7]:
# add_noise(TEST_TXT)

In [8]:
# add_noise(TEST_TXT, .01)

In [9]:
def clean(text: str) -> str:
    '''Reduce a string to uppercase ASCII letters separated by single spaces.

    Every character that is not an ASCII letter or a space is removed (accented
    letters, digits, punctuation and tabs included). Runs of spaces are then
    collapsed to one, leading and trailing spaces are dropped, and the result
    is uppercased.

    Parameters
    ----------
    text : str
        The string to clean.

    Returns
    -------
    text : str
        The cleaned, uppercased string.
    '''
    letters_and_spaces = re.sub('[^a-zA-Z ]', '', text)
    # Only letters and spaces are left, so splitting on whitespace and joining
    # with one space both collapses repeated spaces and trims the ends.
    words = letters_and_spaces.split()
    return ' '.join(words).upper()

In [10]:
# clean(TEST_TXT)

In [11]:
def clean_dataset(x: pd.Series, y: pd.Series) -> (pd.Series, pd.Series):
    '''Clean both columns of a pair dataset with `clean`.

    Each element of the two Series is passed through `clean`; the Series are
    processed independently and returned in the same order they were given.

    Parameters
    ----------
    x : pd.Series
        A pandas Series containing the 'FAERS_drug_match' column.
    y : pd.Series
        A pandas Series containing the 'lookup_value' column.

    Returns
    -------
    x : pd.Series
        The cleaned 'FAERS_drug_match' series.
    y : pd.Series
        The cleaned 'lookup_value' series.
    '''
    cleaned_x = x.apply(clean)
    cleaned_y = y.apply(clean)
    return cleaned_x, cleaned_y

In [12]:
# clean_dataset(test)

In [13]:
def encode_dataset(x: pd.Series, y: pd.Series) -> (pd.Series, pd.Series):
    '''Encode both columns of a cleaned pair dataset as lists of integers.

    Every string is turned into a list with one entry per character, looked up
    in the module-level `encode_dict` (uppercase letters and the space, mapped
    to 1-27). A character missing from `encode_dict` is encoded as None.

    Parameters
    ----------
    x : pd.Series
        A pandas Series containing the clean 'FAERS_drug_match' column.
    y : pd.Series
        A pandas Series containing the clean 'lookup_value' column.

    Returns
    -------
    x : pd.Series
        The encoded 'FAERS_drug_match' series.
    y : pd.Series
        The encoded 'lookup_value' series.
    '''
    def to_codes(text):
        # One code per character, in order.
        return [encode_dict.get(char) for char in text]

    return x.apply(to_codes), y.apply(to_codes)

In [14]:
# encode_dataset(*clean_dataset(test))[0].head()

In [15]:
# def clean_encode_padding(q, maxlen):
#     q = clean(q)
#     return tf.keras.preprocessing.sequence.pad_sequences(
#         [encode_dict[m] for m in q] , padding="post", maxlen=maxlen)

In [16]:
def padding_dataset(X: pd.Series,Y: pd.Series, maxlen: int = MAXLEN) -> (pd.Series, pd.Series):
    '''Right-pad every encoded sequence of both columns with zeros.

    Each element is expected to be a list; zeros are appended until it has
    `maxlen` entries. A list that already has `maxlen` or more entries is
    returned with nothing appended (it is not truncated).

    Parameters
    ----------
    X : pd.Series
        A pandas Series containing the clean encoded 'FAERS_drug_match' column.
    Y : pd.Series
        A pandas Series containing the clean encoded 'lookup_value' column.
    maxlen : int
        Target sequence length; defaults to the module-level MAXLEN.

    Returns
    -------
    x : pd.Series
        The padded 'FAERS_drug_match' series.
    y : pd.Series
        The padded 'lookup_value' series.
    '''
    def pad(seq):
        # A non-positive repeat count yields an empty list, so long inputs pass through.
        return seq + ([0] * (maxlen - len(seq)))

    return X.transform(pad), Y.transform(pad)
#     return tf.keras.preprocessing.sequence.pad_sequences(X, padding="post", maxlen=maxlen), tf.keras.preprocessing.sequence.pad_sequences(Y, padding="post", maxlen=maxlen)

In [17]:
# padding_dataset(*encode_dataset(*clean_dataset(test)), MAXLEN)[0].head()

In [18]:
def generate_negative_pairs() -> (pd.Series, pd.Series):
    '''Create negative pairs where 'FAERS_drug_match' does not match the 'lookup_value'.

    For every entry of the 'FAERS_drug_match' column of the train set, four labels
    that differ from that entry are sampled at random from the unique labels. Any
    sampled label that the train set lists as a 'lookup_value' for that same entry
    is discarded; each remaining label forms one negative pair with the entry.
    The goal of this is to help further distance the embeddings in the vector space.

    Parameters
    ----------
    The function has no parameters; it reads two notebook-level Pandas dataframes:
    `dUnique_df`, containing the 'dUnique_label' series, and `train`, containing
    the 'FAERS_drug_match' and the 'lookup_value' series.

    Returns
    -------
    faers_match : list
        The 'FAERS_drug_match' side of each negative pair.
    lookup : list
        The mismatching label of each negative pair, aligned with `faers_match`.
    '''
    labels = dUnique_df['dUnique_label']
    anchors = []
    mismatches = []
    for anchor in train['FAERS_drug_match']:
        candidates = labels[labels != anchor].sample(4)
        # Labels the train set already pairs with this name are true matches, not negatives.
        known_lookups = train['lookup_value'].loc[train['FAERS_drug_match'] == anchor]
        kept = candidates[~candidates.isin(known_lookups)]
        anchors.extend([anchor] * len(kept))
        mismatches.extend(kept)
    return anchors, mismatches

------------------------------------------------

# Data loading and preprocessing

In [19]:
# Mapped (matching) pairs; this file is preprocessed and split into train/test below.
fName = '../data/NP_FAERS_mapped_20220215.csv'
# NOTE(review): not referenced anywhere in the visible part of this notebook.
fName_unmapped = '../unmapped_data/upper_unmap_orig_drug_names_202201201812.csv'
# Pre-generated negative (non-matching) pairs; loaded below as `negative_set`.
fName_negatives = '../data/NP_FAERS_negative_pairs_20220222.csv'

## Create the train/test split

In [20]:
# Hold out 20% of the preprocessed mapped pairs as the test set; the fixed seed makes the split repeatable.
train, test = train_test_split(preprocessInput(fName), test_size=0.20, random_state = 42)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5358 entries, 0 to 5357
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   FAERS_drug_match  5358 non-null   object
 1   lookup_value      5358 non-null   object
dtypes: object(2)
memory usage: 83.8+ KB
None
Processing file: ---------------------------------------
Dropping sequences longer than the maxlen:
	Dropped 374 that exceeded the maximum sequence length.
	Uppercasing string sequences.
Done processing: --------------------------------------
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4984 entries, 1 to 5357
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   FAERS_drug_match  4984 non-null   object
 1   lookup_value      4984 non-null   object
dtypes: object(2)
memory usage: 116.8+ KB
None


In [21]:
print("Train:", train.shape, "Test:" , test.shape)

Train: (3987, 2) Test: (997, 2)


## Clean, Encode and Pad the datasets

In [22]:
# Test pairs: clean -> encode -> pad, with padding left at its default length.
padded_xTest, padded_yTest = padding_dataset(*encode_dataset(*clean_dataset(test.FAERS_drug_match, test.lookup_value)))

In [23]:
# Train pairs: the cleaned series are kept as `x` and `y` because a later cell
# builds the unique-label table from `y.unique()`.
x, y = clean_dataset(train.FAERS_drug_match, train.lookup_value)
padded_x, padded_y = padding_dataset(*encode_dataset(x,y))

In [24]:
# Negative pairs from file go through the same preprocessing and clean -> encode -> pad steps.
negative_set = preprocessInput(fName_negatives)
padded_xneg, padded_yneg = padding_dataset(*encode_dataset(*clean_dataset(negative_set.FAERS_drug_match, negative_set.lookup_value)))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9242 entries, 0 to 9241
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   FAERS_drug_match  9242 non-null   object
 1   lookup_value      9242 non-null   object
dtypes: object(2)
memory usage: 144.5+ KB
None
Processing file: ---------------------------------------
Dropping sequences longer than the maxlen:
	Dropped 1372 that exceeded the maximum sequence length.
	Uppercasing string sequences.
Done processing: --------------------------------------
<class 'pandas.core.frame.DataFrame'>
Int64Index: 7870 entries, 3 to 9240
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   FAERS_drug_match  7870 non-null   object
 1   lookup_value      7870 non-null   object
dtypes: object(2)
memory usage: 184.5+ KB
None


In [25]:
# Each padded object is a Series whose elements are lists, so .shape only reports the number of pairs.
print("Test padded x:", padded_xTest.shape, "Test padded y:", padded_yTest.shape)
print("Train padded x:", padded_x.shape, "Train padded y:", padded_y.shape)
print("Train padded x_neg:", padded_xneg.shape, "Train padded y_neg:", padded_yneg.shape)

Test padded x: (997,) Test padded y: (997,)
Train padded x: (3987,) Train padded y: (3987,)
Train padded x_neg: (7870,) Train padded y_neg: (7870,)


## Creating Pandas DF for simplified view of the dataset 

In [26]:
# One row per distinct cleaned train 'lookup_value': the label itself, its
# integer-encoded sequence, and the same sequence zero-padded to MAXLEN.
dUnique_df = pd.DataFrame(columns = ['dUnique_label','dUnique_seq', 'dUnique_seq_padded'])
dUnique_df['dUnique_label'] = y.unique()
dUnique_df['dUnique_seq'] = dUnique_df['dUnique_label'].transform(lambda x: list(map(encode_dict.get,list(x))))
# padding="post" puts the zeros after the encoded characters.
dUnique_df['dUnique_seq_padded'] = list(tf.keras.preprocessing.sequence.pad_sequences(dUnique_df['dUnique_seq'].array, padding="post", maxlen=MAXLEN))
dUnique_df.head()

Unnamed: 0,dUnique_label,dUnique_seq,dUnique_seq_padded
0,ECHINACEA,"[5, 3, 8, 9, 14, 1, 3, 5, 1]","[5, 3, 8, 9, 14, 1, 3, 5, 1, 0, 0, 0, 0, 0, 0,..."
1,GARLIC,"[7, 1, 18, 12, 9, 3]","[7, 1, 18, 12, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
2,VACCINIUM MACROCARPON,"[22, 1, 3, 3, 9, 14, 9, 21, 13, 27, 13, 1, 3, ...","[22, 1, 3, 3, 9, 14, 9, 21, 13, 27, 13, 1, 3, ..."
3,ZINGIBER OFFICINALE,"[26, 9, 14, 7, 9, 2, 5, 18, 27, 15, 6, 6, 9, 3...","[26, 9, 14, 7, 9, 2, 5, 18, 27, 15, 6, 6, 9, 3..."
4,SERENOA REPENS,"[19, 5, 18, 5, 14, 15, 1, 27, 18, 5, 16, 5, 14...","[19, 5, 18, 5, 14, 15, 1, 27, 18, 5, 16, 5, 14..."


## Add positive pairs
Data from NP_FAERS_mapped_20220215.csv -- the manually created reference set for ~70 drugs

In [27]:
# Start the pair lists with the positive pairs: input 1 is the FAERS name,
# input 2 is the lookup value, and every pair is labelled 1 (match).
x1TrainRNN = list(padded_x)
x2TrainRNN = list(padded_y)
yTrainRNN = [1] * len(padded_x)
print(len(x1TrainRNN), len(x2TrainRNN), len(yTrainRNN))

3987 3987 3987


## Add negative pairs from unmapped
Data from NP_FAERS_negative_pairs_20220222.csv -- the negative pairs created by random sampling from the NP_FAERS_mapped_20220215.csv

In [28]:
# Append the negative pairs loaded from file, each labelled 0 (no match).
yTrainRNN.extend([0] * len(padded_xneg))
x1TrainRNN.extend(padded_xneg)
x2TrainRNN.extend(padded_yneg)
print(len(x1TrainRNN), len(x2TrainRNN), len(yTrainRNN))

11857 11857 11857


## Generate additional negative pairs

In [29]:
# Sample extra mismatching labels for the train names; the result varies between runs because sampling is unseeded.
faers_match, lookup = generate_negative_pairs()

In [30]:
# Put the generated negative pairs in a frame so they can go through the same clean -> encode -> pad steps.
dfneg2 = pd.DataFrame(columns=['FAERS_drug_match', 'lookup_value'])
dfneg2['FAERS_drug_match'] = faers_match 
dfneg2['lookup_value'] = lookup
padded_xneg2, padded_yneg2 = padding_dataset(*encode_dataset(*clean_dataset(dfneg2.FAERS_drug_match, dfneg2.lookup_value)), MAXLEN)

## Add additional negative pairs 
Generated from training data using generate_negative_pairs

In [31]:
# Append the generated negative pairs, each labelled 0 (no match).
yTrainRNN.extend([0] * len(padded_xneg2))
x1TrainRNN.extend(padded_xneg2)
x2TrainRNN.extend(padded_yneg2)
print(len(x1TrainRNN), len(x2TrainRNN), len(yTrainRNN))

27579 27579 27579


In [32]:
# Split the three aligned pair lists together so inputs and labels stay matched; 20% becomes the validation part.
x1TrainRnnS, x1ValRnnS, x2TrainRnnS, x2ValRnnS, yTrainRnnS, yValRnnS = train_test_split(x1TrainRNN, x2TrainRNN, yTrainRNN, test_size=0.20, random_state=42)

In [33]:
len(x1TrainRnnS)

22063

# Save data to pickle

In [34]:
# Persist both parts of the pair split as pickled DataFrames with columns x1, x2 and y.
# Note that the validation part is the one written to the "..._test_set.pkl" file.
pd.DataFrame(data={"x1": x1TrainRnnS, "x2": x2TrainRnnS, "y": yTrainRnnS}).to_pickle("../data/proccesed_train_set.pkl")
pd.DataFrame(data={"x1": x1ValRnnS, "x2": x2ValRnnS, "y": yValRnnS}).to_pickle("../data/proccesed_test_set.pkl")

# Load data from pickle

In [35]:
# Fallback for a session where the pair lists were not built above: reload them
# from the pickles written by the save cell.
# Dedicated temporary names are used so the notebook-level `train` and `test`
# frames, which other cells still read, are neither overwritten nor deleted.
# NOTE: read_pickle can execute arbitrary code; only load files this notebook wrote.
if "x1TrainRnnS" not in locals():
    train_pairs = pd.read_pickle("../data/proccesed_train_set.pkl")
    x1TrainRnnS, x2TrainRnnS = train_pairs[["x1", "x2"]].applymap(lambda x: np.asarray(x).astype('float32')).values.T
    yTrainRnnS = train_pairs["y"].astype('float32')
    val_pairs = pd.read_pickle("../data/proccesed_test_set.pkl")
    x1ValRnnS, x2ValRnnS = val_pairs[["x1", "x2"]].applymap(lambda x: np.asarray(x).astype('float32')).values.T
    yValRnnS = val_pairs["y"].astype('float32')
    del train_pairs
    del val_pairs

--------------------------------

# Build model, load weights and evaluate on test data

In [63]:
@tf.function  # The decorator converts `cosine_similarity` into a TensorFlow `Function`.
def cosine_similarity(vects: tf.TensorArray) -> tf.TensorArray:
    '''Cosine similarity, calculated as sum(x*y) / (||x|| * ||y||) with L2 norms.

    All reductions run along axis 1 with keepdims=True and only TensorFlow ops
    are used. No guard is applied against a zero norm.

    Parameters
    ----------
    vects: tf.TensorArray
        A pair (x, y) of tensors to compare.

    Returns
    -------
    cosine_similarity: tf.TensorArray
       The cosine similarity between the two inputs.
    '''
    x, y = vects
    dot_product = tf.reduce_sum(tf.multiply(x, y), axis=1, keepdims=True)
    x_norm = tf.norm(x, ord=2, axis=1, keepdims=True)
    y_norm = tf.norm(y, ord=2, axis=1, keepdims=True)
    return tf.math.divide(dot_product, tf.multiply(x_norm, y_norm))

In [64]:
@tf.function  # The decorator converts `cosine_distance` into a TensorFlow `Function`.
def cosine_distance(vects: tf.TensorArray) -> tf.TensorArray:
    '''Cosine distance, calculated as 1 - (cosine similarity).

    Cosine similarity is sum(x*y) / (||x|| * ||y||) with L2 norms. All reductions
    run along axis 1 with keepdims=True and only TensorFlow ops are used. No
    guard is applied against a zero norm.

    Parameters
    ----------
    vects: tf.TensorArray
        A pair (x, y) of tensors to compare.

    Returns
    -------
    cosine_distance: tf.TensorArray
        The result of 1 - cosine similarity between the two inputs.
    '''
    x, y = vects
    dot_product = tf.reduce_sum(tf.multiply(x, y), axis=1, keepdims=True)
    x_norm = tf.norm(x, ord=2, axis=1, keepdims=True)
    y_norm = tf.norm(y, ord=2, axis=1, keepdims=True)
    similarity = tf.math.divide(dot_product, tf.multiply(x_norm, y_norm))
    return 1 - similarity

In [65]:
def loss(margin=1):
    '''Build a contrastive loss function that closes over `margin`.

    Parameters
    ----------
    margin : number, defaults to 1
        Threshold used by the term that is active where y_true is 1.

    Returns
    -------
    contrastive_loss : callable
        A function (y_true, y_pred) -> mean of
        (1 - y_true) * y_pred**2 + y_true * max(margin - y_pred, 0)**2.
    '''
    def contrastive_loss(y_true, y_pred):
        # Where y_true is 1, predictions below the margin are penalised.
        shortfall = tf.math.maximum(margin - (y_pred), 0)
        label_one_term = (y_true) * tf.math.square(shortfall)
        # Where y_true is 0, the squared prediction itself is the penalty.
        label_zero_term = (1 - y_true) * tf.math.square(y_pred)
        return tf.math.reduce_mean(label_zero_term + label_one_term)

    return contrastive_loss

In [71]:
def build_model2(model_type, embedding_dim, num_rnn_node, num_dense_node, num_layer, activation_fn, learning_rate, optimizer, margin):
    '''Build and compile the Siamese network that scores a pair of encoded names.

    Both inputs (length MAXLEN) go through one shared embedding network:
    Embedding -> BatchNormalization -> LSTM/GRU -> `num_layer` Dense layers whose
    width starts at `num_dense_node` and is halved after each layer. The two
    outputs are compared with `cosine_similarity`, batch-normalised, and passed
    through a single sigmoid unit. The model is compiled with the contrastive
    loss from `loss(margin)` and the "accuracy" metric.

    Parameters
    ----------
    model_type : str
        Recurrent layer to use: "lstm" or "gru".
    embedding_dim : int
        Output dimension of the character embedding.
    num_rnn_node : int
        Number of units of the recurrent layer.
    num_dense_node : int
        Number of units of the first Dense layer.
    num_layer : int
        Number of Dense layers in the embedding network.
    activation_fn : str
        Activation of the Dense layers.
    learning_rate : float
        Learning rate handed to the optimizer.
    optimizer : str
        Optimizer to use: "Adam" or "RMSprop".
    margin : float
        Margin handed to the contrastive loss.

    Returns
    -------
    contr : tf.keras.Model
        The compiled two-input model.

    Raises
    ------
    ValueError
        If `model_type` or `optimizer` is not one of the supported names.
    '''
    rnn_classes = {"lstm": tf.keras.layers.LSTM, "gru": tf.keras.layers.GRU}
    optimizer_classes = {"Adam": tf.keras.optimizers.Adam, "RMSprop": tf.keras.optimizers.RMSprop}
    # Fail early and clearly instead of building a model without a recurrent
    # layer or hitting an unbound optimizer variable at compile time.
    if model_type not in rnn_classes:
        raise ValueError(f"Unsupported model_type {model_type!r}; expected one of {sorted(rnn_classes)}")
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer {optimizer!r}; expected one of {sorted(optimizer_classes)}")

    input_x = tf.keras.layers.Input(shape=(MAXLEN,))
    input_1 = tf.keras.layers.Input(shape=(MAXLEN,))
    input_2 = tf.keras.layers.Input(shape=(MAXLEN,))
    # input_dim=28: the 27 codes of encode_dict (1-27) plus 0, which mask_zero treats as padding.
    embedding = tf.keras.layers.Embedding(input_dim=28, output_dim=embedding_dim, mask_zero=True)
    x = embedding(input_x)
    x = tf.keras.layers.BatchNormalization()(x)

    x = rnn_classes[model_type](num_rnn_node)(x)

    # Integer halving keeps the layer width an int (a float width would be truncated to the same value).
    num = int(num_dense_node)
    for _ in range(num_layer):
        x = tf.keras.layers.Dense(num, activation=activation_fn)(x)
        num //= 2

    embedding_network = tf.keras.Model(input_x, x)

    # The same network (shared weights) embeds both inputs.
    tower_1 = embedding_network(input_1)
    tower_2 = embedding_network(input_2)

    merge_layer = tf.keras.layers.Lambda(cosine_similarity)([tower_1, tower_2])
    normal_layer = tf.keras.layers.BatchNormalization()(merge_layer)
    output_layer = tf.keras.layers.Dense(1, activation="sigmoid")(normal_layer)
    contr = tf.keras.Model(inputs=[input_1, input_2], outputs=output_layer)

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    contr.compile(loss=loss(margin= margin), optimizer=opt, metrics=["accuracy"])

    return contr

In [72]:
# Instantiate the architecture; the weights are filled in by the load_weights call in the next cell.
model = build_model2(model_type = "lstm", embedding_dim = 256, num_rnn_node = 248, num_dense_node = 124, num_layer = 1, activation_fn = "tanh", learning_rate = 2e-4, optimizer= "Adam", margin = 0.8)

In [73]:
# Load saved weights into the model built above; the commented line is an alternative checkpoint file.
# model.load_weights("../exp3-hyperparameter-tuning/alstm-22-0.01.hdf5")
model.load_weights("../ModelCheckpointSaves/best_model.h5")
model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)          [(None, 65)]         0           []                               
                                                                                                  
 input_15 (InputLayer)          [(None, 65)]         0           []                               
                                                                                                  
 model_8 (Functional)           (None, 124)          540028      ['input_14[0][0]',               
                                                                  'input_15[0][0]']               
                                                                                                  
 lambda_4 (Lambda)              (None, 1)            0           ['model_8[0][0]',          

In [74]:
# Single-row dry run of the ranking step: score the first test name against
# every unique lookup label and store the five first-ranked labels.
i = padded_xTest.index[0]
predicts = model.predict([np.tile(padded_xTest.loc[i], (dUnique_df['dUnique_seq_padded'].shape[0],1)), np.stack(dUnique_df['dUnique_seq_padded'])])
# argsort[k] is the position of the k-th smallest score, so it indexes the
# label column directly (np.where(argsort == k) would instead give the rank of
# label k).
argsort = predicts.flatten().argsort()
# Top-5 smallest scores.
# NOTE(review): ascending order treats the model output as a distance. The
# model is trained with label 1 for matching pairs and a loss that pushes those
# predictions up, so the largest scores may be the intended top-5 -- confirm.
for k in range(5):
    test.loc[i, f'rank{k + 1}_drug'] = dUnique_df['dUnique_label'][argsort[k]]

2022-07-29 22:35:01.907456: E tensorflow/stream_executor/cuda/cuda_dnn.cc:389] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2022-07-29 22:35:01.907501: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at cudnn_rnn_ops.cc:1553 : UNKNOWN: Fail to find the dnn implementation.


UnknownError: Graph execution error:

Detected at node 'cond/CudnnRNNV3' defined at (most recent call last):
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/traitlets/config/application.py", line 976, in launch_instance
      app.start()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/asyncio/base_events.py", line 600, in run_forever
      self._run_once()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/asyncio/base_events.py", line 1896, in _run_once
      handle._run()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_198503/4002307798.py", line 2, in <cell line: 2>
      predicts = model.predict([np.tile(padded_xTest.loc[i], (dUnique_df['dUnique_seq_padded'].shape[0],1)), np.stack(dUnique_df['dUnique_seq_padded'])])
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 2033, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1845, in predict_function
      return step_function(self, iterator)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1834, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1823, in run_step
      outputs = model.predict_step(data)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1791, in predict_step
      return self(x, training=False)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/base_rnn.py", line 515, in __call__
      return super(RNN, self).__call__(inputs, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 673, in call
      runtime) = lstm_with_backend_selection(**normal_lstm_kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 1184, in lstm_with_backend_selection
      gru_lstm_utils.function_register(defun_gpu_lstm, **params)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/gru_lstm_utils.py", line 244, in function_register
      concrete_func = func.get_concrete_function(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 1149, in gpu_lstm_with_fallback
      return tf.cond(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 1121, in cudnn_lstm_fn
      return gpu_lstm(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 997, in gpu_lstm
      outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
Node: 'cond/CudnnRNNV3'
Fail to find the dnn implementation.
	 [[{{node cond/CudnnRNNV3}}]]
	 [[model_9/model_8/lstm_4/PartitionedCall]] [Op:__inference_predict_function_40574]

In [70]:
test.head()

Unnamed: 0,FAERS_drug_match,lookup_value,rank1_drug,rank2_drug,rank3_drug,rank4_drug,rank5_drug,lookup_rank,lookup_rank_related
1633,SAW PELMETTO,SCRUB-PALMETTO,,,,,,inf,inf
2969,FLAXSEED (LINUM USIATISSIMUM SEED OIL),LINUM USITATISSIMUM,,,,,,inf,inf
518,GINGERAID,GINGER,,,,,,inf,inf
2674,UHE RED YEAST RICE CRANBERRY,RED YEAST RICE,,,,,,inf,inf
755,QUEEN CITY HEMP 500MG (CBD),HEMP EXTRACT,,,,,,inf,inf


# Evaluating on test data - NP names only

In [49]:
# Add the result columns: empty top-5 labels and an infinite rank as the "not found" value.
# np.inf is used because the np.Inf alias no longer exists in NumPy 2.0.
test = test.assign(rank1_drug="", rank2_drug="", rank3_drug="", rank4_drug="", rank5_drug="", lookup_rank=np.inf, lookup_rank_related=np.inf)

In [50]:
# Lookup table between latin binomials and common names (raw and cleaned columns, per the info() output).
vocab = pd.read_csv('../data/lb_to_common_names.csv')
vocab.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958 entries, 0 to 957
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   latin_binomial        958 non-null    object
 1   common_name           958 non-null    object
 2   latin_binomial_clean  958 non-null    object
 3   common_name_clean     958 non-null    object
dtypes: object(4)
memory usage: 30.1+ KB


# Evaluation of drug name predictions
### Find ranks 1-n from the predicted similarities for the test data

In [51]:
padded_xTest.head()

1633    [19, 1, 23, 27, 16, 5, 12, 13, 5, 20, 20, 15, ...
2969    [6, 12, 1, 24, 19, 5, 5, 4, 27, 12, 9, 14, 21,...
518     [7, 9, 14, 7, 5, 18, 1, 9, 4, 0, 0, 0, 0, 0, 0...
2674    [21, 8, 5, 27, 18, 5, 4, 27, 25, 5, 1, 19, 20,...
755     [17, 21, 5, 5, 14, 27, 3, 9, 20, 25, 27, 8, 5,...
Name: FAERS_drug_match, dtype: object

In [52]:
test.head()

Unnamed: 0,FAERS_drug_match,lookup_value,rank1_drug,rank2_drug,rank3_drug,rank4_drug,rank5_drug,lookup_rank,lookup_rank_related
1633,SAW PELMETTO,SCRUB-PALMETTO,,,,,,inf,inf
2969,FLAXSEED (LINUM USIATISSIMUM SEED OIL),LINUM USITATISSIMUM,,,,,,inf,inf
518,GINGERAID,GINGER,,,,,,inf,inf
2674,UHE RED YEAST RICE CRANBERRY,RED YEAST RICE,,,,,,inf,inf
755,QUEEN CITY HEMP 500MG (CBD),HEMP EXTRACT,,,,,,inf,inf


In [53]:
def _first_match_rank(ranked_labels, target) -> float:
    """Return the 1-based position of `target` in `ranked_labels`, or np.inf if absent."""
    for position, label in enumerate(ranked_labels, start=1):
        if label == target:
            return position
    return np.inf


def find_ranks(model: tf.keras.Model) -> pd.DataFrame:
    """Fill the top-5 predictions and match ranks of the global `test` frame.

    For every row of the test set, the encoded 'FAERS_drug_match' sequence is
    paired with every unique candidate sequence, the model scores each pair,
    and the five candidates with the smallest scores are stored as
    'rank1_drug' ... 'rank5_drug'.

    Two ranks are then recorded (np.inf when nothing matches):

    * 'lookup_rank': position (1-5) of the row's 'lookup_value' among the
      five predicted labels.
    * 'lookup_rank_related': the better of 'lookup_rank' and the position of
      the related name found in `vocab` (the common name for a latin
      binomial, or the latin binomial for a common name). Only the first
      vocabulary hit is considered.

    The function reads the notebook-level globals `padded_xTest`,
    `dUnique_df`, `vocab` and `test`, and updates `test` in place.

    NOTE(review): scores are sorted in ascending order, i.e. the model output
    is treated as a distance (smaller = more similar) - confirm against the
    model definition.

    Parameters
    ----------
    model : tf.keras.Model
        Siamese model whose `predict` accepts a list of two arrays of padded
        sequences and returns one score per pair.

    Returns
    -------
    test : pd.DataFrame
        The updated global test frame (the same object that was modified).
    """
    rank_cols = ['rank1_drug', 'rank2_drug', 'rank3_drug', 'rank4_drug', 'rank5_drug']

    # The candidate matrix and labels are the same for every query: build once.
    candidate_seqs = np.stack(dUnique_df['dUnique_seq_padded'])
    # Positional array, so argsort indices address the same rows as candidate_seqs
    # regardless of dUnique_df's index labels.
    candidate_labels = dUnique_df['dUnique_label'].to_numpy()
    n_candidates = candidate_seqs.shape[0]

    for i in padded_xTest.index:
        # Repeat the query so it is paired with every candidate.
        query = np.tile(padded_xTest.loc[i], (n_candidates, 1))
        predicts = model.predict([query, candidate_seqs])

        # order[k] is the index of the k-th smallest score, so the first five
        # entries are the five closest candidates.
        order = predicts.flatten().argsort()
        top_labels = candidate_labels[order[:len(rank_cols)]]
        for col, label in zip(rank_cols, top_labels):
            test.loc[i, col] = label

        # Exact match: is the annotated lookup value among the top five?
        lookup_clean = test.loc[i, 'lookup_value']
        lookup_rank = _first_match_rank(top_labels, lookup_clean)
        # Always written, so a re-run cannot leave a stale rank behind.
        test.loc[i, 'lookup_rank'] = lookup_rank

        # Related match: map the lookup value to its counterpart name
        # (latin binomial <-> common name) through the vocabulary.
        lb_res = vocab.loc[vocab['latin_binomial_clean'] == lookup_clean]
        common_res = vocab.loc[vocab['common_name_clean'] == lookup_clean]
        lookup_result = ''
        if len(lb_res) > 0:
            lookup_result = lb_res.common_name_clean.values[0]
        elif len(common_res) > 0:
            lookup_result = common_res.latin_binomial_clean.values[0]

        related_rank = np.inf
        if lookup_result != '':
            related_rank = _first_match_rank(top_labels, lookup_result)

        # Best rank over the exact and the related name.
        test.loc[i, 'lookup_rank_related'] = min(lookup_rank, related_rank)

    return test
    

In [54]:
# i = padded_xTest.index[0]
# predicts = model.predict([np.tile(padded_xTest.loc[i], (dUnique_df['dUnique_seq_padded'].shape[0],1)), np.stack(dUnique_df['dUnique_seq_padded'])])
# argsort = predicts.flatten().argsort()
# predicts.flatten()
# argsort
# dUnique_df[['dUnique_label','dUnique_seq_padded']]
# dUnique_df.iloc[6][['dUnique_label','dUnique_seq_padded']]
# dUnique_df['dUnique_label'][np.where(argsort == 3)[0][0]]

## Assign ranks to the matches
Matches are assigned their corresponding rank (1-5).
Non-matches keep their initial rank of infinity (`inf`).

In [55]:
# Score every test row against all unique candidate names; find_ranks writes
# the top-5 labels and the match ranks into the global `test` frame.
# NOTE: the recorded run of this cell failed inside model.predict with
# CUDNN_STATUS_INTERNAL_ERROR, so the cells below have no recorded output.
find_ranks(model)

2022-07-29 22:31:15.094515: E tensorflow/stream_executor/cuda/cuda_dnn.cc:389] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2022-07-29 22:31:15.094547: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at cudnn_rnn_ops.cc:1553 : UNKNOWN: Fail to find the dnn implementation.
2022-07-29 22:31:15.095277: E tensorflow/stream_executor/cuda/cuda_dnn.cc:389] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2022-07-29 22:31:15.095294: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at cudnn_rnn_ops.cc:1553 : UNKNOWN: Fail to find the dnn implementation.


UnknownError: Graph execution error:

Detected at node 'cond/CudnnRNNV3' defined at (most recent call last):
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/traitlets/config/application.py", line 976, in launch_instance
      app.start()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/asyncio/base_events.py", line 600, in run_forever
      self._run_once()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/asyncio/base_events.py", line 1896, in _run_once
      handle._run()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_198503/3028911184.py", line 2, in <cell line: 1>
      predicts = model.predict([np.tile(padded_xTest.loc[i], (dUnique_df['dUnique_seq_padded'].shape[0],1)), np.stack(dUnique_df['dUnique_seq_padded'])])
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 2033, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1845, in predict_function
      return step_function(self, iterator)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1834, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1823, in run_step
      outputs = model.predict_step(data)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 1791, in predict_step
      return self(x, training=False)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/base_rnn.py", line 515, in __call__
      return super(RNN, self).__call__(inputs, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 673, in call
      runtime) = lstm_with_backend_selection(**normal_lstm_kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 1184, in lstm_with_backend_selection
      gru_lstm_utils.function_register(defun_gpu_lstm, **params)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/gru_lstm_utils.py", line 244, in function_register
      concrete_func = func.get_concrete_function(*args, **kwargs)
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 1149, in gpu_lstm_with_fallback
      return tf.cond(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 1121, in cudnn_lstm_fn
      return gpu_lstm(
    File "/home/israel/anaconda3/envs/tf/lib/python3.10/site-packages/keras/layers/rnn/lstm.py", line 997, in gpu_lstm
      outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
Node: 'cond/CudnnRNNV3'
Fail to find the dnn implementation.
	 [[{{node cond/CudnnRNNV3}}]]
	 [[model_1/model/lstm/PartitionedCall_1]] [Op:__inference_predict_function_8748]

In [None]:
# Preview the test frame after find_ranks has filled the rank columns.
test.head()

In [None]:
# test.to_csv("../evaluation/test_siamese_evaluation_lstm_model_np_name.csv", index=False)

# Add related mappings rank to test set evaluation

In [None]:
# When the notebook is resumed without running the evaluation above, fall back
# to an evaluation previously saved to CSV.
if "test" not in locals():
    test = pd.read_csv('../evaluation/test_siamese_evaluation_lstm_model_np_name.csv')
    # Print column names, non-null counts and dtypes of the loaded frame.
    test.info()

In [None]:
# Keep only the query name and the two rank columns needed for the
# reciprocal-rank statistics below.
test_mrr = test[['FAERS_drug_match', 'lookup_rank', 'lookup_rank_related']]
test_mrr.head()

In [None]:
# Row count, non-null counts and dtypes of the rank subset.
test_mrr.info()

In [None]:
# Number of queries with no relevant result among the top-5 predictions.
# Misses are stored as +inf (the columns are initialised with infinity and only
# overwritten on a hit), so `isna()` would always count zero rows; test for
# non-finite values instead, which also covers NaN.
test_mrr.loc[~np.isfinite(test_mrr['lookup_rank'])].shape

In [None]:
# Number of queries with neither an exact nor a related match in the top 5.
# Misses are stored as +inf rather than NaN, so count non-finite ranks.
test_mrr.loc[~np.isfinite(test_mrr['lookup_rank_related'])].shape

In [None]:
# Exact-match view: rows whose 'lookup_rank' is not NaN, without the related-rank column.
# NOTE(review): misses are stored as inf, not NaN, so this filter presumably
# keeps every row (their reciprocal rank is then 0) - confirm this is intended.
test_mrr_exact = test_mrr[test_mrr['lookup_rank'].notna()].drop(columns=['lookup_rank_related'])
test_mrr_exact.info()

In [None]:
# Related-match view: rows whose 'lookup_rank_related' is not NaN, without the exact-rank column.
# NOTE(review): misses are stored as inf, not NaN, so this filter presumably
# keeps every row (their reciprocal rank is then 0) - confirm this is intended.
test_mrr_rel = test_mrr[test_mrr['lookup_rank_related'].notna()].drop(columns=['lookup_rank'])
test_mrr_rel.info()

In [None]:
# Reciprocal rank (1 / rank) of the exact match for every query.
exact_reciprocal = test_mrr_exact['lookup_rank'].rdiv(1)
test_mrr_exact['reciprocal_rank'] = exact_reciprocal
test_mrr_exact.head()

In [None]:
# Mean reciprocal rank (MRR) for exact matches.
test_mrr_exact.reciprocal_rank.mean()

In [None]:
# Median of the exact-match rank; the standard deviations follow in later cells.
test_mrr_exact.lookup_rank.median()

In [None]:
# Median reciprocal rank for exact matches.
test_mrr_exact.reciprocal_rank.median()

In [None]:
# Standard deviation of the exact-match rank.
# NOTE(review): rows holding inf would make this undefined - confirm misses are excluded.
test_mrr_exact.lookup_rank.std()

In [None]:
# Standard deviation of the exact-match reciprocal rank.
test_mrr_exact.reciprocal_rank.std()

In [None]:
# Reciprocal rank (1 / rank) of the best exact-or-related match for every query.
rel_reciprocal = test_mrr_rel['lookup_rank_related'].rdiv(1)
test_mrr_rel['reciprocal_rank'] = rel_reciprocal
test_mrr_rel.head()

In [None]:
# Mean reciprocal rank (MRR) when related names also count as matches.
test_mrr_rel.reciprocal_rank.mean()

In [None]:
# Median reciprocal rank for exact-or-related matches.
test_mrr_rel.reciprocal_rank.median()

In [None]:
# Standard deviation of the exact-or-related reciprocal rank.
test_mrr_rel.reciprocal_rank.std()

In [None]:
# Median of the exact-or-related rank.
test_mrr_rel.lookup_rank_related.median()

In [None]:
# Standard deviation of the exact-or-related rank.
# NOTE(review): rows holding inf would make this undefined - confirm misses are excluded.
test_mrr_rel.lookup_rank_related.std()

--------------------------------------

# Average NP name length 

In [None]:
# Reload the full dataset through preprocessInput (defined earlier in this
# notebook) to study name lengths; `fName` is set elsewhere in the notebook.
main_dataset = preprocessInput(fName)

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the character
# length of each 'FAERS_drug_match' name.
stats = main_dataset['FAERS_drug_match'].apply(len).describe()
stats

In [None]:
# Mean length plus two standard deviations.
# NOTE(review): presumably used as a guide for choosing MAXLEN - confirm.
stats["mean"] + stats["std"] * 2

In [None]:
# Same length summary for the 'lookup_value' names.
main_dataset['lookup_value'].apply(len).describe()

In [None]:
# The ten largest 'FAERS_drug_match' lengths, longest first (index = row label).
main_dataset['FAERS_drug_match'].apply(len).sort_values(ascending=False)[0:10]

In [None]:
# Describe the boolean mask "length > 80": for a boolean Series, describe()
# reports count, unique, the most frequent value and its frequency.
main_dataset['FAERS_drug_match'].apply(len).gt(80).describe()

In [None]:
# Count the names longer than each candidate maximum length directly from the
# data, instead of subtracting numbers copied by hand from earlier
# describe() outputs (which go stale whenever the dataset changes).
name_lengths = main_dataset['FAERS_drug_match'].apply(len)
for threshold in (65, 70, 80):
    print(f"Gt {threshold}:", int(name_lengths.gt(threshold).sum()))

In [None]:
%matplotlib inline
# Histogram (15 bins) of the 'FAERS_drug_match' name lengths.
main_dataset['FAERS_drug_match'].apply(len).hist(bins=15)

In [None]:
# Inspect the name stored at positional row 5132.
# NOTE(review): the position is hard-coded, presumably taken from the
# longest-names listing - confirm it still points at the intended row.
main_dataset.iloc[5132]['FAERS_drug_match']

In [None]:
# NOTE(review): this cell repeats the previous one verbatim - candidate for removal.
main_dataset.iloc[5132]['FAERS_drug_match']

In [None]:
# Length summary of 'FAERS_drug_match' in dfneg2 (defined elsewhere in the notebook).
dfneg2['FAERS_drug_match'].apply(len).describe()

In [None]:
# Length summary of 'lookup_value' in dfneg2.
dfneg2['lookup_value'].apply(len).describe()

In [None]:
# Length summary of the unique candidate labels.
dUnique_df['dUnique_label'].apply(len).describe()

In [None]:
# Length summary of the 'dUnique_seq' entries of the unique candidates.
dUnique_df['dUnique_seq'].apply(len).describe()

In [None]:
# Index label of the longest 'FAERS_drug_match' name in dfneg2.
dfneg2['FAERS_drug_match'].apply(len).idxmax()

In [None]:
# Inspect the name stored at positional row 8183.
# NOTE(review): 8183 is hard-coded, presumably copied from an idxmax() result;
# idxmax() returns an index *label* while iloc is positional - confirm they
# coincide for dfneg2.
dfneg2.iloc[8183]['FAERS_drug_match']