# Description

This notebook tabulates the final results and compares the novel generated molecules against known HIV inhibitors and against Remdesivir, which entered clinical trials just a few days ago.

## Select the best results and save them to SDF for a final double-check tabulation in PyRx

In [None]:
import pandas as pd
from rdkit import Chem, DataStructs
import random
import numpy as np
import rdkit.Chem.PropertyMol

In [None]:
# Read the generation-10 master results table and order it by the `score`
# column, smallest value first.
master_results_csv = './generations/master_results_table_gen10.csv'
final = pd.read_csv(master_results_csv).sort_values(by='score')
print(final.shape)
final.head()

In [None]:
# One row per distinct SMILES string, carrying the per-group maximum of `score`
# and of `gen`.
#
# The two columns are selected *before* aggregating, so the remaining columns
# of `final` (which were discarded anyway) are never reduced; this avoids
# wasted work and avoids failures on columns whose values cannot be compared.
#
# NOTE: the two maxima are computed independently, so a (score, gen) pair in
# this frame need not correspond to a single row of `final`.
final_max = (
    final.groupby("smile")[["score", "gen"]]
    .max()
    .reset_index()
    .sort_values('score', ascending=True)
)
print(final_max.shape)
final_max.head()

In [None]:
# Re-attach the full rows of `final` to each (smile, gen) pair in `final_max`.
# Both frames have a `score` column, so the merge yields `score_old` (from
# `final_max`) and `score_new` (from `final`).
merged = final_max.merge(final, how='left', on=['smile', 'gen'], suffixes=('_old', '_new'))

# Keep rows whose row-level score is at most -15.0 and whose weight is below 900.
passes_cutoffs = (merged['score_new'] <= -15.0) & (merged['weight'] < 900)

# `score` (a copy of `score_new`) is appended as the last column, then both
# suffixed score columns are removed.
final_joined = (
    merged[passes_cutoffs]
    .assign(score=lambda frame: frame['score_new'])
    .drop(['score_old', 'score_new'], axis=1)
)
print(final_joined.shape)
final_joined.head()

In [None]:
# Rows of the master table whose `source` column equals 'hiv'.
is_hiv_row = final['source'] == 'hiv'
hiv = final[is_hiv_row]
hiv.head()

In [None]:
# Stack the filtered generated molecules on top of the HIV rows.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` produces the same
# frame (original row labels kept, columns unioned) on old and new pandas.
final_table = pd.concat([final_joined, hiv])
final_table.head()

In [None]:
# Export the combined table (row index omitted).
# Note: Remdesivir was also added by hand, because it entered clinical trials
# while this work was in progress.
# https://en.wikipedia.org/wiki/Remdesivir
final2_csv = r'./generations/master_results_table_final2.csv'
final_table.to_csv(final2_csv, index=False)

In [None]:
# Load the final results table from disk and show its last rows.
# NOTE(review): this reads `master_results_table_final.csv`, not the
# `master_results_table_final2.csv` export -- presumably the hand-edited copy
# that includes Remdesivir; confirm.
final_csv = './generations/master_results_table_final.csv'
final_table = pd.read_csv(final_csv)
final_table.tail()

In [None]:
def set_molecule(row):
    """Build an RDKit ``PropertyMol`` for one row of the results table.

    Parameters
    ----------
    row : mapping (e.g. a ``pandas.Series``)
        Must provide the keys ``'smile'``, ``'id'`` and ``'gen'``.

    Returns
    -------
    rdkit.Chem.PropertyMol.PropertyMol
        The parsed molecule with a ``'Title'`` property of the form
        ``'id<id>gen<gen>'``.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``row['smile']``.
    """
    mol = Chem.MolFromSmiles(row['smile'])
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of an opaque error inside PropertyMol.
        raise ValueError(
            'Could not parse SMILES %r (id=%s, gen=%s)'
            % (row['smile'], row['id'], row['gen'])
        )
    pm = Chem.PropertyMol.PropertyMol(mol)
    title = 'id' + str(row['id']) + 'gen' + str(row['gen'])
    # Enables tracking which molecule is which in the PyRx GUI and PyRx results export
    pm.SetProp('Title', title)
    return pm

In [None]:
# Convert every row of the final table into a titled molecule (one call to
# `set_molecule` per row).
mols_for_export = final_table.apply(func=set_molecule, axis='columns')

In [None]:
# Write every exported molecule to a single SDF file.
# The writer is closed explicitly (even if a write fails) so that buffered
# records are flushed to disk before the file is opened in PyRx.
w = Chem.SDWriter('./generations/genfinal.sdf')
try:
    for m in mols_for_export:
        w.write(m)
finally:
    w.close()

## Rerun everything through PyRx once more as a double-check, then publish the final metrics

In [None]:
# Load the PyRx results for the final SDF export.
pyrx_results_csv = './generations/results/results_genfinal.csv'
new_scores = pd.read_csv(pyrx_results_csv)
new_scores.head()

In [None]:
# Collapse the results to one row per ligand: minimum and average binding affinity.
#
# The output labels are pinned with (label, function) pairs. When bare NumPy
# callables are passed, the label comes from the callable's `__name__`, which
# differs between NumPy/pandas versions ('amin' vs 'min'); the parsing cell
# further down looks the columns up as "amin" and "average".
new_scores = (
    new_scores.groupby("Ligand")
    .agg({'Binding Affinity': [('amin', 'min'), ('average', np.average)]})
    .reset_index()
)
new_scores.head()

In [None]:
# Recover `id` and `gen` from the ligand name: take the second "_"-separated
# token, split it on "gen", and strip the leading "id" from the left part.
ligand_tag = new_scores['Ligand'].str.split("_").str[1]
tag_parts = ligand_tag.str.split("gen")
new_scores['id'] = tag_parts.str[0].str.split("id").str[1]
new_scores['gen'] = tag_parts.str[1]

# Lift the two aggregated affinity columns out of the two-level header.
affinity = new_scores["Binding Affinity"]
new_scores['score_best'] = affinity["amin"]
new_scores['score_avg'] = affinity["average"]

# Keep only the join keys and the two score columns.
new_scores = new_scores[['id', 'gen', 'score_best', 'score_avg']]
new_scores.head()

In [None]:
# Cast the join keys to strings on both frames so `id` / `gen` compare equal
# when the frames are merged.
for frame in (new_scores, final_table):
    for key in ('id', 'gen'):
        frame[key] = frame[key].astype(str)

In [None]:
# Attach the re-docked scores to the final table (left join on id + gen).
#
# `new_scores` can still carry a two-level column header from the groupby
# aggregation. Merging a one-level frame with a two-level frame only warns on
# older pandas and raises MergeError on pandas 2.x, so the right-hand header is
# flattened to its first level before the merge.
scores_flat = new_scores.copy()
if scores_flat.columns.nlevels > 1:
    scores_flat.columns = scores_flat.columns.get_level_values(0)

# Drop score columns left over from a previously saved table so the merge does
# not produce suffixed duplicates when the notebook is re-run.
table_left = final_table.drop(columns=['score_best', 'score_avg'], errors='ignore')

new_table = pd.merge(table_left, scores_flat, on=['id', 'gen'], suffixes=('_old', '_new'), how='left')
new_table = new_table.sort_values('score_best', ascending=True)
new_table.tail()

In [None]:
# Display the column labels of the merged table as a quick sanity check.
new_table.columns

In [None]:
# HIV rows that received a score in the PyRx re-run (non-null `score_best`).
scored_hiv_rows = (new_table['source'] == 'hiv') & new_table['score_best'].notna()
hiv_smiles = new_table[scored_hiv_rows]
hiv_smiles_list = hiv_smiles['smile'].tolist()
hiv_smiles.head(20)

In [None]:
# One RDKFingerprint per SMILES in `hiv_smiles_list`; used as the reference set.
hiv_fingerprints = [
    Chem.RDKFingerprint(Chem.MolFromSmiles(hiv_smile))
    for hiv_smile in hiv_smiles_list
]


def calc_hiv_similarity_score(row):
    """Return the mean Tanimoto similarity between ``row['smile']`` and every
    fingerprint in the module-level ``hiv_fingerprints`` list.

    NOTE: rows that are themselves in the reference set are compared against
    their own fingerprint as well.
    """
    row_fingerprint = Chem.RDKFingerprint(Chem.MolFromSmiles(row['smile']))
    per_reference = DataStructs.BulkTanimotoSimilarity(row_fingerprint, hiv_fingerprints)
    return np.mean(per_reference)


remdesivir_fingerprint = Chem.RDKFingerprint(
    Chem.MolFromSmiles('CCC(CC)COC(=O)[C@H](C)NP(=O)(OC[C@H]1O[C@](C#N)([C@H](O)[C@@H]1O)C1=CC=C2N1N=CN=C2N)OC1=CC=CC=C1')
)


def _remdesivir_similarity(smile):
    """Return the Tanimoto similarity between one SMILES string and Remdesivir."""
    candidate_fingerprint = Chem.RDKFingerprint(Chem.MolFromSmiles(smile))
    return DataStructs.TanimotoSimilarity(candidate_fingerprint, remdesivir_fingerprint)


new_table['similarity_to_hiv_inhibitors'] = new_table.apply(calc_hiv_similarity_score, axis=1)
new_table['similarity_to_remdesivir'] = new_table['smile'].apply(_remdesivir_similarity)
new_table.head(50)

In [None]:
# Persist the enriched table (row index omitted).
# NOTE: this overwrites `master_results_table_final.csv`, the same file that
# is read back into `final_table` earlier in this notebook.
final_results_csv = r'./generations/master_results_table_final.csv'
new_table.to_csv(final_results_csv, index=False)