In [1]:
from cresset import flare
import pandas as pd
import numpy as np
import os
from time import process_time

In [2]:
def get_testset(path):
    """Return the docking test set stored under ``path``, building it if needed.

    If ``<path>/testset_dock.csv`` already exists it is read and returned
    unchanged. Otherwise the table is assembled from ``cluster_table.csv``,
    ``metadata.csv`` and ``Mpro_soaks.csv`` in the same directory, written to
    ``testset_dock.csv`` and returned.

    Parameters
    ----------
    path : str
        Directory containing the input CSV files; the cache file is written
        to the same directory.

    Returns
    -------
    pandas.DataFrame
        One row per ``crystal_name`` that has a fluorescence IC50 below 99,
        with the derived ``pIC50`` column appended.
    """
    testset_path = os.path.join(path, 'testset_dock') + '.csv'

    # Cache hit: reuse the previously assembled table.
    if os.path.exists(testset_path):
        return pd.read_csv(testset_path)

    cluster = pd.read_csv(os.path.join(path, 'cluster_table') + '.csv')
    meta = pd.read_csv(os.path.join(path, 'metadata') + '.csv')
    Mpro_soaks = pd.read_csv(os.path.join(path, 'Mpro_soaks') + '.csv')

    # Harmonise column names so both tables can be joined on 'crystal_name'.
    meta = meta.rename(columns={
        'crystal_name': 'sub_crystal_name',
        'RealCrystalName': 'crystal_name',
        'alternate_name': 'compound_ID_meta',
        'smiles': 'smiles_meta_0',
        'new_smiles': 'smiles_meta_1',
    })
    Mpro_soaks = Mpro_soaks.rename(columns={
        'Compound ID': 'compound_ID_soaks',
        'Rapid Fire avg IC50 (uM)': 'R_IC50',
        'Fluorescence avg IC50 (uM)': 'F_IC50',
        'Sample Name': 'crystal_name',
    })

    kept_columns = ['site_name', 'sub_crystal_name', 'crystal_name', 'SMILES',
                    'compound_ID_meta', 'compound_ID_soaks', 'R_IC50', 'F_IC50']
    # Keep only structures that have a fluorescence IC50 measurement.
    testset = (
        pd.merge(meta, Mpro_soaks, how='left', on='crystal_name')
        .dropna(subset=['F_IC50'])
        .sort_values(by=['site_name', 'crystal_name'])[kept_columns]
        .reset_index(drop=True)
    )
    testset = pd.merge(testset, cluster[['sub_crystal_name', 'cluster']],
                       how='left', on='sub_crystal_name')

    # Manual overrides of the compound IDs for three crystals.
    trueCID = {'Mpro-P0243': 'EDJ-MED-d08626de-4',
               'Mpro-P0793': 'EDG-MED-5d232de5-8',
               'Mpro-P0816': 'EDG-MED-5d232de5-7'}
    for crystal, compound_id in trueCID.items():
        rows = (testset.crystal_name == crystal)
        testset.loc[rows, 'compound_ID_meta'] = compound_id
        testset.loc[rows, 'compound_ID_soaks'] = compound_id

    # Build a fresh frame instead of mutating a filtered slice in place; this
    # avoids chained-assignment warnings and keeps the result independent of
    # the intermediate frame.
    testset = (
        testset[testset['F_IC50'] < 99]
        .drop_duplicates(subset=['crystal_name'], keep='first')
        .reset_index(drop=True)
    )
    # F_IC50 is read from a column labelled in uM; dividing by 1e6 converts
    # to molar before taking the negative log10.
    testset['pIC50'] = -np.log10(testset['F_IC50'] / 1000000)
    testset.to_csv(testset_path, index=False)
    return testset

In [3]:
def rescore(project, time, df):
    """Run a Flare docking job on the newest protein and record four scores in ``df``.

    The job is configured with the last protein in ``project.proteins``, the
    ligand at index 1 of that protein (used both as the only ligand and as
    the grid box), quality ``ScoreOnlyFixed`` and ligand minimisation off.

    Parameters
    ----------
    project : flare project
        Project whose last protein (and that protein's ligand at index 1)
        is scored.
    time : float
        Running total of time spent so far.
    df : pandas.DataFrame
        Table with a ``sub_crystal_name`` column; the row matching the
        protein title receives the score columns.

    Returns
    -------
    tuple
        ``(time + time_spent, updated_df)``; ``updated_df`` is a new frame
        with ``sub_crystal_name`` restored as a regular column.
    """
    docking = flare.Docking()
    target = project.proteins[-1]
    pose = target.ligands[1]
    docking.protein = target
    docking.ligands = [pose]
    docking.system.grid_box = pose
    docking.system.quality = flare.LeadFinderSystem.Quality.ScoreOnlyFixed
    docking.minimize_ligands = False

    print(docking.setup_messages())
    # process_time() counts CPU time of the current process only.
    # NOTE(review): if the docking work runs outside this process the figure
    # will not reflect wall-clock duration - confirm this is intended.
    started = process_time()
    docking.start()
    docking.wait()
    time_spent = round(process_time() - started, 3)
    print('Time spent {} sec'.format(time_spent))

    title = target.title
    scores = docking.ligands[0].properties.items()
    indexed = df.set_index(['sub_crystal_name'], drop=True)
    # Properties are picked by position counted from the end of the list;
    # the column names suggest which score each position holds - TODO confirm
    # against the property names produced by the docking job.
    score_positions = (
        ('dG_rescore', -4),
        ('RS_rescore', -5),
        ('VS_rescore', -3),
        ('LE_rescore', -2),
    )
    for column, position in score_positions:
        indexed.loc[title, column] = scores[position][1].value
    return time + time_spent, indexed.reset_index()

In [4]:
def getLig(protein, project, nb_dir):
    """Attach a ligand to ``protein`` and append a hydrogenated ``_rescore`` duplicate.

    While ``protein.ligands`` is empty, every sequence of type ``Ligand`` in
    ``protein.sequences`` is appended to ``project.ligands``. If
    ``protein.ligands`` is still empty afterwards, the file
    ``<nb_dir>/aligned/<title>/<title>.pdb`` is read and its contents are
    added to ``project.ligands``. The last project ligand is then assigned
    to ``protein`` and titled after it, appended to the project once more,
    and that last entry gets hydrogens and a ``_rescore`` title suffix.

    If the protein had no ligands and nothing could be added, the function
    reports this and returns without touching ``project.ligands``;
    previously the last ligand already in the project (belonging to an
    earlier protein) was silently re-assigned, or an ``IndexError`` was
    raised when the project had no ligands at all.

    Parameters
    ----------
    protein : flare protein
        Protein whose ligand is being set up.
    project : flare project
        Project whose ``ligands`` list is extended.
    nb_dir : str
        Base directory containing the ``aligned`` folder.
    """
    had_ligands = len(protein.ligands) > 0
    count_before = len(project.ligands)

    for seq in protein.sequences:
        if (len(protein.ligands) == 0) and (seq.type == flare.Sequence.Type.Ligand):
            project.ligands.append(seq)

    if len(protein.ligands) == 0:
        print('Try to find alternative structure')
        ligand_path = os.path.join(nb_dir, 'aligned', protein.title, protein.title) + '.pdb'
        try:
            project.ligands.extend(flare.read_file(ligand_path))
        except Exception as exc:
            # Best effort: report the failure and fall through to the
            # "nothing added" check below.
            print(ligand_path + ' could not be loaded: ' + str(exc))

    if not had_ligands and len(project.ligands) == count_before:
        # Nothing new was added: do not hijack a ligand from an earlier protein.
        print('No ligand found for ' + protein.title)
        return

    project.ligands[-1].protein = protein
    project.ligands[-1].title = protein.title

    # Append the ligand again and prepare that last entry for rescoring.
    project.ligands.append(project.ligands[-1])
    project.ligands[-1].add_hydrogens()
    project.ligands[-1].title = protein.title + '_rescore'

In [5]:
def loadStruc(projects, title, nb_dir):
    """Load, prepare and minimise the bound complex ``title`` in every project.

    The complex is read from ``<nb_dir>/aligned/<title>/<title>_bound.pdb``.
    If that file does not exist a message is printed and nothing else
    happens. Otherwise, for each project, the structure is appended to
    ``project.proteins``, the last protein is renamed to ``title``, run
    through protein preparation and minimisation, given hydrogens, and
    passed to ``getLig``.

    If preparation raises, the residues of all ``Water`` and ``Other``
    sequences are deleted and preparation is attempted once more; a second
    failure propagates to the caller.

    Parameters
    ----------
    projects : iterable of flare projects
        Projects that should receive the structure.
    title : str
        Name of the structure; used for the file path and protein title.
    nb_dir : str
        Base directory containing the ``aligned`` folder.
    """
    complex_path = os.path.join(nb_dir, 'aligned', title, title) + '_bound.pdb'

    if not os.path.exists(complex_path):
        print(complex_path + ' not found')
        return

    for project in projects:
        project.proteins.extend(flare.read_file(complex_path))
        protein = project.proteins[-1]
        protein.title = title

        print('\nPreparing complex ' + title)
        prep = flare.ProteinPrep()
        prep.proteins = [protein]
        try:
            prep.start()
            prep.wait()
        except Exception as exc:
            # Only ordinary errors trigger the retry; KeyboardInterrupt and
            # SystemExit are allowed to stop the run.
            print('Preparation failed (' + str(exc) + '); retrying without water/other sequences')
            for seq in protein.sequences:
                if seq.type in [flare.Sequence.Type.Water, flare.Sequence.Type.Other]:
                    del seq[:]
            prep.start()
            prep.wait()

        print('Minimising complex ' + title)
        minimization = flare.Minimization()
        minimization.protein = protein
        minimization.start()
        minimization.wait()

        project.proteins[-1].add_hydrogens()
        getLig(protein, project, nb_dir)

In [6]:
if __name__ == "__main__":
    # Raw string: the backslashes in this Windows path are literal. Without
    # the r-prefix, '\J', '\B' and '\F' are invalid escape sequences.
    nb_dir = r'D:\JupyterNotebook\BSc\Flare'
    rescore_time = 0

    testset = get_testset(nb_dir)
    # Results are accumulated in a copy so the input table stays unchanged.
    test_result = testset.copy()
    proj = flare.Project()

    for title in testset['sub_crystal_name']:
        loadStruc([proj], title, nb_dir)
        # Nothing has been loaded yet (e.g. the first structure file was
        # missing): there is no protein to inspect.
        if len(proj.proteins) == 0:
            continue
        newest = proj.proteins[-1]
        # Rescore only when the newest protein is the one just requested and
        # it has at least one ligand attached.
        if (len(newest.ligands) > 0) and (newest.title == title):
            rescore_time, test_result = rescore(proj, rescore_time, test_result)

    proj.save(os.path.join(nb_dir, 'rescore') + '.flr')
    test_result.to_csv(os.path.join(nb_dir, 'rescore_result') + '.csv', index=False)
    print('\nTotal time', rescore_time)