In [1]:
import pandas as pd
import os
import glob
import json
import numpy as np
import shutil
from Bio import SeqIO
from pymol import cmd
from pymol import CmdException

In [2]:
def check_ligand_location_pymol(pdb_file, id, prot_chain="B", pep_chain="C", debug=False, out_pdb=""):
    ref_pdb = "input_files/ref_gpcr.pdb"

    cmd.reinitialize()
    cmd.load(pdb_file, "struct")
    cmd.load(ref_pdb, "ref")
    #cmd.do('tmalign struct and chain %s, ref' % prot_chain)
    cmd.cealign("ref", 'struct and chain %s' % prot_chain) # OBS CEalign has target and mobile inverted
    com_prot = [round(x, 3) for x in cmd.centerofmass('struct and chain %s' % prot_chain)]
    com_pep = [round(x, 3) for x in cmd.centerofmass('struct and chain %s' % pep_chain)]

    if debug:
        print("> %s" % pdb_file)
        cmd.color("silver", "chain %s and elem C" % prot_chain)
        cmd.show("spheres", "chain %s" % pep_chain)
        cmd.hide("cartoon", "chain %s" % pep_chain)
        print("\tCOM prot:", com_prot)
        print("\t COM pep: ", com_pep)
        pse_file = "debug.pse"
        cmd.save(pse_file)
    else:
        cmd.delete("ref")

    if com_prot[2] > com_pep[2]:
        is_outside = True
    else:
        is_outside = False

    if debug:
        print("\t> Is_outside:", is_outside)
    if prot_chain == "B" and pep_chain == "C":
        cmd.alter("chain B", "chain=\"A\"")
        cmd.alter("chain C", "chain=\"B\"")
        cmd.alter("chain D", "chain=\"C\"")

    if out_pdb != "":
        cmd.save(out_pdb, "struct")
    return is_outside

In [3]:
results_dir = "af_jobs_af23"
out_dir = "output_files_af23/"
out_csv = "%s/af_results.csv" % out_dir
pdb_out_dir = "%s/individual_pdbs" % out_dir
for filename in glob.glob("%s/*.pdb" % pdb_out_dir): os.remove(filename)

results_data = pd.DataFrame()
for job_dir in sorted(glob.glob("%s/*/*/" % results_dir)):
    _, rec, lig, _ = job_dir.split("/")
    comp = "%s-%s" % (rec, lig)

    outside_found = False
    outside_pdb = np.nan
    rank = np.nan
    finished = False
    score = np.nan
    relaxed_struct = False
    has_x = False

    if os.path.exists("%s/%s/ranked_0.pdb" % (job_dir, comp)):
        print(rec, lig, "finished")

        fasta_entry = SeqIO.parse(open("%s/%s.fasta" % (job_dir, comp)), "fasta")
        for record in fasta_entry:
            if "X" in record.seq:
                has_x = True
        finished = True

        with open("%s/%s/ranking_debug.json" % (job_dir, comp), "r") as json_file:
            ranking_data = json.load(json_file)

        outside_pdb = "%s/%s/first_outside.pdb" % (job_dir, comp)
        for pdb_file in sorted(glob.glob(job_dir+"%s/ranked_*.pdb" % (comp))):
            if has_x:
                is_outside = check_ligand_location_pymol(pdb_file=pdb_file, id=comp, prot_chain="B", pep_chain="C",
                                                         out_pdb=outside_pdb)
                relaxed = False
            else:
                try:
                    is_outside = check_ligand_location_pymol(pdb_file=pdb_file, id=comp, prot_chain="A", pep_chain="B",
                                                             out_pdb=outside_pdb)
                    relaxed = True
                except CmdException:
                    is_outside = check_ligand_location_pymol(pdb_file=pdb_file, id=comp, prot_chain="B", pep_chain="C",
                                                             out_pdb=outside_pdb)
                    relaxed=False

            if is_outside:
                print("\t%s *is* outside" % (pdb_file.split("/")[-1]))
                outside_found = True
                rank = pdb_file.split("_")[-1].split(".")[0]
                out_pdb = "%s/%s-%s-rank%s.pdb" % (pdb_out_dir, rec, lig, rank)
                score = round(ranking_data["iptm+ptm"]["model_%s_multimer_v3_pred_0" % (int(rank)+1)], 2)
                shutil.copy(outside_pdb, out_pdb)
                break
            else:
                #print("\t%s is NOT outside" % (pdb_file.split("/")[-1]))
                pass

    else:
        print(rec, lig, "NOT finished")

    tmp_data = {"complex": comp, "finished": finished, "receptor": rec, "ligand": lig,
                                       "outside_found": outside_found, "chosen_rank": rank,
                                       "score": score, "relaxed": relaxed}
    tmp_data = pd.DataFrame.from_dict(tmp_data, orient="index").T.set_index("complex")
    results_data = pd.concat([results_data, tmp_data])

results_data.to_csv(out_csv)
shutil.make_archive("%s/all_chosen_pdbs" % out_dir, 'zip', pdb_out_dir)

5ht1b_human 137 finished
	ranked_4.pdb *is* outside
5ht1d_human 137 finished
	ranked_4.pdb *is* outside
ackr1_human 753 finished
	ranked_0.pdb *is* outside
ackr1_human 758 finished
	ranked_0.pdb *is* outside
ackr1_human 759 finished
	ranked_0.pdb *is* outside
ackr1_human 769 finished
	ranked_0.pdb *is* outside
ackr1_human 771 finished
	ranked_0.pdb *is* outside
ackr1_human 797 finished
	ranked_0.pdb *is* outside
ackr1_human 820 finished
	ranked_0.pdb *is* outside
ackr1_human 821 finished
	ranked_0.pdb *is* outside
ackr1_human 829 finished
	ranked_0.pdb *is* outside
ackr1_human 836 finished
	ranked_0.pdb *is* outside
ackr2_human 753 finished
	ranked_0.pdb *is* outside
ackr2_human 756 finished
	ranked_0.pdb *is* outside
ackr2_human 757 finished
	ranked_0.pdb *is* outside
ackr2_human 758 finished
	ranked_0.pdb *is* outside
ackr2_human 759 finished
	ranked_0.pdb *is* outside
ackr2_human 769 finished
	ranked_0.pdb *is* outside
ackr2_human 770 finished
	ranked_0.pdb *is* outside
ackr2_human 

'/Users/nqz918/Documents/data/code/af_arman/output_files_af23/all_chosen_pdbs.zip'

In [4]:
print("\n > %s passed" % len(results_data[(results_data["finished"] == True) & (results_data["outside_found"] == True)].index))
print(" > %s have not finished" % len(results_data[results_data["finished"] == False].index))
print("\t", " ".join(results_data[results_data["finished"] == False].index))
print(" > %s have no ligand on outside" % len(results_data[(results_data["outside_found"] == False) & (results_data["finished"] == True)].index))
print("\t", " ".join(results_data[(results_data["outside_found"] == False) & (results_data["finished"] == True)].index))
results_data


 > 713 passed
 > 6 have not finished
	 rl3r1_human-2000 rl3r2_human-2000 rl3r2_mouse-6519 rxfp1_human-1988 rxfp1_human-1989 rxfp1_human-1990
 > 17 have no ligand on outside
	 ccr6_human-3648 fpr1_human-1031 fpr1_human-3568 fpr1_human-3569 fpr2_human-1031 fpr2_human-3568 fpr2_human-3569 fshr_human-1157 g37l1_human-6067 gasr_human-8411 gpr37_human-6067 gpr39_human-5336 gpr39_human-5337 hrh4_human-1272 ntr1_human-1577 par1_human-4453 par4_human-4456


Unnamed: 0_level_0,finished,receptor,ligand,outside_found,chosen_rank,score,relaxed
complex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5ht1b_human-137,True,5ht1b_human,137,True,4,0.55,True
5ht1d_human-137,True,5ht1d_human,137,True,4,0.55,True
ackr1_human-753,True,ackr1_human,753,True,0,0.8,True
ackr1_human-758,True,ackr1_human,758,True,0,0.84,True
ackr1_human-759,True,ackr1_human,759,True,0,0.79,True
...,...,...,...,...,...,...,...
v2r_human-2176,True,v2r_human,2176,True,0,0.9,True
xcr1_human-3647,True,xcr1_human,3647,True,0,0.68,True
xcr1_human-4370,True,xcr1_human,4370,True,0,0.64,True
xcr1_human-4446,True,xcr1_human,4446,True,0,0.67,True
