In [1]:
from molmimic.common.featurizer import ProteinFeaturizer
from toil.job import Job
import os
import numpy as np
from joblib import Parallel, delayed
import glob

In [4]:
def write_surface_xyz(pdb_file):
    """Save the non-buried atoms of one PDB file as a compressed-free ``.npz`` archive.

    A ``ProteinFeaturizer`` is built for ``pdb_file`` (named after the file's
    stem, working in the file's own directory, with feature calculation
    forced). The per-residue accessible-surface-area feature is requested for
    every atom, and each atom whose feature row is not flagged
    ``residue_buried`` contributes one row:
    ``[serial_number, residue_rasa, *atom.coord]``.

    Parameters
    ----------
    pdb_file : str
        Path to the PDB file to process.

    Returns
    -------
    None
        The array is written with ``np.savez`` next to the input file, using
        the input path without its extension (NumPy appends ``.npz``), and a
        "Done" message is printed.
    """
    pdb_name = os.path.splitext(os.path.basename(pdb_file))[0]
    work_dir = os.path.dirname(pdb_file)
    protein = ProteinFeaturizer(
        pdb_file, pdb_name, Job(), work_dir, force_feature_calculation=True
    )

    # Called only for its side effect; the return values are discarded.
    # NOTE(review): presumably this is what fills protein.atom_features -- confirm.
    for a in protein.get_atoms():
        protein.get_accessible_surface_area_residue(protein._remove_altloc(a))

    # Atoms and feature rows are paired positionally.
    surface_rows = []
    for atom, features in zip(protein.get_atoms(), protein.atom_features.itertuples()):
        if features.residue_buried:
            continue
        surface_rows.append([atom.serial_number, features.residue_rasa, *atom.coord])

    xyz = np.array(surface_rows)
    np.savez(os.path.splitext(pdb_file)[0], xyz)
    print("Done", pdb_file)

In [None]:
# Fan write_surface_xyz out over every unbound-structure PDB ("*_u.pdb")
# in the benchmark tree, using 8 joblib workers.
Parallel(n_jobs=8)(
    delayed(write_surface_xyz)(pdb_path)
    for pdb_path in glob.glob("BM5-clean/HADDOCK-ready/*/*_u.pdb")
)

In [9]:
# Serial pass over the same PDB files: any file that does not yet have a
# matching .npz next to it is processed, and a failure is reported without
# stopping the loop.
for p in glob.glob("BM5-clean/HADDOCK-ready/*/*_u.pdb"):
    if os.path.isfile(os.path.splitext(p)[0] + ".npz"):
        continue  # output already exists
    try:
        write_surface_xyz(p)
    except Exception as err:
        # SystemExit / KeyboardInterrupt are not Exception subclasses, so they
        # still propagate and interrupt the run.
        print(p, ":", err)

BM5-clean/HADDOCK-ready/2YVJ/2YVJ_l_u.pdb : could not convert string to float: 'NA'


In [36]:
def parse_tbl(tbl_file):
    """Convert a restraint table into a receptor/ligand residue-pair TSV.

    The table is read line by line. A line containing ``assign`` and
    ``segid A`` selects the current receptor residue (4th whitespace-separated
    token). Every following line containing ``resid`` and ``segid B`` is a
    ligand partner (3rd token) and produces one ``receptor<TAB>ligand`` row.
    An ``assign`` line for any other segment clears the current receptor
    residue, so its partner lines are ignored instead of being paired with a
    stale receptor id. Lines starting with ``!`` are skipped.

    NOTE(review): the token positions assume the layout
    ``assign ( resid N and segid A)`` / ``( resid M and segid B)`` -- confirm
    against the actual ambig.tbl files.

    Parameters
    ----------
    tbl_file : str
        Path to the restraint table. The name of its parent directory is used
        as the output prefix.

    Returns
    -------
    None
        Writes ``<dir>/<dirname>_true.tsv`` (header line
        ``#Receptor_resid<TAB>Ligand_resid`` followed by the pairs) and prints
        a "Done" message.

    Raises
    ------
    IndexError
        If a matching line has fewer tokens than expected.
    OSError
        If the input cannot be read or the output cannot be written.
    """
    path = os.path.dirname(tbl_file)
    pdb = os.path.basename(path)
    true_file = os.path.join(path, pdb + "_true.tsv")

    with open(tbl_file) as tbl, open(true_file, "w") as out:
        print("#Receptor_resid\tLigand_resid", file=out)
        resi_A = None
        for line in tbl:
            if line.lstrip().startswith("!"):
                continue  # comment line
            fields = line.split()
            if "assign" in line:
                # A new restraint starts here; only receptor-side (segid A)
                # assignments open a block whose partners are recorded.
                resi_A = fields[3] if "segid A" in line else None
            elif resi_A is not None and "resid" in line and "segid B" in line:
                print("{}\t{}".format(resi_A, fields[2]), file=out)
    print("Done", pdb)

In [None]:
# Regenerate the residue-pair TSV for every complex in the benchmark tree.
# Existing outputs are always overwritten. A failure on one table is reported
# against that table's own path and does not stop the loop.
for tbl_file in sorted(glob.glob("BM5-clean/HADDOCK-ready/*/ambig.tbl")):
    try:
        parse_tbl(tbl_file)
    except Exception as e:
        # SystemExit / KeyboardInterrupt are not Exception subclasses, so they
        # still propagate and interrupt the run.
        print(tbl_file, ":", e)