In [1]:
import sys
sys.path.insert(1, '../scripts')

In [2]:
from os import path
import numpy as np
import collections
import mdtraj as md
import parseaf as pa

In [3]:
def read_seq_from_pdb(filepath):
    """Read the one-letter sequence and the mean B-factor from a PDB file.

    One residue is recorded for every ``ATOM`` record whose atom name is the
    backbone nitrogen ``N``. The averaged value is the temperature-factor
    field of those records (PDB columns 61-66).
    NOTE(review): for AlphaFold/ColabFold models this field presumably holds
    the per-residue pLDDT -- confirm for other inputs.

    Parameters
    ----------
    filepath : str
        Path to a fixed-column PDB file.

    Returns
    -------
    tuple
        ``(sequence, mean_value)``: the one-letter sequence as a ``str`` and
        the mean of the temperature-factor column over the matched records
        (``nan`` if no record matched).

    Raises
    ------
    KeyError
        If a residue name is not one of the 20 standard amino acids.
    """
    three_to_one_map = {
        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y', '-': '-',
    }
    resids = []
    pLDDTs = []
    with open(filepath) as file:
        for line in file:
            # Atom name occupies columns 13-16; compare the whole field so
            # only the backbone nitrogen (one per residue) is matched.
            if line.startswith("ATOM") and line[12:16].strip() == 'N':
                resids.append(line[17:20])
                # tempFactor is columns 61-66, i.e. slice [60:66]. The former
                # slice [61:67] dropped the leading digit of values >= 100
                # (e.g. "100.00" was parsed as 0.0).
                pLDDTs.append(float(line[60:66]))
    sequence = ''.join(three_to_one_map[aa] for aa in resids)
    return sequence, np.mean(pLDDTs)

In [4]:
def get_percent_helix(af_pdb):
    """Return the fraction of residues that DSSP assigns to helix.

    Despite the name, the result is a fraction in [0, 1], not a percentage.

    Parameters
    ----------
    af_pdb : object or None
        Structure passed to ``md.compute_dssp`` (in this notebook, the result
        of ``md.load``). ``None`` means no structure is available.

    Returns
    -------
    float or None
        Number of ``'H'`` assignments in the first frame divided by the
        number of assignments in that frame; ``None`` when ``af_pdb`` is
        ``None``; ``0.0`` when the frame has no residues.
    """
    if af_pdb is None:
        return None
    # compute_dssp returns one row of per-residue codes per frame; only the
    # first frame is used.
    ss = md.compute_dssp(af_pdb, simplified=True)[0]
    print(ss)
    n_res = len(ss)
    if n_res == 0:
        return 0.0
    helix_cnt = collections.Counter(ss)['H']
    # Normalise by the actual residue count; the denominator used to be a
    # hard-coded 50, which is only correct for 50-residue structures.
    return helix_cnt / n_res

In [5]:
# Directory containing the rank-1 model files for variants sv1..sv30.
fdir = '/mnt/d/research/drummond-lab/hcrpaper/results/rank1/'
# Each variant's rank-1 file name ends in model_3 or model_5; the first
# candidate that exists on disk is used (model_3 is preferred, as before).
rank1_models = (3, 5)

phelix = []  # helix fraction per variant; index i-1 corresponds to sv{i}
for i in range(1, 31):
    name = 'sv' + str(i)
    candidates = [fdir + name + '_unrelaxed_rank_1_model_' + str(m) + '.pdb'
                  for m in rank1_models]
    fpath = next((c for c in candidates if path.exists(c)), None)
    print(name)
    if fpath is None:
        # The fallback file used to be loaded without an existence check,
        # which surfaced as an opaque loader error; fail with the full list
        # of paths that were tried instead.
        raise FileNotFoundError(
            'no rank 1 model found for ' + name + '; tried: ' + ', '.join(candidates)
        )
    pdb = md.load(fpath)
    p = get_percent_helix(pdb)
    print(read_seq_from_pdb(fpath), p)
    phelix.append(p)

sv1
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'C']
('EKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEK', 97.7094) 0.96
sv2
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'C']
('EEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEK', 97.73) 0.96
sv3
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'C']
('KEKKKEKKEEKKEEKEKEKEKEEKKKEEKEKEKEKKKEEKEKEEKKEEEE', 97.68580000000001) 0.96
sv4
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H

In [6]:
# Directory containing the rank-5 model files for variants sv1..sv30.
fdir = '/mnt/d/research/drummond-lab/hcrpaper/results/rank5/'
# Each variant's rank-5 file name ends in model_1, model_2 or model_4; the
# first candidate that exists on disk is used, in this order (as before).
rank5_models = (1, 2, 4)

phelix = []  # helix fraction per variant; index i-1 corresponds to sv{i}
for i in range(1, 31):
    name = 'sv' + str(i)
    candidates = [fdir + name + '_unrelaxed_rank_5_model_' + str(m) + '.pdb'
                  for m in rank5_models]
    fpath = next((c for c in candidates if path.exists(c)), None)
    print(name)
    if fpath is None:
        # The last fallback file used to be loaded without an existence
        # check, which surfaced as an opaque loader error; fail with the
        # full list of paths that were tried instead.
        raise FileNotFoundError(
            'no rank 5 model found for ' + name + '; tried: ' + ', '.join(candidates)
        )
    pdb = md.load(fpath)
    p = get_percent_helix(pdb)
    print(read_seq_from_pdb(fpath), p)
    phelix.append(p)

sv1
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'C']
('EKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEKEK', 93.66100000000002) 0.96
sv2
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'C']
('EEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEEEKKKEK', 94.13320000000002) 0.96
sv3
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'C']
('KEKKKEKKEEKKEEKEKEKEKEEKKKEEKEKEKEKKKEEKEKEEKKEEEE', 93.1002) 0.96
sv4
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'