Steps before this:
1. Find a ligand you are interested in on PDB
2. Find its three letter code and query PDB using: Structure Attributes > Component Identifier - Has No Covalent Linkage > (Three letter code)
3. Click the "download" button and copy the names from the box to a text file.
4. Use the bash script get_ligand_sdf.sh with your PDB text file to download the .sdf files from PDB to your local directory. The first cell below looks for them as input/params/<ligand>/*_<ligand>.sdf.

In [1]:
from openbabel import pybel
import sys, os, glob
import subprocess

# Ligand to process; this code appears in every directory and file name below.
ligand = 'GLC'

# Directory that holds the downloaded .sdf files and everything derived from them.
inout_dir = f'input/params/{ligand}'
# Directory containing Rosetta's public python scripts (used by a later cell).
rosetta_dir = '../rosetta/source/scripts/python/public'

if not os.path.exists(inout_dir):
    os.makedirs(inout_dir)

# For every <prefix>_<ligand>.sdf file, write a hydrogen-added copy named
# <prefix>_<ligand>_H.sdf into the same directory.
in_file = glob.glob(f'{inout_dir}/*_{ligand}.sdf')
for sdf_path in in_file:
    sdf_dir, sdf_name = os.path.split(sdf_path)
    stem = os.path.splitext(sdf_name)[0]

    # Only the first molecule record of each file is read.
    first_mol = next(pybel.readfile('sdf', sdf_path))
    # NOTE(review): the arguments are presumably (polar-only=False,
    # correct-for-pH=True, pH=7.0) -- confirm against the Open Babel API docs.
    first_mol.OBMol.AddHydrogens(False, True, 7.0)

    first_mol.write('sdf', f'{sdf_dir}/{stem}_H.sdf', overwrite=True)



In [None]:
# Convert every hydrogen-added .sdf file into a params file plus a matching PDB file
# by running molfile_to_params.py (located in rosetta_dir) on it.
#
# Each run leaves its results in the current working directory under the same two
# names, <ligand>.params and <ligand>_0001.pdb. The conversions therefore have to run
# strictly one after another, and the outputs must be moved away before the next run
# starts; launching them concurrently lets the runs overwrite each other's files.

in_file = glob.glob(f'{inout_dir}/*{ligand}_H.sdf')

for file in in_file:

    filename = os.path.splitext(os.path.basename(file))[0]

    # Argument list instead of a shell string: no quoting problems with odd paths.
    molfile_to_params_cmd = [
        'python', f'{rosetta_dir}/molfile_to_params.py',
        file, '--name', ligand, '--clobber',
    ]

    print(' '.join(molfile_to_params_cmd))

    # subprocess.run blocks until the conversion has finished.
    result = subprocess.run(molfile_to_params_cmd)

    if result.returncode != 0:
        # Do not move anything after a failed run; report it and carry on.
        print(f'molfile_to_params.py failed for {file} '
              f'(exit code {result.returncode}); skipping')
        continue

    # Equivalent of the former `mv` calls; os.replace overwrites an existing target.
    # The params file is overwritten on every iteration, so the last run's copy is kept.
    os.replace(f'{ligand}_0001.pdb', f'{inout_dir}/{filename}.pdb')
    os.replace(f'{ligand}.params', f'{inout_dir}/{ligand}.params')

In [4]:
# Report the number of lines in every generated ligand PDB file.
# Each count is stored as a single-element list, so the printed result is a
# list of lists in the order returned by glob.
all_pdbs = glob.glob(f'{inout_dir}/*{ligand}_H.pdb')

lines_in_file = []
for pdb_path in all_pdbs:
    with open(pdb_path, 'r') as handle:
        line_total = len(handle.readlines())
    lines_in_file.append([line_total])

print(lines_in_file)

[[25], [25], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [25], [25], [25], [24], [24], [24], [24], [24], [25], [25], [25], [25], [24], [25], [24], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [23], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [25], [24], [25], [25], [24], [24], [24], [25], [24]]


In [5]:
# Concatenate all ligand PDB files that have exactly `expected_lines` lines into
# <ligand>_conformers.pdb; files with any other line count are moved into ignore/.
all_pdbs = glob.glob(f'{inout_dir}/*{ligand}_H.pdb')

ignore_dir = f'{inout_dir}/ignore'
if not os.path.exists(ignore_dir):
    os.mkdir(ignore_dir)

# Line count a PDB file must have to be included in the merged file.
expected_lines = 25

with open(f'{inout_dir}/{ligand}_conformers.pdb', 'w') as merged:

    for pdb_file in all_pdbs:
        with open(pdb_file, 'r') as source:
            contents = source.readlines()

        if len(contents) != expected_lines:
            # Unexpected line count: set the file aside instead of merging it.
            filename = os.path.basename(pdb_file)
            os.rename(pdb_file, f'{inout_dir}/ignore/{filename}')
            continue

        merged.writelines(contents)

In [6]:
# For every remaining ligand PDB file, write a combined PDB file into
# <pdb_dir>/<ligand>/ that consists of the contents of ALA.pdb followed by the
# contents of the ligand PDB file.
pdb_dir = 'input/pdbs'

# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(f'{pdb_dir}/{ligand}', exist_ok=True)

hydrogen_suffix = '_H.pdb'
all_pdbs = glob.glob(f'{inout_dir}/*{ligand}{hydrogen_suffix}')

for curr_pdb in all_pdbs:

    # Remove the trailing '_H.pdb' by slicing. str.strip('_H.pdb') would instead
    # remove any of the characters '_', 'H', '.', 'p', 'd', 'b' from BOTH ends of
    # the name, which mangles e.g. ligand codes ending in 'H'. The glob pattern
    # guarantees that every name ends with the suffix.
    curr_filename = os.path.basename(curr_pdb)[:-len(hydrogen_suffix)]
    comb_file = f'{pdb_dir}/{ligand}/{curr_filename}.pdb'

    with open(f'{pdb_dir}/ALA.pdb', 'r') as f1, open(curr_pdb, 'r') as f2, open(comb_file, 'w') as f_out:
        # ALA.pdb first, then the ligand, in that order.
        f_out.writelines(f1)
        f_out.writelines(f2)
            
