In [15]:
import pandas as pd
from rdkit.Chem import PandasTools
import os

## 1. Download hits from Fragalysis
See http://hippo.winokan.com/en/latest/fragalysis.html#hippo.fragalysis.download_target 
## 2. Prepare the `templates` folder by moving the `**apo-desolv.pdb` files there
## 3. Prepare master CSV from SDF

In [16]:
# Prepare the master CSV: one row per SDF record, holding its SMILES,
# compound-set ID, template name and one `hitN` column per reference hit.
orig = PandasTools.LoadSDF('scaffolds_354.sdf')
rows = []
for i, row in orig.iterrows():
    # NOTE(review): the record with index 0 is skipped — presumably it is a
    # header/metadata entry rather than a real compound; confirm.
    if i == 0:
        continue
    # `ref_mols` is a comma-separated list of hit names.
    hits: list[str] = row['ref_mols'].split(',')
    if len(hits) > 2:
        print(f'{i} has more than 2 hits')
    new_row = {
        'smiles': row['original SMILES'],
        'compound_set': row['ID'],
        'template': row['ref_pdb']
    }
    # Use a dedicated counter so the outer loop's `i` is not shadowed.
    for hit_num, hit in enumerate(hits, start=1):
        new_row[f'hit{hit_num}'] = hit
    # A fresh dict is built on every iteration, so no defensive copy is needed.
    rows.append(new_row)
# Build the frame once from the list of dicts; rows with fewer hits get NaN
# in the missing `hitN` columns.
df = pd.DataFrame(rows)
# make df smaller for example purposes
df = df.head()
df.to_csv('syndirella_master.csv', index=False)
df

Unnamed: 0,smiles,compound_set,template,hit1,hit2
0,Cn1nccc1C(=O)NCc1csc(-c2ncn[nH]2)n1,CHIKVMac-DLS-JA001,cx0281a,cx0892a,cx0281a
1,Cc1nsc(N[C@@H]2CCC[C@H]2c2csc(-c3nc[nH]n3)n2)n1,CHIKVMac-DLS-JA002,cx0281a,cx0406a,cx0281a
2,CCc1nc(NC(=O)c2ccnc(NC3CC=CC3)c2)no1,CHIKVMac-DLS-JA003,cx0300a,cx0441a,cx0316a
3,O=C1CC[C@H](Cn2cnccc2=O)N1,CHIKVMac-DLS-JA004,cx0300a,cx0314a,cx1114a
4,O=c1ccncn1C[C@H]1CCS(=O)(=O)C1,CHIKVMac-DLS-JA005,cx0300a,cx0314a,cx1182a


## 5. Save each row to a separate CSV

In [17]:
# Save each row of the master table as its own single-row CSV file.
# The output directory only has to be created once, so do it before the loop.
os.makedirs('syndirella_input', exist_ok=True)
for i, row in df.iterrows():
    # `iterrows` yields each row as a Series; transposing its frame turns it
    # back into a one-row DataFrame whose columns become the CSV header.
    row_df = row.to_frame().T
    # index=True keeps the row label as the first (unnamed) CSV column.
    row_df.to_csv(f'syndirella_input/syndirella_input{i}.csv', index=True)

## 6. Add the correct file paths to the job script template, `template.sh`
## 7. Replace `NUM` in `template.sh` with the job number

In [18]:
# Generate one job script per row of `df` by substituting the job number for
# the placeholder in the template script.

# Template script and the placeholder it contains
file_path = 'template.sh'
search_string = 'NUM'

# Directory the generated job scripts are written to; create it up front so
# the writes below do not fail on a fresh checkout.
jobs_dir = 'jobs'
os.makedirs(jobs_dir, exist_ok=True)

# The template does not change between jobs, so read it once.
with open(file_path, 'r') as file:
    file_contents = file.read()

for i in range(len(df)):
    replacement_string = str(i)

    # Plain substring replacement: EVERY occurrence of the placeholder is
    # replaced, including ones embedded in longer words, so the template must
    # not contain the placeholder text anywhere it should be left alone.
    new_contents = file_contents.replace(search_string, replacement_string)

    # Write the filled-in script to its own file; the template is untouched.
    new_path = f'{jobs_dir}/job{i}.sh'
    with open(new_path, 'w') as new_file:
        new_file.write(new_contents)

print("Replacement complete.")

Replacement complete.


## 7. Create a job list

In [19]:
# Write the job list: one job-script file name per line, one per row of `df`.
job_names = [f'job{idx}.sh\n' for idx in range(len(df))]
with open('job_list.txt', 'w') as job_list_file:
    job_list_file.writelines(job_names)

## 8. Install syndirella into the conda environment you'll use to run the jobs
## 9. Run the jobs using `run_jobs.sh`

```
chmod +x run_jobs.sh
./run_jobs.sh job_list.txt /path/to/job_directory /path/to/log_directory
```