In [23]:
%load_ext autoreload
%autoreload 2
from src.chem_draw import draw_pwy_svg
from src.utils import ensure_dirs
from svgutils import compose as sc
import pickle
import numpy as np
import subprocess
import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
# Params: identify which processed expansion to load
starters = '2mg'
targets = 'mvacid'
generations = 2

expansion_dir = '../data/processed_expansions/'
# Expansion file name, assembled from the params above
fn = f"{starters}_to_{targets}_gen_{generations}_tan_sample_1_n_samples_1000.pkl"

# Load processed expansions.
# NOTE: pickle.load executes arbitrary code on load; only open files you produced yourself.
with open(f"{expansion_dir}{fn}", 'rb') as pkl_handle:
    pe = pickle.load(pkl_handle)


In [25]:
# Print the (starter, target) pairs stored on the loaded expansion object
print(pe.starter_target_pairs)

{('2mg', 'mvacid')}


In [26]:
# Pick a single (starter, target) pair and pull its pathways from the expansion.
starter, target = '2mg', 'mvacid'

# Ordering / filtering criteria handed straight to `get_paths_w_st`
# (presumably: sort on 'prc_mcs', keep paths meeting these 'mdf' and
# 'enzyme_validation' values -- confirm the exact semantics in src.post_processing).
sort_by = ['prc_mcs']
filter_by = dict(mdf=0, enzyme_validation=1.0)

paths = pe.get_paths_w_st(starter=starter, target=target, sort_by=sort_by, filter_by=filter_by)

In [22]:
# Bare expression: lets the notebook display the list of selected Path objects
paths

[<src.post_processing.Path at 0x7f6c0398e550>,
 <src.post_processing.Path at 0x7f6c0398e690>,
 <src.post_processing.Path at 0x7f6c0398e190>,
 <src.post_processing.Path at 0x7f6c0398e890>,
 <src.post_processing.Path at 0x7f6c0398e7d0>]

In [31]:
# Export every selected pathway for one (starter, target) pair:
#   1. draw one SVG per pathway and collect a summary row per pathway,
#   2. convert each SVG to PDF with inkscape,
#   3. concatenate the PDFs into one file with pdfunite,
#   4. write the summary table(s) to an .xlsx workbook,
#   5. wipe the intermediate artifact directories.
# Reads `starter`, `target`, `paths`, `pe`, `fn` and `generations` from earlier cells.
st_pair = (starter, target)
pair_label = "_".join(st_pair)

# Drop the file extension *including* its dot. The previous `fn[:-3]` left the
# trailing "." in place, giving directories ending in "." and "<name>..xlsx".
expansion_name = fn.rsplit(".", 1)[0]

sheets = []
sheetnames = []

# Generate pwy svgs & csv
print("Generating svgs & csv")
headers = ['starter', 'target', 'mdf', 'pwy_ave_mcs'] + [f"mcs_rxn_{g+1}" for g in range(generations)]
pwy_svg_outdir = f"../artifacts/pwy_svgs/{expansion_name}/{pair_label}/"
ensure_dirs(pwy_svg_outdir)

rows_for_sheet = []
for path in paths:
    prc_mcs = pe.get_prc_mcs(path)  # Top-analogue prc_mcs for each predicted reaction in path
    n_rxns = len(prc_mcs)

    # Guard the average so a path without reactions yields None instead of ZeroDivisionError
    ave_mcs = sum(prc_mcs) / n_rxns if n_rxns else None
    row = [path.starter, path.target, path.mdf, ave_mcs]

    # One column per generation; paths shorter than `generations` are padded with None
    row += [prc_mcs[g] if g < n_rxns else None for g in range(generations)]

    # Pair every predicted reaction with its top analogue as (smarts, id) tuples for drawing
    sma_hash_pairs = []
    for prid in path.reaction_ids:
        pr = pe.predicted_reactions[prid]
        analogue = pr.top_analogue()['analogue']
        sma_hash_pairs.append([(pr.smarts, prid), (analogue.smarts, analogue.id)])

    outpath = pwy_svg_outdir + f"{path.id:04}.svg"
    draw_pwy_svg(sma_hash_pairs, outpath)

    rows_for_sheet.append(row)

# Make df for this st pair
sheets.append(pd.DataFrame(rows_for_sheet, columns=headers))
sheetnames.append(pair_label)

# Convert svgs to pdfs
print("Generating pdfs")
tmp_pdf_outdir = f"../artifacts/tmp_pdfs/{expansion_name}/{pair_label}/"
ensure_dirs(tmp_pdf_outdir)
for path in paths:
    cmd = ["inkscape", f"--export-pdf={tmp_pdf_outdir}{path.id:04}.pdf", f"{pwy_svg_outdir}{path.id:04}.svg"]
    result = subprocess.run(cmd)
    # Report failed conversions instead of ignoring them; keep going so one bad
    # pathway does not abort the whole export.
    if result.returncode != 0:
        print(f"WARNING: inkscape exited with code {result.returncode} for path {path.id}")

# Concatenate pdfs
print("Concatenating pdfs")
pwy_pdf_outdir = f"../artifacts/pwy_pdfs/{expansion_name}/"
ensure_dirs(pwy_pdf_outdir)
cat_pdf_fn = pwy_pdf_outdir + pair_label + '.pdf'
individual_pwys = [f"{tmp_pdf_outdir}{path.id:04}.pdf" for path in paths]

cmd = ["pdfunite", *individual_pwys, cat_pdf_fn]
result = subprocess.run(cmd)
if result.returncode != 0:
    print(f"WARNING: pdfunite exited with code {result.returncode}; {cat_pdf_fn} may be missing")

# Concatenate sheets into xls
print("Saving xlsx")
pwy_xls_outdir = "../artifacts/pwy_xls/"
ensure_dirs(pwy_xls_outdir)  # same helper call as for the other output directories
# Context manager saves and closes the workbook even if a sheet fails to write;
# `ExcelWriter.save()` no longer exists in pandas >= 2.0.
with pd.ExcelWriter(f"{pwy_xls_outdir}{expansion_name}.xlsx") as writer:
    for sheetname, df in zip(sheetnames, sheets):
        df.to_excel(writer, sheet_name=sheetname)

# Remove stuff
# NOTE: this clears the *entire* contents of each intermediate directory (all
# expansions, not just this one), including the SVGs generated above.
dirs = ['../artifacts/' + elt for elt in ['tmp_pdfs', 'rxn_svgs', 'pwy_svgs', 'mol_svgs']]
for elt in dirs:
    # With shell=True the command is passed as a single string (needed for the `*` glob)
    subprocess.run(f"rm -r {elt}/*", shell=True)

Generating svgs & csv
Generating pdfs


Failed to get connection
** (org.inkscape.Inkscape:220291): CRITICAL **: 18:12:14.027: dbus_g_proxy_new_for_name: assertion 'connection != NULL' failed

** (org.inkscape.Inkscape:220291): CRITICAL **: 18:12:14.027: dbus_g_proxy_call: assertion 'DBUS_IS_G_PROXY (proxy)' failed

** (org.inkscape.Inkscape:220291): CRITICAL **: 18:12:14.027: dbus_g_connection_register_g_object: assertion 'connection != NULL' failed
Failed to get connection
** (org.inkscape.Inkscape:220309): CRITICAL **: 18:12:14.327: dbus_g_proxy_new_for_name: assertion 'connection != NULL' failed

** (org.inkscape.Inkscape:220309): CRITICAL **: 18:12:14.327: dbus_g_proxy_call: assertion 'DBUS_IS_G_PROXY (proxy)' failed

** (org.inkscape.Inkscape:220309): CRITICAL **: 18:12:14.327: dbus_g_connection_register_g_object: assertion 'connection != NULL' failed


Concatenating pdfs
Saving xlsx
