Calculating all paths, energies

In [1]:
from openbabel import openbabel

In [2]:
from rdkit import Chem
from rdkit.Chem import rdBase
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit.Chem import Crippen

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss

In [4]:
from treelib import Node, Tree

In [5]:
from ast import literal_eval

In [6]:
class Molecule(object):
    def __init__(self, path):
        self.path = path

In [7]:
def split(word):
    return[char for char in word]

In [8]:
def tree_copier(tree, identifier_factor):
    tree_copy = Tree()
    nodes = tree.all_nodes()
    for i in range(len(nodes)):
        dummy_tag = nodes[i].tag
        dummy_identifier = nodes[i].identifier
        dummy_data = nodes[i].data
        try:
            dummy_parent = (tree.parent(dummy_identifier)).identifier 
        except:
            dummy_parent = -1
        if dummy_parent == -1:
            tree_copy.create_node(dummy_tag, (dummy_identifier+identifier_factor), data = dummy_data)
        else:
            tree_copy.create_node(dummy_tag, (dummy_identifier+identifier_factor), parent=(dummy_parent+identifier_factor), data = dummy_data)
    return(tree_copy)

In [9]:
def index_finder(Product, rels, path):
    indexes = []
    energies = []
    for i in range(len(rels['Index'])):
        place = literal_eval(rels['Products'][i])
        for j in range(len(place)):
            if place[j] == Product:
                indexes.append(rels['Index'][i])
                energies.append(rels['Energy Change'][i])
    valid_indexes = []
    valid_energies = []
    for i in range(len(indexes)):
        valid = True
        precursors = precursor_finder(indexes[i], rels)
        for j in range(len(precursors)):
            if precursors[j] in path:
                valid = False
                break
        if valid == True:
            valid_indexes.append(indexes[i])
            valid_energies.append(energies[i]) 
    return(valid_indexes, valid_energies)

In [10]:
def precursor_finder(index, rels):
    precursors = []
    for i in range(len(rels['Index'])):
        if rels['Index'][i] == index:
            dummy = literal_eval(rels['Reagents'][i])
            for j in range(len(dummy)):
                precursors.append(dummy[j])
    return(precursors)

In [11]:
def find_component(node, rels):
    Product = node.tag
    path = split((node.data).path)
    indexes, energy_changes = index_finder(Product, rels, path)
    precursors = []
    for i in range(len(indexes)):
        precursors.append(precursor_finder(indexes[i], rels))
    return(precursors, energy_changes)

In [12]:
def map_tree(Smiles, rels):
    rels = pd.read_csv(rels, sep='\t')
    #base_molecules = ['N', 'C=O', 'C(CO)=O', 'O'] #FormoseAmm
    base_molecules = ['C=O', 'C(CO)=O', 'O'] #Formose
    #base_molecules = ['N', 'C(C(C(C(C(CO)O)O)O)O)=O', 'O'] #GlucoseAmm
    #base_molecules = ['C(C(C(C(C(CO)O)O)O)O)=O', 'O'] #Glucose
    #base_molecules = ['C(C(C)=O)(O)=O', 'O'] #PyruvicAcid
    all_trees = []
    all_energies = []
    tree_statuses = []
    tree1 = Tree()
    tree1.create_node(Smiles, 0, data=Molecule(Smiles))
    all_trees.append(tree1)
    all_energies.append(0)
    tree_statuses.append(False)
    
    complete = False
    while complete == False:
        for i in range(len(all_trees)):
            if tree_statuses[i] == False:
                finished = False
            current_nodes = all_trees[i].all_nodes()
            current_depths = []
            for j in range(len(current_nodes)):
                current_depths.append(all_trees[i].depth(current_nodes[j]))
            level_counter = max(current_depths)
            node_counter = len(current_nodes)-1
            path_energy = all_energies[i]
            while finished == False:
                nodes = all_trees[i].all_nodes()
                active_nodes = []
                nodes_with_children = []
                for k in range(len(nodes)):
                    try:     
                        temp = (all_trees[i].parent(nodes[k].identifier)).identifier
                    except:
                        temp = -1
                    if temp != -1:
                        nodes_with_children.append(temp)
                for k in range(len(nodes)):
                    tag = nodes[k].tag
                    level = all_trees[i].depth(nodes[k])
                    if nodes[k].identifier not in nodes_with_children and tag not in base_molecules:
                        active_nodes.append(nodes[k])
                if active_nodes == []:
                    finished = True
                    all_energies[i] = path_energy
                    break
                else:
                    for z in range(len(active_nodes)):
                        precursors, energy_changes = find_component(active_nodes[z], rels)
                        product = active_nodes[z].identifier
                        dummy = (active_nodes[z].data).path
                        place = []
                        num_trees = len(all_trees)
                        for m in range(len(dummy)):
                            place.append(dummy[m])
                        if len(precursors) > 1:
                            for p in range(1, len(precursors)):
                                tree = tree_copier(all_trees[i], (num_trees*1000))
                                dummy_energy = path_energy
                                dummy_product = product 
                                all_energies.append(dummy_energy + energy_changes[p])
                                tree_statuses.append(False)
                                dummy_node_counter = num_trees*1000 + node_counter
                                for q in range(len(precursors[p])):
                                    dummy_node_counter += 1
                                    tree.create_node(precursors[p][q], dummy_node_counter, parent=(product+num_trees*1000), data=Molecule(place + [precursors[p][q]])) 
                                all_trees.append(tree)
                                num_trees+=1
                        path_energy += energy_changes[0]
                        for n in range(len(precursors[0])):
                            node_counter +=1
                            all_trees[i].create_node(precursors[0][n], node_counter, parent=product, data=Molecule(place + [precursors[0][n]])) 
                    level_counter+=1
            if finished == True:
                tree_statuses[i] = True
        #for i in range(len(all_trees)):
            #all_trees[i].show()
        #print(tree_statuses)
        dummy = True
        for i in range(len(tree_statuses)):
            if tree_statuses[i] == False:
                dummy = False
        if dummy == True:
            complete = True
        data = {'Tree':all_trees, 'Path Energy':all_energies}
        df = pd.DataFrame(data)
        df.to_csv('Testdf.csv', header=None, index=None, sep='\t', mode='a')
    return(df)

In [13]:
%%time
a = map_tree('C(C(C(CO)O)O)=O', './RelsDataWithThermo/Formose/Formose_1RelsWithThermo.tsv')

CPU times: user 9.26 ms, sys: 2.62 ms, total: 11.9 ms
Wall time: 19.2 ms


In [34]:
a['Path Energy'][1]

-16.39

In [37]:
a['Tree'][0].show(data_property='path')

C(C(C(CO)O)O)=O
├── ['C', '(', 'C', '(', 'C', '(', 'C', 'O', ')', 'O', ')', 'O', ')', '=', 'O', 'C(CO)=O']
└── ['C', '(', 'C', '(', 'C', '(', 'C', 'O', ')', 'O', ')', 'O', ')', '=', 'O', 'C(CO)=O']



In [59]:
def parser(string): #read in paths from csv file and display nicely
    chars = list(string)
    newline_indexes = []
    for i in range(len(chars)):
        if chars[i] == '\n':
            newline_indexes.append(i)
    dummy = ''
    for i in range(0, newline_indexes[0]):
        dummy += chars[i]
    print(dummy)
    for i in range(len(newline_indexes)-1):
        dummy = ''
        for j in range(newline_indexes[i]+1, newline_indexes[i+1]):
            dummy += chars[j]
        print(dummy)

In [572]:
df1 = pd.read_csv('./TestDF.csv', sep='\t')

In [575]:
parser(df1['Tree'][0])

A
├── D
│   └── I
└── E
    └── K
