# Optimization Framework using auto-diff to predict binding rates


## Import the necessary modules

In [1]:
# make sure the Jupyter path is correct for loading local modules
import sys
# path to steric_simulator module relative to notebook
sys.path.append("../../")
import copy
from KineticAssembly_AD import ReactionNetwork, VectorizedRxnNet, VecSim, Optimizer, EquilibriumSolver
import networkx as nx
import torch
from torch import DoubleTensor as Tensor

EnergyExplorer Module is not available. Check Rosetta installation. <ipykernel.iostream.OutStream object at 0x7f8037db9f10>


## Setup Reaction Network
#### Read the corresponding input file and call the ReactionNetwork class

In [2]:
# Input file describing the trimer system, located next to this notebook.
base_input = './trimer.pwr'
# Build the reaction network object from the input file.
# NOTE(review): the exact meaning of `one_step=True` is not visible in this
# notebook — confirm against the ReactionNetwork documentation.
rn = ReactionNetwork(base_input, one_step=True)
# Expand the network. The recorded output of this cell shows the complexes
# AB, AC, BC and ABC being added as nodes 3-6 after the monomers A, B, C.
rn.resolve_tree()


['default_assoc', 1.0]
['A']
100.0
['B']
100.0
['C']
100.0
Parsing rule...
Parsing rule...
Parsing rule...
New node added - Node index: 3 ; Node label: AB 
New node added - Node index: 4 ; Node label: AC 
Trying internal bonds
New node added - Node index: 5 ; Node label: BC 
New node added - Node index: 6 ; Node label: ABC 
Trying internal bonds
Trying internal bonds
Trying internal bonds
Trying internal bonds
Trying internal bonds
Trying internal bonds
Reaction Network Completed


## Checking reaction network
Looping over all network nodes to check if all species are created
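Creating a dictionary for later reference. Its keys are network edges, i.e. `(reactant node index, product node index)` tuples, and its values are the index (uid) of the corresponding reaction. Edges that belong to the same reaction share the same uid.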

In [3]:
# Make modules in the parent directory importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if "../" not in sys.path:
    sys.path.append("../")
import numpy as np
from reaction_network import gtostr

# Map every network edge (reactant node index, product node index) to the uid
# of the reaction it belongs to. Both reactants of one reaction point at the
# same product, so two edges can carry the same uid.
uid_dict = {}
for n in rn.network.nodes():
    # Show the node index next to its species label for a visual sanity check.
    print(n,"--",gtostr(rn.network.nodes[n]['struct']))
    for k,v in rn.network[n].items():
        uid_dict[(n,k)] = v['uid']

print(uid_dict)

0 -- A
1 -- B
2 -- C
3 -- AB
4 -- AC
5 -- BC
6 -- ABC
{(0, 3): 0, (0, 4): 1, (0, 6): 5, (1, 3): 0, (1, 5): 2, (1, 6): 3, (2, 4): 1, (2, 5): 2, (2, 6): 4, (3, 6): 4, (4, 6): 3, (5, 6): 5}


## Set the initial parameter values 
For a hetero-trimer there are 6 reaction rates.
This cell also creates the vectorized reaction network (an instance of the `VectorizedRxnNet` class).

In [4]:
# Initial association rate for every reaction, indexed by reaction uid.
# The list is sized from the network's reaction count instead of a hard-coded
# six entries, so the cell keeps working for other input files.
new_kon = [5] * rn._rxn_count

# Build {(reactant node, product node): k_on} for all edges. Each edge's uid is
# read straight from the graph rather than from `uid_dict`: a later cell
# rebinds `uid_dict` to a dictionary keyed by uid, which would make this cell
# fail with a KeyError when it is re-run.
update_kon_dict = {
    (u, v): new_kon[uid] for u, v, uid in rn.network.edges(data='uid')
}

nx.set_edge_attributes(rn.network,update_kon_dict,'k_on')
# Print the attribute dictionary of every edge to verify the new rates.
for _, _, edge_data in rn.network.edges(data=True):
    print(edge_data)


# Create the vectorized network from the updated reaction network (on the CPU)
# and show the rate vector it holds.
vec_rn = VectorizedRxnNet(rn, dev='cpu')
print(vec_rn.kon)

{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 0}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 1}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 5}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 0}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 2}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 3}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 1}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 2}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 4}
{'k_on': 5, 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 4}
{'k_on': 5, 'k_off': None, 'lc

In [5]:
sys.path.append("../")
import numpy as np
from reaction_network import gtostr

# One summary record per reaction uid. Several edges can share a uid; the
# first edge encountered while walking the nodes supplies the record.
uid_dict = {}
for node in rn.network.nodes():
    node_label = gtostr(rn.network.nodes[node]['struct'])
    for successor, edge_attrs in rn.network[node].items():
        rxn_uid = edge_attrs['uid']
        # Character sets of the labels: this node, and what the successor has
        # in addition to it.
        r1 = set(node_label)
        r2 = set(gtostr(rn.network.nodes[successor]['struct'])) - r1
        uid_dict.setdefault(
            rxn_uid,
            {
                'reactants': (r1, r2),
                'kon': edge_attrs['k_on'],
                'score': edge_attrs['rxn_score'],
                'koff': edge_attrs['k_off'],
                'uid': rxn_uid,
            },
        )
    print(node_label)

# Print each rate in vec_rn.kon together with its reaction record, ordered by
# ascending rate value.
ind_sort = np.argsort(vec_rn.kon.detach().numpy())
for rxn_idx in ind_sort:
    print(vec_rn.kon[rxn_idx])
    print(uid_dict[rxn_idx])

A
B
C
AB
AC
BC
ABC
tensor(5., dtype=torch.float64, grad_fn=<SelectBackward>)
{'reactants': ({'A'}, {'B'}), 'kon': 5, 'score': tensor([-20.], dtype=torch.float64), 'koff': None, 'uid': 0}
tensor(5., dtype=torch.float64, grad_fn=<SelectBackward>)
{'reactants': ({'A'}, {'C'}), 'kon': 5, 'score': tensor([-20.], dtype=torch.float64), 'koff': None, 'uid': 1}
tensor(5., dtype=torch.float64, grad_fn=<SelectBackward>)
{'reactants': ({'B'}, {'C'}), 'kon': 5, 'score': tensor([-20.], dtype=torch.float64), 'koff': None, 'uid': 2}
tensor(5., dtype=torch.float64, grad_fn=<SelectBackward>)
{'reactants': ({'B'}, {'C', 'A'}), 'kon': 5, 'score': tensor([-40.], dtype=torch.float64), 'koff': None, 'uid': 3}
tensor(5., dtype=torch.float64, grad_fn=<SelectBackward>)
{'reactants': ({'C'}, {'A', 'B'}), 'kon': 5, 'score': tensor([-40.], dtype=torch.float64), 'koff': None, 'uid': 4}
tensor(5., dtype=torch.float64, grad_fn=<SelectBackward>)
{'reactants': ({'A'}, {'C', 'B'}), 'kon': 5, 'score': tensor([-40.], dtyp

## Using the optimizer ##

### Define an instance of the optimizer class
#### Input Arguments:

reaction_network : Input the vectorized rxn network

sim_runtime: The runtime of the kinetic simulation. Needs to be same as the time over the experimental reaction data.

optim_iterations: No. of iterations to run the optimization. Can start at low values(100) and increase depending upon memory usage.

learning_rate: The size of the gradient descent step used to update the parameter values. Needs to be at least (1e-3 to 1e-1) * min{parameter value}. If the learning rate is too high, the optimizer can take too large a step, which sometimes leads to negative parameter values (unphysical). Some trial runs are required to find the best value.

device: cpu or gpu

method: Choose which PyTorch-based optimizer to use for gradient descent - Adam or RMSprop

mom: Only for RMSprop method. Use momentum term during gradient descent. 


In [6]:
# Reset the vectorized network before setting up the optimizer.
# NOTE(review): presumably reset_params=True also restores the initial rate
# parameters — confirm against VectorizedRxnNet.reset.
vec_rn.reset(reset_params=True)
# Configure the optimizer: simulate for 1 time unit per iteration, run 10
# optimization iterations with a step size of 1e-2, on the CPU, using RMSprop.
optim = Optimizer(reaction_network=vec_rn,
                  sim_runtime=1,
                  optim_iterations=10,
                  learning_rate=1e-2,
                  device='cpu',method="RMSprop")


Using CPU


### Call the optimization method

#### Input arguments

conc_scale, conc_thresh, mod_bool, mod_factor = As defined for the VecSim class. 

max_thresh: Max. allowed values of parameters being updated. Beyond this maximum a penalty is imposed on the cost function. (Regularization)

max_yield: A control variable used to store the updated parameter values over all iterations for further analysis. The parameter values are stored only if the current yield exceeds this max_yield.

yield_species: Node index of the species whose yield is being optimized.

In [7]:
# NOTE(review): update_reaction_net(rn) is called *before* optimize(). If it
# copies the vectorized network's rates into `rn`, then `rn` still holds the
# pre-optimization values afterwards — confirm whether it should be called
# again once the optimization has finished.
optim.rn.update_reaction_net(rn)
# Run the optimization. max_yield=0 is the threshold above which parameter
# sets are stored (see the markdown above).
# NOTE(review): yield_species=-1 presumably selects the last node (ABC) — confirm.
# The recorded cell output shows that the call returns a VectorizedRxnNet.
optim.optimize(conc_scale=1,conc_thresh=1,mod_bool=True,mod_factor=10,max_thresh=1e8,max_yield=0,yield_species=-1)

Reaction Parameters before optimization: 
[Parameter containing:
tensor([5., 5., 5., 5., 5., 5.], dtype=torch.float64, requires_grad=True)]
Optimizer State: <bound method Optimizer.state_dict of RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    eps: 1e-08
    lr: 0.01
    momentum: 0
    weight_decay: 0
)>
Using CPU
Start of simulation: memory Used:  81.7
Next time:  tensor(1.1231, dtype=torch.float64, grad_fn=<AddBackward0>)
Yield on sim. iteration 0 was f67.3%.
current params: tensor([5., 5., 5., 5., 5., 5.], dtype=torch.float64)
Grad:  tensor([-0.0002,  0.0014,  0.0014, -0.0074, -0.0496, -0.0074],
       dtype=torch.float64)
Using CPU
Start of simulation: memory Used:  81.8
Next time:  tensor(2.2982, dtype=torch.float64, grad_fn=<AddBackward0>)
Yield on sim. iteration 1 was f68.7%.
current params: tensor([5.1000, 4.9000, 4.9000, 5.1000, 5.1000, 5.1000], dtype=torch.float64)
Grad:  tensor([-0.0124, -0.0084, -0.0131, -0.3562,  0.2452,  0.0839],
       dtype=torch.flo

<KineticAssembly_AD.vectorized_rxn_net.VectorizedRxnNet at 0x7f7f60c366d0>

## Track the loss function over optim iterations

In [None]:

import matplotlib.pyplot as plt

# Plot the error recorded by the optimizer (optim.mse_error) against the
# optimization iteration.
fig, ax = plt.subplots()
ax.plot(optim.mse_error)
ax.tick_params(labelsize='xx-large')

# Both axis labels share one font size.
label_size = 25
ax.set_xlabel("Iterations", fontsize=label_size)
ax.set_ylabel("MSE", fontsize=label_size)


In [None]:
sys.path.append("../")
import numpy as np
from reaction_network import gtostr

# Rebuild the per-uid reaction summary (first edge seen for a uid wins).
# NOTE(review): 'kon' below is read from the edge attributes of `rn`, whereas
# the tensors printed at the end come from `vec_rn.kon`; the two may differ
# unless `rn` has been re-synced with the vectorized network — confirm.
uid_dict = {}
for node in rn.network.nodes():
    node_label = gtostr(rn.network.nodes[node]['struct'])
    for successor, edge_attrs in rn.network[node].items():
        rxn_uid = edge_attrs['uid']
        # Character sets of the labels: this node, and what the successor has
        # in addition to it.
        r1 = set(node_label)
        r2 = set(gtostr(rn.network.nodes[successor]['struct'])) - r1
        uid_dict.setdefault(
            rxn_uid,
            {
                'reactants': (r1, r2),
                'kon': edge_attrs['k_on'],
                'score': edge_attrs['rxn_score'],
                'koff': edge_attrs['k_off'],
                'uid': rxn_uid,
            },
        )
    print(node_label)

# Print each rate in vec_rn.kon together with its reaction record, ordered by
# ascending rate value.
ind_sort = np.argsort(vec_rn.kon.detach().numpy())
for rxn_idx in ind_sort:
    print(vec_rn.kon[rxn_idx])
    print(uid_dict[rxn_idx])

## Get all the parameter values

### These can be stored in a file for later analysis, or used to find the best parameter values under some condition, e.g. the values that give the minimum error.

In [None]:
# Gather the stored yields (as Python floats) and the matching parameter
# vectors (as NumPy arrays), one entry per stored solution.
n_saved = len(optim.final_yields)
yields = [optim.final_yields[idx].item() for idx in range(n_saved)]
final_params = [optim.final_solns[idx].numpy() for idx in range(n_saved)]

yields_arr = np.array(yields)
# Indices that would order the solutions by ascending yield.
sort_indx = np.argsort(yields_arr)
# Despite their names, these arrays are NOT reordered by sort_indx: they keep
# the order in which the optimizer stored the solutions.
sorted_yields = yields_arr
sorted_params = np.array(final_params)


# Parameter vector of the last stored solution.
print(sorted_params[-1])

### Can also plot the yield at the sim_runtime over all optim iterations

In [None]:
# Plot the yield over the optimization iterations using the optimizer's
# built-in helper.
optim.plot_yield()