# Optimization Framework using auto-diff to optimize binding rates




## Import necessary modules

Every Jupyter Notebook requires the path to the KineticAssembly_AD modules (.py files in the root directory) to be mentioned. This can be done by adding the path to the 'PATH' variable of the system environment. 

Additonal modules are also imported which are required to run any analysis.

In [1]:
# make sure jupyter path is correct for loading local moudules
import sys
path_to_repo="C:\\Users\\denys\\AMGEN\\"   
#Insert your path here
# path_to_repo=""
sys.path.append(path_to_repo)

import copy
from KineticAssembly_AD import ReactionNetwork, VectorizedRxnNet, VecSim, Optimizer, EquilibriumSolver
import networkx as nx
import torch
from torch import DoubleTensor as Tensor

## Setup Reaction Network
Before we begin to run the optimization routine, we need to create a Reaction Network that stores all the parameters required to run a simulation and other routines. The Reaction Network can be created by reading an input file. More information on how to create an input file can be found in the User Guide. 

Here a simple trimer model is used to run a simulation.
#### Read the corresponding input file and call the ReactionNetwork class

In [2]:
base_input = './tetramer_titration_multi.pwr'
rn = ReactionNetwork(base_input, one_step=True)
rn.resolve_tree()

['default_assoc', 1.0]
['titration_time_int', 100]
Setting Titration End Point
['monomer_add_only', True]
Found Creation rxn
Found Creation rxn
[(0, {'struct': <networkx.classes.graph.Graph object at 0x0000021D7BC87908>, 'copies': tensor([0.], dtype=torch.float64), 'subunits': 1}), (1, {'struct': <networkx.classes.graph.Graph object at 0x0000021D73E76390>, 'copies': tensor([0.], dtype=torch.float64), 'subunits': 1}), (2, {'struct': <networkx.classes.graph.Graph object at 0x0000021D73E764E0>, 'copies': tensor([100.], dtype=torch.float64), 'subunits': 1}), (3, {'struct': <networkx.classes.graph.Graph object at 0x0000021D73E76160>, 'copies': tensor([100.], dtype=torch.float64), 'subunits': 1})]
New node added - Node index: 4 ; Node label: AM 
New node added - Node index: 5 ; Node label: AB 
New node added - Node index: 6 ; Node label: AS 
New node added - Node index: 7 ; Node label: BM 
New node added - Node index: 8 ; Node label: MS 
New node added - Node index: 9 ; Node label: ABM 
New 

## Checking reaction network

The ReactionNetwork is a networkx object which creates a graph network with each node as species that can be present in the system according to the binding rules given in the input file. Each node has a unique index number that can be used to access attributes stored for that species. Each edge represents a reaction and is associated with a unique reaction_id, on and off rates and the dG value for that reaction.


After creating a Reaction Network we can looping over all network nodes to check if all species are created
Creating a dictionary for later reference. This dictionary holds the reactants as keys and values as the reaction index

In [3]:
uid_dict = {}
sys.path.append("../")
import numpy as np
from reaction_network import gtostr

print("Species present in the Reaction Network: ")
print("%3s  %2s  %2s" %("Index","--",'Species'))
for n in rn.network.nodes():
    print("%3d  %4s  %-6s" %(n,"--",gtostr(rn.network.nodes[n]['struct'])))
    for k,v in rn.network[n].items():
        uid = v['uid']
        r1 = set(gtostr(rn.network.nodes[n]['struct']))
        p = set(gtostr(rn.network.nodes[k]['struct']))
        r2 = p-r1
        reactants = (r1,r2)
        uid_dict[(n,k)] = uid

print()
print("Total Number of Reactions: ",rn._rxn_count)
print("Total Number of Species: ",len(rn.network.nodes()))
        
# Dictionary that stores source,destination of an edge and maps it to its unique id
#Key : (First Reactant, Product)
#Value : (Reaction_id)
print()
print(uid_dict)

Species present in the Reaction Network: 
Index  --  Species
  0    --  A     
  1    --  M     
  2    --  B     
  3    --  S     
  4    --  AM    
  5    --  AB    
  6    --  AS    
  7    --  BM    
  8    --  MS    
  9    --  ABM   
 10    --  AMS   
 11    --  BS    
 12    --  ABS   
 13    --  BMS   
 14    --  ABMS  

Total Number of Reactions:  24
Total Number of Species:  15

{(0, 4): 0, (0, 5): 1, (0, 6): 2, (0, 9): 16, (0, 10): 17, (0, 12): 18, (0, 14): 21, (1, 4): 0, (1, 7): 3, (1, 8): 4, (1, 9): 5, (1, 10): 6, (1, 13): 19, (1, 14): 20, (2, 5): 1, (2, 7): 3, (2, 11): 7, (2, 9): 8, (2, 12): 9, (2, 13): 10, (2, 14): 11, (3, 6): 2, (3, 8): 4, (3, 11): 7, (3, 10): 12, (3, 12): 13, (3, 13): 14, (3, 14): 15, (4, 9): 8, (4, 10): 12, (5, 9): 5, (5, 12): 13, (6, 10): 6, (6, 12): 9, (7, 13): 14, (7, 9): 16, (8, 13): 10, (8, 10): 17, (9, 14): 15, (10, 14): 11, (11, 12): 18, (11, 13): 19, (12, 14): 20, (13, 14): 21}


## Set the initial parameter values 
The next step is to define the initial conditions for the simulation. The initial concentrations are specified from the input file. However, the initial value of the association rates can be specified either through the input file 

From the user_input file, currently the code only allows 1 value to be read (from default_assoc parameter).

To set starting rates to different values the next code block takes in a list/array of all rxn rates and updates them in the reaction network object.

For a hetero-trimer there are 6 reaction rates.
Also defines the Vectorized Rxn Net class

In [4]:
#Define an empty torch tensor with length equal to number of reactions
new_kon = torch.zeros([rn._rxn_count], requires_grad=True).double()

#Set the initial value of the association rates
#Note that this code sets all association rates at the same value
'''
#To set individual rates to different values, we need to create an list/array with different values.
length = rn._rxn_count
min_val = 0.1
max_val = 3.0
init_val = []

for i in range(length):
    # Linearly interpolate the current maximum from min_val up to max_val
    current_max = min_val + (i / (length - 1)) * (max_val - min_val)
    # Draw a random float uniformly between min_val and current_max
    val = np.random.uniform(min_val, current_max)
    init_val.append(val)
'''
#Else we could assign all initial values to be equal to 1; performs bad for lower indeces
init_val = 1


new_kon = new_kon + Tensor([init_val])
print("Len of new_kon", len(new_kon))
update_kon_dict = {}
for edge in rn.network.edges:
    update_kon_dict[edge] = new_kon[uid_dict[edge]]

nx.set_edge_attributes(rn.network,update_kon_dict,'k_on')
for edge in rn.network.edges:
    print(rn.network.get_edge_data(edge[0],edge[1]))

vec_rn = VectorizedRxnNet(rn, dev='cpu')
print(vec_rn.kon)

Len of new_kon 24
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackward>), 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 0}
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackward>), 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 1}
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackward>), 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-20.], dtype=torch.float64), 'uid': 2}
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackward>), 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 16}
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackward>), 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 17}
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackward>), 'k_off': None, 'lcf': 1, 'rxn_score': tensor([-40.], dtype=torch.float64), 'uid': 18}
{'k_on': tensor(1., dtype=torch.float64, grad_fn=<SelectBackw

In [5]:
uid_dict = {}
sys.path.append("../")
import numpy as np
from reaction_network import gtostr

print("Species present in the Reaction Network: ")
print("%3s  %2s  %2s" %("Index","--",'Species'))

for n in rn.network.nodes():
    #print(n)
    #print(rn.network.nodes()[n])
    print("%3d  %4s  %-6s" %(n,"--",gtostr(rn.network.nodes[n]['struct'])))
    for k,v in rn.network[n].items():
        uid = v['uid']
        r1 = set(gtostr(rn.network.nodes[n]['struct']))
        p = set(gtostr(rn.network.nodes[k]['struct']))
        r2 = p-r1
        reactants = ("".join(list(r1)),"".join(list(r2)))
        uid_val = {'reactants':reactants,'kon':v['k_on'],'score':v['rxn_score'],'koff':v['k_off'],'uid':uid}
        if uid not in uid_dict.keys():
            uid_dict[uid] = uid_val

print()
print("Initial Binding Rates: ")

ind_sort = np.argsort(vec_rn.kon.detach().numpy())
print("%-16s%-3s %12s" %("Reaction","Id","kon"))
print()
   
for i in ind_sort:
    print("%-4s%1s%4s %7d \t%8.2f" %(uid_dict[i]['reactants'],"+",uid_dict[i]['reactants'][1],uid_dict[i]['uid'],vec_rn.kon[i].item()))
    

Species present in the Reaction Network: 
Index  --  Species
  0    --  A     
  1    --  M     
  2    --  B     
  3    --  S     
  4    --  AM    
  5    --  AB    
  6    --  AS    
  7    --  BM    
  8    --  MS    
  9    --  ABM   
 10    --  AMS   
 11    --  BS    
 12    --  ABS   
 13    --  BMS   
 14    --  ABMS  

Initial Binding Rates: 
Reaction        Id           kon



KeyError: 23

## Using the optimizer ##

### Define an instance of the optimizer class
#### Input Arguments:

reaction_network : Input the vectorized rxn network

sim_runtime: The runtime of the kinetic simulation. Needs to be same as the time over the experimental reaction data.

optim_iterations: No. of iterations to run the optimization. Can start at low values(100) and increase depending upon memory usage.

learning_rate = The size of the gradient descent step for updating parameter values. Needs to be atleast (1e-3-1e-1)* min{parameter value}. If learning rate is too high, it can take a longer step and sometimes lead to negative value of parameters which is unphysical. Requires some trial runs to find the best value. 

device: cpu or gpu

method: Choose which pytorch based optimized to use for gradient descent - Adam or RMSprop

mom: Only for RMSprop method. Use momentum term during gradient descent. 


In [6]:
# import torch.utils.benchmark as benchmark
import time as time_mod

t1 = time_mod.perf_counter()

In [7]:
vec_rn.reset(reset_params=True)
optim = Optimizer(reaction_network=vec_rn,
                  sim_runtime=1,
                  optim_iterations=100,
                  learning_rate=1e-2,
                  device='cpu',method="RMSprop")


### Call the optimization method

#### Input arguments

conc_scale: Controls the conc step at each iteration. Since the numerical integration is not performed over fixed time steps but over fixed conc. steps. For e.g. for a value of 1uM, at each iteration step a total of app. 1uM is reacted (includes all species). Can be run using the default value. A general rule is use conc_scale = 0.01 * Max_yield

conc_thresh: This can be used to periodically decrease the conc_scale parameter. After each iteration if the conc_scale is greater than the conc_thresh, then the conc_scale is decreased by mod_factor. Can be run using the default value. 

mod_bool: This argument is necessary to fix the mass balance criteria. Sometimes if the conc_scale is large, then the simulation can lead to a higher consumption of a particular species which is very low in conc, and create more of this species out of nothing. Default value:True

max_thresh: Max. allowed values of parameters being updated. Beyond this maximum a penalty is imposed on the cost function. (Regularization)

max_yield: It is a control variable that is used to store the updated parameter values over all iterations for further analysis. The parameter values are stored only if the current yield exceed this max_yield. 

yield_species: Yield of the species being optimized(node index)

In [8]:
optim.rn.update_reaction_net(rn)
optim.optimize(conc_scale=1e-1,conc_thresh=1e-1,mod_bool=True,mod_factor=10,max_thresh=1e8,max_yield=0)

Reaction Parameters before optimization: 
[Parameter containing:
tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 0.1000, 0.1000], dtype=torch.float64,
       requires_grad=True)]
Optimizer State: <bound method Optimizer.state_dict of RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    eps: 1e-08
    lr: 0.01
    momentum: 0
    weight_decay: 0
)>
Using CPU
[0.10180916 0.10180916]
Yield on sim. iteration 0 was 0.09%.
current params: tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 0.1000, 0.1000], dtype=torch.float64)
tensor(0.0009, dtype=torch.float64, grad_fn=<DivBackward0>)
Using CPU
[2.93430014e-05 2.93430014e-05]
Yield on sim. iteration 1 was 3.23%.
current params: ten

KeyboardInterrupt: 

In [None]:
t2 = time_mod.perf_counter()
print("Time taken for complete analysis: %.4f" %(t2-t1))

## Track the yield over optim iterations

In [None]:
optim.plot_yield()

## Get all the parameter values

### This can be stored in a file for later analysis or used to find the best parameter value depending upon a condition. For e.g. the values that give a minimum error.

In [None]:
yields= []
final_params=[]

final_t50 = []
final_t85 = []
final_t95 = []
final_t99 = []

for i in range(len(optim.final_yields)):
    yields.append(optim.final_yields[i])
    final_params.append(optim.final_solns[i])
    
    #Storing the different time points it reaches a particular yield threshold
    if optim.final_t85[i] == -1:
        final_t85.append(1) 
    else:
        final_t85.append(optim.final_t85[i]) 
    if optim.final_t95[i] == -1:
        final_t95.append(1)
    else:
        final_t95.append(optim.final_t95[i])


final_yield_arr = np.array(yields)
final_param_arr = np.array(final_params)
final_t85 = np.array(final_t85)
final_t95 = np.array(final_t95)

### Plotting the ratio of ktri vs kdim

In [None]:
import matplotlib.pyplot as plt

mask_r = final_yield_arr > 0.1

#Calculate the ratio
ratio = final_param_arr[:,-1]/final_param_arr[:,0]

#Normalize the time scale (t = t*conc*max_rate)
conc=vec_rn.initial_copies[0].item()
scale_time = final_t95[mask_r]*conc*np.max(final_param_arr[mask_r],axis=1)
#Calculate the y_per_time
y_per_time = 0.95/scale_time

fig,ax = plt.subplots(figsize=(8,6))
ax.plot(ratio,y_per_time,linestyle='',marker='o')
ax.set_ylabel("Efficiency",fontsize=20)
ax.set_xlabel("Ratio",fontsize=20)
ax.tick_params(labelsize=25)


In [None]:
max_indx = np.argmax(y_per_time)
max_ratio = ratio[max_indx]
max_rates = final_param_arr[max_indx]
print("Ratio with maximum efficiency: ",max_ratio)

reaction_rates = np.zeros(rn._rxn_count)
counter=0
for cls,uids in vec_rn.rxn_class.items():
    for rid in uids:
        reaction_rates[rid]=max_rates[counter]
    counter+=1

print("Optimal Rates: ",list(reaction_rates))


In [None]:
import pandas as pd
print(pd.__version__)