# A notebook to perform a genetic algorithm for conformer analysis. This is currently being implemented on just molecules.

In [1]:
import os
import sys
import logging
FORMAT = "%(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s"
logging.basicConfig(format=FORMAT, level=logging.INFO)

import re
import imp
import itertools
import random
import numpy as np
from numpy import array
import pandas as pd
import matplotlib
matplotlib.use('Agg')
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns


# do this before we have a chance to import openbabel!
import rdkit, rdkit.Chem.rdDistGeom, rdkit.DistanceGeometry

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import rdBase

import py3Dmol

from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy.reaction import Reaction


from multi_molecule import *
from multi_reaction import *

from ase.calculators.morse import * #chosing this calculator for now because it's fast
from ase.calculators.dftb import *
from ase.calculators.lj import *
from ase.calculators.emt import *

from copy import deepcopy

thermo.py:760 loadLibraries INFO Loading thermodynamics library from CBS_QB3_1dHR.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:760 loadLibraries INFO Loading thermodynamics library from KlippensteinH2O2.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:760 loadLibraries INFO Loading thermodynamics library from primaryThermoLibrary.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:760 loadLibraries INFO Loading thermodynamics library from thermo_DFT_CCSDTF12_BAC.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:774 loadGroups INFO Loading thermodynamics group database from /Users/nathan/Code/RMG-database/input/thermo/groups...
transport.py:294 loadGroups INFO Loading transport group database from /Users/nathan/Code/RMG-database/input/transport/groups...
database.py:165 loadFamilies INFO Loading the user-specified kinetics families from /Users/nathan/Code/RMG-database/input/kinetics/famil

In [2]:
mol= Multi_Molecule("COCCNC")
mol.view_mol()

In [3]:
possible_dihedrals = np.arange(0, 360, 30)


calc = EMT()
mol.ase_molecule.set_calculator(calc)

population_size = 100

population = []

for i in range(population_size):
    mol_copy = deepcopy(mol)
    dihedrals = []
    
    for torsion in mol_copy.torsions:
        dihedral = np.random.choice(possible_dihedrals)
        dihedrals.append(dihedral)
        i,j,k,l = torsion.indices
        RHS = torsion.RHS
        
        mol_copy.ase_molecule.set_dihedral(a1 = i,
                                a2 = j, 
                                a3 = k, 
                                a4 = l, 
                                angle= float(dihedral), 
                                indices=RHS)
    
    mol_copy.update_geometry_from_ase_mol()
    
    
    e = mol_copy.ase_molecule.get_potential_energy()
    
    population.append( [e] + dihedrals )
    
df = pd.DataFrame(population)
columns = ["Energy"]
for i in range(len(mol.torsion_list)):
    columns = columns +["Torsion " + str(i)] 
df.columns = columns
df

Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
0,6.060527,120,90,60
1,6.234764,120,0,240
2,34.715638,330,60,330
3,6.121764,330,150,210
4,13.699830,330,60,60
5,6.955710,30,0,210
6,6.431664,120,330,150
7,6.040407,270,90,150
8,45.020408,60,0,270
9,6.059106,60,150,60


In [4]:
df = df.sort("Energy")
df

  if __name__ == '__main__':


Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
60,5.991998,0,210,90
12,5.992723,330,270,210
93,5.993920,0,210,180
89,5.994573,330,240,270
58,5.998988,30,240,240
49,6.011627,30,180,180
38,6.012842,300,270,210
47,6.020397,330,300,300
62,6.029739,150,60,120
45,6.033420,180,270,150


## We now have a population of 20 molecules that have been sorted based on their energies.

### Now we need to set the following:

* Chance of cross-over
* Chance of mutation
* Percentage of the population that survives

`I honestly don't know which numbers to select, but here are random guesses`

In [5]:
crossover_probability = 0.8
mutation_probability = 0.2
top_selection_percent = 0.333333333

population_size = 20

In [6]:
top_population_size = int(top_selection_percent * float(population_size))
bottom_population_size = int((1-top_selection_percent) * float(population_size))

In [7]:
dff = df.iloc[:top_population_size,:]


In [8]:
population = []
for i in range(population_size):
    m,f = random.sample(np.arange(top_population_size), 2)
    
    print "The `male` is {0} and the `female` is {1}".format(m,f)
    
    
    
    mol_copy = deepcopy(mol)
    dihedrals = []
    
    for i, torsion in enumerate(mol_copy.torsions):
        
        print i
        
        chance = random.random()
        
        if chance < mutation_probability:
            dihedral = np.random.choice(possible_dihedrals)
            print "MUTATION. The dihedral is {}".format(dihedral)
            
        
        else:
            crossover = random.random()
            if 0.5 > crossover:
                dihedral = dff.iloc[m, i+1]
                print "The male torsion was chosen. The dihedral is {}".format(dihedral)
            else:
                dihedral = dff.iloc[f, i+1]
                print "The male torsion was chosen. The dihedral is {}".format(dihedral)
        
        
        print
        dihedrals.append(dihedral)
        i,j,k,l = torsion.indices
        RHS = torsion.RHS
        
        mol_copy.ase_molecule.set_dihedral(a1 = i,
                                a2 = j, 
                                a3 = k, 
                                a4 = l, 
                                angle= float(dihedral), 
                                indices=RHS)
    
    mol_copy.update_geometry_from_ase_mol()
    
    
    e = mol_copy.ase_molecule.get_potential_energy()
    
    population.append( [e] + dihedrals )
    
new_population = pd.DataFrame(population)
columns = ["Energy"]
for i in range(len(mol.torsion_list)):
    columns = columns +["Torsion " + str(i)] 
new_population.columns = columns
new_population.sort("Energy")

The `male` is 0 and the `female` is 1
0
MUTATION. The dihedral is 90

1
The male torsion was chosen. The dihedral is 210.0

2
MUTATION. The dihedral is 240

The `male` is 2 and the `female` is 3
0
The male torsion was chosen. The dihedral is 330.0

1
The male torsion was chosen. The dihedral is 240.0

2
MUTATION. The dihedral is 240

The `male` is 1 and the `female` is 3
0
The male torsion was chosen. The dihedral is 330.0

1
The male torsion was chosen. The dihedral is 270.0

2
MUTATION. The dihedral is 60

The `male` is 4 and the `female` is 5
0
The male torsion was chosen. The dihedral is 30.0

1
The male torsion was chosen. The dihedral is 240.0

2
The male torsion was chosen. The dihedral is 240.0

The `male` is 4 and the `female` is 3
0
The male torsion was chosen. The dihedral is 30.0

1
MUTATION. The dihedral is 270

2
The male torsion was chosen. The dihedral is 240.0

The `male` is 0 and the `female` is 5
0
The male torsion was chosen. The dihedral is 30.0

1
The male torsion



Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
18,5.992723,330.0,270.0,210.0
17,5.997715,0.0,210.0,210.0
6,5.997715,0.0,210.0,210.0
16,5.998155,0.0,210.0,240.0
3,5.998988,30.0,240.0,240.0
1,6.003003,330.0,240.0,240.0
5,6.003101,30.0,210.0,210.0
4,6.004596,30.0,270.0,240.0
9,6.011627,30.0,180.0,180.0
11,6.025432,0.0,180.0,180.0


## Sweet, we now have set up a basic genetic algorithm for a molecule 

### Now to try this for multiple generations automatically

In [9]:
# First, making the first generation

possible_dihedrals = np.arange(0, 360, 30)


calc = EMT()
mol.ase_molecule.set_calculator(calc)

population_size = 20

population = []

for i in range(population_size):
    mol_copy = deepcopy(mol)
    dihedrals = []
    
    for torsion in mol_copy.torsions:
        dihedral = np.random.choice(possible_dihedrals)
        dihedrals.append(dihedral)
        i,j,k,l = torsion.indices
        RHS = torsion.RHS
        
        mol_copy.ase_molecule.set_dihedral(a1 = i,
                                a2 = j, 
                                a3 = k, 
                                a4 = l, 
                                angle= float(dihedral), 
                                indices=RHS)
    
    mol_copy.update_geometry_from_ase_mol()
    
    
    e = mol_copy.ase_molecule.get_potential_energy()
    
    population.append( [e] + dihedrals )
    
df = pd.DataFrame(population)
columns = ["Energy"]
for i in range(len(mol.torsion_list)):
    columns = columns +["Torsion " + str(i)] 
df.columns = columns
df = df.sort("Energy")
df



Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
0,6.010811,60,210,90
16,6.033274,210,300,120
12,6.049675,330,180,150
19,6.055499,120,150,270
2,6.072411,240,270,300
14,6.101048,180,120,300
8,6.109269,210,330,90
1,6.109269,210,330,90
4,6.118018,330,150,270
18,6.118254,90,150,300


Setting the percentages and probabilities and selecting the top population

In [10]:
crossover_probability = 0.8
mutation_probability = 0.2
top_selection_percent = 0.333333333

top_population_size = int(top_selection_percent * float(population_size))
bottom_population_size = int((1-top_selection_percent) * float(population_size))

In [11]:
generations = 50
mol_copy = deepcopy(mol)

for generation in range(generations):
    print "This is the {}th generation".format(generation)
    top_population = df.iloc[:top_population_size,:]
    population = []
    for i in range(population_size):
        mf = random.sample(np.arange(top_population_size), 2)
        m = max(mf)
        f = min(mf)

        print "The `male` is {0} and the `female` is {1}".format(m,f)



        #mol_copy = deepcopy(mol)
        dihedrals = []

        for i, torsion in enumerate(mol_copy.torsions):

            print i
            
            chance = random.random()

            if chance < mutation_probability:
                dihedral = np.random.choice(possible_dihedrals)
                print "MUTATION. The dihedral is {}".format(dihedral)


            else:
                crossover = random.random()
                if 0.5 > crossover:
                    dihedral = dff.iloc[m, i+1]
                    print "The male torsion was chosen. The dihedral is {}".format(dihedral)
                else:
                    dihedral = dff.iloc[f, i+1]
                    print "The male torsion was chosen. The dihedral is {}".format(dihedral)


            print
            dihedrals.append(dihedral)
            i,j,k,l = torsion.indices
            RHS = torsion.RHS

            mol_copy.ase_molecule.set_dihedral(a1 = i,
                                a2 = j, 
                                a3 = k, 
                                a4 = l, 
                                angle= float(dihedral), 
                                indices=RHS)

        mol_copy.update_geometry_from_ase_mol()


        e = mol_copy.ase_molecule.get_potential_energy()

        population.append( [e] + dihedrals )

    df = pd.DataFrame(population)
    columns = ["Energy"]
    for i in range(len(mol.torsion_list)):
        columns = columns +["Torsion " + str(i)] 
    df.columns = columns
    """
    df = top_population.append(new_population)"""
    df = df.sort("Energy")
    #print df.iloc[0:5,:]
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

This is the 0th generation
The `male` is 5 and the `female` is 3
0
The male torsion was chosen. The dihedral is 330.0

1
The male torsion was chosen. The dihedral is 240.0

2
The male torsion was chosen. The dihedral is 270.0

The `male` is 5 and the `female` is 1
0
The male torsion was chosen. The dihedral is 330.0

1
MUTATION. The dihedral is 90

2
The male torsion was chosen. The dihedral is 210.0

The `male` is 2 and the `female` is 0
0
The male torsion was chosen. The dihedral is 0.0

1
The male torsion was chosen. The dihedral is 210.0

2
The male torsion was chosen. The dihedral is 180.0

The `male` is 5 and the `female` is 1
0
The male torsion was chosen. The dihedral is 330.0

1
The male torsion was chosen. The dihedral is 180.0

2
The male torsion was chosen. The dihedral is 180.0

The `male` is 3 and the `female` is 1
0
The male torsion was chosen. The dihedral is 330.0

1
The male torsion was chosen. The dihedral is 270.0

2
The male torsion was chosen. The dihedral is 270.





The `male` is 4 and the `female` is 1
0
The male torsion was chosen. The dihedral is 30.0

1
The male torsion was chosen. The dihedral is 240.0

2
The male torsion was chosen. The dihedral is 240.0

The `male` is 4 and the `female` is 2
0
The male torsion was chosen. The dihedral is 0.0

1
The male torsion was chosen. The dihedral is 210.0

2
MUTATION. The dihedral is 270

The `male` is 5 and the `female` is 3
0
MUTATION. The dihedral is 150

1
The male torsion was chosen. The dihedral is 180.0

2
The male torsion was chosen. The dihedral is 180.0

The `male` is 4 and the `female` is 0
0
The male torsion was chosen. The dihedral is 0.0

1
The male torsion was chosen. The dihedral is 240.0

2
The male torsion was chosen. The dihedral is 240.0

The `male` is 5 and the `female` is 4
0
MUTATION. The dihedral is 30

1
The male torsion was chosen. The dihedral is 240.0

2
The male torsion was chosen. The dihedral is 240.0

The `male` is 5 and the `female` is 4
0
The male torsion was chosen

In [12]:
df

Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
15,5.981179,0.0,270.0,210.0
19,5.99334,330.0,270.0,240.0
2,5.99392,0.0,210.0,180.0
17,5.99392,0.0,210.0,180.0
8,6.000926,30.0,270.0,180.0
18,6.004424,30.0,270.0,210.0
5,6.006816,330.0,210.0,270.0
4,6.009775,330.0,210.0,180.0
14,6.03113,330.0,240.0,300.0
1,6.031936,0.0,180.0,240.0


## Okay, we have something. So now let's compare this to the brute force method

In [13]:
mol

<multi_molecule.Multi_Molecule instance at 0x104082908>

In [14]:

mol.ase_molecule.set_calculator(calc)

# Getting the torsion combos
torsion_list = mol.torsions
torsion_angles = np.arange(0, 360,30) ### You can change the degree step size
torsion_combos = list( itertools.combinations_with_replacement( torsion_angles, len(torsion_list)) )
if len(torsion_list) != 1:
    torsion_combos = list(
        set(
            torsion_combos + 
            list(itertools.combinations_with_replacement( 
                torsion_angles[::-1], len(torsion_list)
            ))))

    results = []
    
# Calculating the potential energy for each conformation
for combo in torsion_combos:
    geo = zip(torsion_list, combo)
    #print geo
    for torsion in geo:
        tor = torsion[0]
        #print tor
        angle = torsion[1]
        
        i,j,k,l = tor.indices
        RHS =  tor.RHS
        mol.ase_molecule.set_dihedral(a1 = i,
                                a2 = j, 
                                a3 = k, 
                                a4 = l, 
                                angle= float(angle), 
                                indices=RHS)
    mol.update_geometry_from_ase_mol()
    results.append([mol.ase_molecule.get_potential_energy()] + list(combo) )

# Creating a dataframe of the results
brute_force = pd.DataFrame(results)
columns = ["Energy"]
for i in range(len(torsion_list)):
    columns = columns + ["Torsion " + str(i)] 

brute_force.columns = columns

brute_force.sort("Energy")



Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
371,5.978668,0,270,270
3,5.981034,330,270,120
595,5.985056,330,270,270
18,5.985639,0,240,270
75,5.987366,330,300,270
99,5.987848,330,300,150
239,5.987863,330,270,150
481,5.988702,330,270,180
517,5.989538,0,240,240
294,5.990343,330,300,180


Hmmm... we keep running into a local minimum... I need help with this