# A notebook to perform a genetic algorithm for conformer analysis. This is currently being implemented on just molecules.

In [1]:
import os
import sys
import logging
FORMAT = "%(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s"
logging.basicConfig(format=FORMAT, level=logging.INFO)

import re
import imp
import itertools
import random
import numpy as np
from numpy import array
import pandas as pd
import matplotlib
matplotlib.use('Agg')
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns


# do this before we have a chance to import openbabel!
import rdkit, rdkit.Chem.rdDistGeom, rdkit.DistanceGeometry

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import rdBase

import py3Dmol

from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy.reaction import Reaction


from multi_molecule import *
from multi_reaction import *

from ase.calculators.morse import * #chosing this calculator for now because it's fast
from ase.calculators.dftb import *
from ase.calculators.lj import *
from ase.calculators.emt import *

from copy import deepcopy

thermo.py:760 loadLibraries INFO Loading thermodynamics library from CBS_QB3_1dHR.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:760 loadLibraries INFO Loading thermodynamics library from KlippensteinH2O2.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:760 loadLibraries INFO Loading thermodynamics library from primaryThermoLibrary.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:760 loadLibraries INFO Loading thermodynamics library from thermo_DFT_CCSDTF12_BAC.py in /Users/nathan/Code/RMG-database/input/thermo/libraries...
thermo.py:774 loadGroups INFO Loading thermodynamics group database from /Users/nathan/Code/RMG-database/input/thermo/groups...
transport.py:294 loadGroups INFO Loading transport group database from /Users/nathan/Code/RMG-database/input/transport/groups...
database.py:165 loadFamilies INFO Loading the user-specified kinetics families from /Users/nathan/Code/RMG-database/input/kinetics/famil

In [2]:
mol= Multi_Molecule("COCCNC")
mol.view_mol()

In [3]:
possible_dihedrals = np.arange(0, 360+30, 30)


calc = EMT()
mol.ase_molecule.set_calculator(calc)

population_size = 20

population = []

for i in range(population_size):
    mol_copy = deepcopy(mol)
    dihedrals = []
    
    for torsion in mol_copy.torsions:
        dihedral = np.random.choice(possible_dihedrals)
        dihedrals.append(dihedral)
        i,j,k,l = torsion.indices
        RHS = torsion.RHS
        
        mol_copy.ase_molecule.set_dihedral(i,j,k,l, dihedral)
    
    mol_copy.update_geometry_from_ase_mol()
    
    
    e = mol_copy.ase_molecule.get_potential_energy()
    
    population.append( [e] + dihedrals )
    
df = pd.DataFrame(population)
columns = ["Energy"]
for i in range(len(mol.torsion_list)):
    columns = columns +["Torsion " + str(i)] 
df.columns = columns
df

Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
0,12.085835,60,90,90
1,20.326789,360,30,120
2,119.367509,210,0,0
3,22.731774,0,120,300
4,8.486937,120,150,300
5,46.986866,360,0,240
6,90.934561,330,300,360
7,25.745748,150,90,330
8,95.881304,210,270,270
9,81.425879,210,180,60


In [4]:
df = df.sort("Energy")
df

  if __name__ == '__main__':


Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
4,8.486937,120,150,300
0,12.085835,60,90,90
11,18.637752,270,150,270
1,20.326789,360,30,120
3,22.731774,0,120,300
15,23.048696,30,30,0
18,24.042188,60,60,330
7,25.745748,150,90,330
12,29.683038,0,300,300
17,30.349924,180,30,180


## We now have a population of 20 molecules that have been sorted based on their energies.

### Now we need to set the following:

* Chance of cross-over
* Chance of mutation
* Percentage of the population that survives

`I honestly don't know which numbers to select, but here are random guesses`

In [5]:
crossover_probability = 0.5
mutation_probability = 0.1
survival_percent = 0.5

population_size = 20

In [6]:
top_population_size = int(survival_percent * float(population_size))
bottom_population_size = int((1-survival_percent) * float(population_size))

In [7]:
dff = df.iloc[:top_population_size,:]


In [8]:
population = []
for i in range(bottom_population_size):
    m,f = random.sample(np.arange(top_population_size), 2)
    
    print "The `male` is {0} and the `female` is {1}".format(m,f)
    
    
    
    mol_copy = deepcopy(mol)
    dihedrals = []
    
    for i, torsion in enumerate(mol_copy.torsions):
        
        print i
        
        mutation = random.random()
        
        crossover = random.random()
        
        if mutation > mutation_probability:
            if crossover > crossover_probability:
                dihedral = dff.iloc[m, i+1]
                print "The male torsion was chosen. The dihedral is {}".format(dihedral)
            else:
                dihedral = dff.iloc[f, i+1]
                print "The male torsion was chosen. The dihedral is {}".format(dihedral)
        else:
            dihedral = np.random.choice(possible_dihedrals)
            print "MUTATION. The dihedral is {}".format(dihedral)
        
        print
        dihedrals.append(dihedral)
        i,j,k,l = torsion.indices
        RHS = torsion.RHS
        
        mol_copy.ase_molecule.set_dihedral(i,j,k,l, dihedral)
    
    mol_copy.update_geometry_from_ase_mol()
    
    
    e = mol_copy.ase_molecule.get_potential_energy()
    
    population.append( [e] + dihedrals )
    
dfff = pd.DataFrame(population)
columns = ["Energy"]
for i in range(len(mol.torsion_list)):
    columns = columns +["Torsion " + str(i)] 
dfff.columns = columns
dfff

The `male` is 2 and the `female` is 9
0
The male torsion was chosen. The dihedral is 180.0

1
The male torsion was chosen. The dihedral is 30.0

2
The male torsion was chosen. The dihedral is 270.0

The `male` is 9 and the `female` is 2
0
The male torsion was chosen. The dihedral is 180.0

1
The male torsion was chosen. The dihedral is 150.0

2
The male torsion was chosen. The dihedral is 270.0

The `male` is 0 and the `female` is 4
0
The male torsion was chosen. The dihedral is 120.0

1
The male torsion was chosen. The dihedral is 120.0

2
The male torsion was chosen. The dihedral is 300.0

The `male` is 6 and the `female` is 7
0
MUTATION. The dihedral is 0

1
The male torsion was chosen. The dihedral is 60.0

2
The male torsion was chosen. The dihedral is 330.0

The `male` is 4 and the `female` is 5
0
The male torsion was chosen. The dihedral is 30.0

1
The male torsion was chosen. The dihedral is 120.0

2
The male torsion was chosen. The dihedral is 300.0

The `male` is 1 and the `f

Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
0,33.459979,180.0,30.0,270.0
1,28.507596,180.0,150.0,270.0
2,7.978008,120.0,120.0,300.0
3,50.271721,0.0,60.0,330.0
4,17.211048,30.0,120.0,300.0
5,24.636449,360.0,30.0,90.0
6,22.587018,270.0,240.0,120.0
7,50.271721,360.0,60.0,330.0
8,50.200671,0.0,90.0,330.0
9,26.920011,300.0,90.0,180.0


In [9]:
new_population = dff.append(dfff)
new_population.sort("Energy")

  from ipykernel import kernelapp as app


Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
2,7.978008,120.0,120.0,300.0
4,8.486937,120.0,150.0,300.0
0,12.085835,60.0,90.0,90.0
4,17.211048,30.0,120.0,300.0
11,18.637752,270.0,150.0,270.0
1,20.326789,360.0,30.0,120.0
6,22.587018,270.0,240.0,120.0
3,22.731774,0.0,120.0,300.0
15,23.048696,30.0,30.0,0.0
18,24.042188,60.0,60.0,330.0


## Sweet, we now have set up a basic genetic algorithm for a molecule 

### Now to try this for multiple generations automatically

In [10]:
# First, making the first generation

possible_dihedrals = np.arange(0, 360, 30)


calc = EMT()
mol.ase_molecule.set_calculator(calc)

population_size = 50

population = []

for i in range(population_size):
    mol_copy = deepcopy(mol)
    dihedrals = []
    
    for torsion in mol_copy.torsions:
        dihedral = np.random.choice(possible_dihedrals)
        dihedrals.append(dihedral)
        i,j,k,l = torsion.indices
        RHS = torsion.RHS
        
        mol_copy.ase_molecule.set_dihedral(i,j,k,l, dihedral)
    
    mol_copy.update_geometry_from_ase_mol()
    
    
    e = mol_copy.ase_molecule.get_potential_energy()
    
    population.append( [e] + dihedrals )
    
df = pd.DataFrame(population)
columns = ["Energy"]
for i in range(len(mol.torsion_list)):
    columns = columns +["Torsion " + str(i)] 
df.columns = columns
df = df.sort("Energy")
df



Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
6,11.640717,90,120,240
31,12.109456,120,210,180
42,13.139068,90,240,150
37,14.127891,60,150,180
22,16.081751,90,30,90
38,17.307473,30,180,300
20,17.739538,270,120,150
3,18.439573,120,60,30
32,19.820995,270,90,270
30,20.709429,30,210,210


Setting the percentages and probabilities and selecting the top population

In [16]:
crossover_probability = 0.5
mutation_probability = 0.25
survival_percent = 0.25
population_size = 100

top_population_size = int(survival_percent * float(population_size))
bottom_population_size = int((1-survival_percent) * float(population_size))


In [17]:
generations = 100
mol_copy = deepcopy(mol)

for generation in range(generations):
    print "This is the {}th generation".format(generation)
    top_population = df.iloc[:top_population_size,:]
    population = []
    for i in range(population_size):
        mf = random.sample(np.arange(top_population_size), 2)
        m = max(mf)
        f = min(mf)

        print "The `male` is {0} and the `female` is {1}".format(m,f)



        #mol_copy = deepcopy(mol)
        dihedrals = []

        for i, torsion in enumerate(mol_copy.torsions):

            print i

            mutation = random.random()

            crossover = random.random()

            if mutation > mutation_probability:
                if crossover > crossover_probability:
                    dihedral = top_population.iloc[m, i+1]
                    print "The male torsion was chosen. The dihedral is {}".format(dihedral)
                else:
                    dihedral = top_population.iloc[f, i+1]
                    print "The male torsion was chosen. The dihedral is {}".format(dihedral)
            else:
                dihedral = np.random.choice(possible_dihedrals)
                print "MUTATION. The dihedral is {}".format(dihedral)

            print
            dihedrals.append(dihedral)
            i,j,k,l = torsion.indices
            RHS = torsion.RHS

            mol_copy.ase_molecule.set_dihedral(i,j,k,l, dihedral)

        mol_copy.update_geometry_from_ase_mol()


        e = mol_copy.ase_molecule.get_potential_energy()

        population.append( [e] + dihedrals )

    df = pd.DataFrame(population)
    columns = ["Energy"]
    for i in range(len(mol.torsion_list)):
        columns = columns +["Torsion " + str(i)] 
    df.columns = columns
    """
    df = top_population.append(new_population)"""
    df = df.sort("Energy")
    print df.iloc[0:5,:]
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

This is the 0th generation
The `male` is 16 and the `female` is 5
0
The male torsion was chosen. The dihedral is 90.0

1
MUTATION. The dihedral is 210

2
MUTATION. The dihedral is 270

The `male` is 21 and the `female` is 1
0
MUTATION. The dihedral is 60

1
The male torsion was chosen. The dihedral is 30.0

2
MUTATION. The dihedral is 240

The `male` is 13 and the `female` is 10
0
MUTATION. The dihedral is 90

1
The male torsion was chosen. The dihedral is 240.0

2
The male torsion was chosen. The dihedral is 60.0

The `male` is 10 and the `female` is 1
0
The male torsion was chosen. The dihedral is 150.0

1
The male torsion was chosen. The dihedral is 90.0

2
The male torsion was chosen. The dihedral is 60.0

The `male` is 16 and the `female` is 7
0
The male torsion was chosen. The dihedral is 120.0

1
MUTATION. The dihedral is 210

2
MUTATION. The dihedral is 0

The `male` is 14 and the `female` is 12
0
MUTATION. The dihedral is 300

1
The male torsion was chosen. The dihedral is 210



The `male` is 10 and the `female` is 3
0
The male torsion was chosen. The dihedral is 150.0

1
MUTATION. The dihedral is 90

2
The male torsion was chosen. The dihedral is 30.0

The `male` is 23 and the `female` is 8
0
The male torsion was chosen. The dihedral is 30.0

1
The male torsion was chosen. The dihedral is 330.0

2
The male torsion was chosen. The dihedral is 60.0

The `male` is 5 and the `female` is 4
0
MUTATION. The dihedral is 120

1
MUTATION. The dihedral is 210

2
MUTATION. The dihedral is 270

The `male` is 24 and the `female` is 22
0
The male torsion was chosen. The dihedral is 60.0

1
The male torsion was chosen. The dihedral is 180.0

2
The male torsion was chosen. The dihedral is 120.0

The `male` is 14 and the `female` is 9
0
The male torsion was chosen. The dihedral is 150.0

1
The male torsion was chosen. The dihedral is 180.0

2
MUTATION. The dihedral is 90

The `male` is 12 and the `female` is 5
0
The male torsion was chosen. The dihedral is 150.0

1
The male to

In [18]:
df

Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
32,8.636404,180.0,210.0,300.0
33,8.636404,180.0,210.0,300.0
34,8.636404,180.0,210.0,300.0
75,8.655020,180.0,210.0,300.0
73,8.655020,180.0,210.0,300.0
70,8.655020,180.0,210.0,300.0
67,8.655020,180.0,210.0,300.0
65,8.655020,180.0,210.0,300.0
64,8.655020,180.0,210.0,300.0
27,8.752908,180.0,210.0,300.0


## Okay, we have something. So now let's compare this to the brute force method

In [14]:
mol

<multi_molecule.Multi_Molecule instance at 0x1043be710>

In [15]:

mol.ase_molecule.set_calculator(calc)

# Getting the torsion combos
torsion_list = mol.torsions
torsion_angles = np.arange(0, 360,30) ### You can change the degree step size
torsion_combos = list( itertools.combinations_with_replacement( torsion_angles, len(torsion_list)) )
if len(torsion_list) != 1:
    torsion_combos = list(
        set(
            torsion_combos + 
            list(itertools.combinations_with_replacement( 
                torsion_angles[::-1], len(torsion_list)
            ))))

    results = []
    
# Calculating the potential energy for each conformation
for combo in torsion_combos:
    geo = zip(torsion_list, combo)
    #print geo
    for torsion in geo:
        tor = torsion[0]
        #print tor
        angle = torsion[1]
        
        i,j,k,l = tor.indices
        RHS =  tor.RHS
        mol.ase_molecule.set_dihedral(a1 = i,
                                a2 = j, 
                                a3 = k, 
                                a4 = l, 
                                angle= float(angle), 
                                indices=RHS)
    mol.update_geometry_from_ase_mol()
    results.append([mol.ase_molecule.get_potential_energy()] + list(combo) )

# Creating a dataframe of the results
brute_force = pd.DataFrame(results)
columns = ["Energy"]
for i in range(len(torsion_list)):
    columns = columns + ["Torsion " + str(i)] 

brute_force.columns = columns

brute_force.sort("Energy")



Unnamed: 0,Energy,Torsion 0,Torsion 1,Torsion 2
371,5.978668,0,270,270
3,5.981034,330,270,120
595,5.985056,330,270,270
18,5.985639,0,240,270
75,5.987366,330,300,270
99,5.987848,330,300,150
239,5.987863,330,270,150
481,5.988702,330,270,180
517,5.989538,0,240,240
294,5.990343,330,300,180


Hmmm... we keep running into a local minimum... I need help with this