In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

################################################################################
#
#   AutoTST - Automated Transition State Theory
#
#   Copyright (c) 2015-2018 Prof. Richard H. West (r.west@northeastern.edu)
#
#   Permission is hereby granted, free of charge, to any person obtaining a
#   copy of this software and associated documentation files (the 'Software'),
#   to deal in the Software without restriction, including without limitation
#   the rights to use, copy, modify, merge, publish, distribute, sublicense,
#   and/or sell copies of the Software, and to permit persons to whom the
#   Software is furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
#   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
#   DEALINGS IN THE SOFTWARE.
#
################################################################################

import os
import sys
import logging
FORMAT = "%(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s"
logging.basicConfig(format=FORMAT, level=logging.INFO)

import itertools
import random
import numpy as np
from numpy import array
import pandas as pd

import autotst
from autotst.geometry import Bond, Angle, Torsion, CisTrans
from autotst.molecule import AutoTST_Molecule
from autotst.reaction import AutoTST_Reaction, AutoTST_TS
from autotst.conformer.utilities import update_from_ase, create_initial_population, \
    select_top_population, get_unique_conformers



In [None]:

def get_energy(autotst_object):
    """
    Returns the potential energy of the ASE geometry attached to an AutoTST object.

    :param autotst_object: an `AutoTST_Molecule`, `AutoTST_Reaction`, or `AutoTST_TS`
       * the energy is read via `get_potential_energy()` on the matching ASE object

    :return constrained_energy: the value returned by `get_potential_energy()`
    :raises TypeError: if `autotst_object` is none of the supported types
    """
    ase_object = None

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        ase_object = autotst_object.ase_molecule

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        ase_object = autotst_object.ts.ase_ts

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        ase_object = autotst_object.ase_ts

    # Fail loudly instead of hitting an unbound local further down.
    if ase_object is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(autotst_object).__name__))

    constrained_energy = ase_object.get_potential_energy()

    return constrained_energy

In [None]:
def optimize_mol(autotst_object):
    """
    Relaxes a copy of the ASE geometry of an AutoTST object with BFGS.

    All pairwise distances between the labeled (reacting) atoms are held fixed
    with `FixBondLengths`; a plain molecule has no labeled atoms, so the
    constraint list is empty. The AutoTST object itself is not modified, only
    a copy of its ASE object is optimized.

    :param autotst_object: an `AutoTST_Molecule`, `AutoTST_Reaction`, or `AutoTST_TS`
       * the ASE object's calculator is reused for the copy

    :return relaxed_energy: potential energy of the copy after the optimizer has run
    :return ase_copy: the optimized copy of the ASE object
    :raises TypeError: if `autotst_object` is none of the supported types
    """
    # Imported locally because the notebook only imports these names in a later cell.
    from ase.constraints import FixBondLengths
    from ase.optimize import BFGS

    ase_object = None
    labels = []

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        ase_object = autotst_object.ase_molecule
        labels = []

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        ase_object = autotst_object.ts.ase_ts
        labels = [atom.sortingLabel
                  for atom in autotst_object.ts.rmg_ts.getLabeledAtoms().values()]

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        ase_object = autotst_object.ase_ts
        # A TS object carries `rmg_ts` directly (there is no nested `.ts`),
        # mirroring how `ase_ts` is accessed on the line above.
        labels = [atom.sortingLabel
                  for atom in autotst_object.rmg_ts.getLabeledAtoms().values()]

    if ase_object is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(autotst_object).__name__))

    ase_copy = ase_object.copy()
    ase_copy.set_calculator(ase_object.get_calculator())

    # Freeze every pairwise distance between the labeled atoms.
    ase_copy.set_constraint(FixBondLengths(
        list(itertools.combinations(labels, 2))))

    opt = BFGS(ase_copy)
    opt.run(fmax=0.01)

    relaxed_energy = ase_copy.get_potential_energy()

    return relaxed_energy, ase_copy

In [None]:
from hotbit.aseinterface import Hotbit

In [None]:
# Test molecule used by the scratch cells below (the string is presumably a SMILES).
mol = AutoTST_Molecule("CCCNC")

Removing the non_terminal dihedrals

In [None]:
# Pick one torsion of the test molecule to inspect by hand.
tor = mol.torsions[2]

In [None]:
# Unpack the four indices that define the torsion.
i,j,k,l = tor.indices

In [None]:
# Number of entries in the torsion's right_mask that are True.
len(np.array(tor.right_mask)[np.array(tor.right_mask) == True])

In [None]:
# Number of entries in the torsion's right_mask that are False.
len(np.array(tor.right_mask)[np.array(tor.right_mask) == False])

In [None]:
# Second test molecule (the string is presumably a SMILES).
moll = AutoTST_Molecule("[C]CC")

In [None]:
# Display the second test molecule.
moll.view_mol()

In [None]:
# Display the first test molecule.
mol.view_mol()

# Add this to utilities

In [None]:

def _is_methyl_carbon(atom):
    """Returns True when `atom` is a carbon with four bonds, exactly three of them to hydrogens."""
    if not (atom.isCarbon() and len(atom.bonds) == 4):
        return False

    num_hydrogens = sum(1 for atom_other in atom.bonds.keys()
                        if atom_other.isHydrogen())
    return num_hydrogens == 3


def find_terminal_torsions(mol):
    """
    Splits the torsions of an AutoTST object into terminal and non-terminal ones.

    A torsion is terminal when either of its two central atoms (the 2nd and 3rd
    torsion indices) is a carbon with four bonds, three of which are to hydrogens.

    :param mol: an `AutoTST_Molecule`, `AutoTST_Reaction`, or `AutoTST_TS`

    :return terminal_torsions: list of the torsions classified as terminal
    :return non_terminal_torsions: list of all remaining torsions
    :raises TypeError: if `mol` is none of the supported types
    """
    # The RMG molecule and torsion list do not change per torsion, so resolve them once.
    rmg_mol = None
    torsions = None

    if isinstance(mol, autotst.molecule.AutoTST_Molecule):
        rmg_mol = mol.rmg_molecule
        torsions = mol.torsions

    if isinstance(mol, autotst.reaction.AutoTST_Reaction):
        rmg_mol = mol.ts.rmg_ts
        # For a reaction the torsions live on the TS, as in perform_ga
        # and create_initial_population.
        torsions = mol.ts.torsions

    if isinstance(mol, autotst.reaction.AutoTST_TS):
        rmg_mol = mol.rmg_ts
        torsions = mol.torsions

    if rmg_mol is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(mol).__name__))

    terminal_torsions = []
    non_terminal_torsions = []
    for torsion in torsions:
        _, j, k, _ = torsion.indices

        atom_j = rmg_mol.atoms[j]
        atom_k = rmg_mol.atoms[k]

        if _is_methyl_carbon(atom_j) or _is_methyl_carbon(atom_k):
            terminal_torsions.append(torsion)
        else:
            non_terminal_torsions.append(torsion)

    return terminal_torsions, non_terminal_torsions





In [None]:
# Show the (terminal, non-terminal) torsion split for the test molecule.
find_terminal_torsions(mol)

# Scratch copy of `perform_ga`: it is kept here only so that pieces can be ripped out of it

In [11]:
def perform_ga(autotst_object,
               initial_pop=None,
               top_percent=0.3,
               tolerance=0.0001,
               max_generations=500,
               store_generations=False,
               store_directory=".",
               mutation_probability=0.2,
               delta=30):
    """
    Performs a genetic algorithm to determine the lowest energy conformer of a TS or molecule.

    Only the non-terminal torsions (see `find_terminal_torsions`) are varied.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param initial_pop: a DataFrame containing the initial population; created with
       `create_initial_population` when None
    :param top_percent: float of the top percentage of conformers you want to select
    :param tolerance: float cut off; the GA stops once (max - min) / min of the
       constrained energy within the top population drops below it
    :param max_generations: int cut off; the GA stops after this many generations
    :param store_generations: do you want to store a CSV file of each generation
    :param store_directory: the directory where you want the CSV files stored
    :param mutation_probability: float of the chance of mutation
    :param delta: the degree change in dihedral angle between each possible dihedral angle

    :return results: a DataFrame containing the final generation
    :return unique_conformers: the object returned by `get_unique_conformers` for the final generation
    :raises TypeError: if `autotst_object` is none of the supported types
    """
    assert autotst_object, "No AutoTST object provided..."
    if initial_pop is None:
        logging.info(
            "No initial population provided, creating one using base parameters...")
        initial_pop = create_initial_population(autotst_object)

    possible_dihedrals = np.arange(0, 360, delta)
    top = select_top_population(initial_pop,
                                top_percent=top_percent
                                )

    population_size = initial_pop.shape[0]

    results = initial_pop

    ase_object = None
    label = None

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        logging.info("The object given is a `AutoTST_Molecule` object")
        ase_object = autotst_object.ase_molecule
        label = autotst_object.smiles

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        logging.info("The object given is a `AutoTST_Reaction` object")
        ase_object = autotst_object.ts.ase_ts
        label = autotst_object.label

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        logging.info("The object given is a `AutoTST_TS` object")
        ase_object = autotst_object.ase_ts
        label = autotst_object.label

    if ase_object is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(autotst_object).__name__))

    assert ase_object.get_calculator(
    ), "To use GA, you must attach an ASE calculator to the `ase_molecule`."
    gen_number = 0
    complete = False
    unique_conformers = {}

    terminal_torsions, non_terminal_torsions = find_terminal_torsions(autotst_object)

    # One energy column followed by one column per varied (non-terminal) torsion,
    # so every row appended below has exactly len(columns) entries.
    columns = ["constrained_energy"] + \
        ["torsion_" + str(i) for i in range(len(non_terminal_torsions))]

    while not complete:
        gen_number += 1
        logging.info("Performing GA on generation {}".format(gen_number))

        r = []
        for individual in range(population_size):
            # list(...) hands random.sample a plain sequence; Python 3 rejects
            # populations that are not sequences.
            parent_0, parent_1 = random.sample(list(top.index), 2)
            dihedrals = []
            for index, torsion in enumerate(non_terminal_torsions):

                if random.random() < mutation_probability:
                    # Mutation: pick any allowed dihedral value.
                    dihedral = np.random.choice(possible_dihedrals)
                else:
                    # Crossover: inherit this torsion from one parent at random.
                    if 0.5 > random.random():
                        dihedral = results["torsion_" +
                                           str(index)].loc[parent_0]
                    else:
                        dihedral = results["torsion_" +
                                           str(index)].loc[parent_1]

                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask

                dihedrals.append(dihedral)
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=right_mask)

            # Updating the molecule
            update_from_ase(autotst_object)

            constrained_energy = get_energy(autotst_object)

            # Flat row: energy first, then the dihedral of each torsion.
            r.append([constrained_energy] + dihedrals)

        logging.info(
            "Creating the DataFrame of results for the {}th generation".format(gen_number))
        results = pd.DataFrame(r, columns=columns)
        results = results.sort_values("constrained_energy")

        unique_conformers = get_unique_conformers(results, unique_conformers)

        if store_generations == True:
            # This portion stores each generation if desired
            logging.info("Saving the results DataFrame")

            generation_name = "{0}_ga_generation_{1}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, generation_name)
            results.to_csv(f)

        top = select_top_population(results, top_percent)

        stats = top.describe()

        if gen_number >= max_generations:
            complete = True
            logging.info("Max generations reached. GA complete.")
        # Relative energy spread of the top population.
        if abs((stats["constrained_energy"]["max"] - stats["constrained_energy"]["min"]) / stats["constrained_energy"]["min"]) < tolerance:
            complete = True
            logging.info("Cutoff criteria reached. GA complete.")

    return results, unique_conformers


# Add to utilities

In [None]:
# Removed optimization of molecules

def create_initial_population(autotst_object, delta=30, population_size=30):
    """
    Creates an initial population of conformers by assigning random dihedral
    angles to every non-terminal torsion of an AutoTST object.

    No geometry optimization is performed; each individual's energy is read
    directly from the ASE object after its dihedrals are set.

    :param autotst_object: an `AutoTST_Molecule`, `AutoTST_Reaction`, or `AutoTST_TS`
    :param delta: the step size between possible dihedral angles in degrees.
    :param population_size: the number of individuals to be used for the population

    :return df: a DataFrame with a `constrained_energy` column followed by one
       `torsion_<n>` column per non-terminal torsion, sorted by `constrained_energy`;
       None when `population_size` is 0
    :raises TypeError: if `autotst_object` is none of the supported types
    """
    logging.info("Creating initial population of {} individuals from random guesses".format(
        population_size))

    df = None

    possible_dihedrals = np.arange(0, 360, delta)
    population = []
    ase_object = None

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        logging.info("The object given is a `AutoTST_Molecule` object")
        ase_object = autotst_object.ase_molecule

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        logging.info("The object given is a `AutoTST_Reaction` object")
        ase_object = autotst_object.ts.ase_ts

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        logging.info("The object given is a `AutoTST_TS` object")
        ase_object = autotst_object.ase_ts

    if ase_object is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(autotst_object).__name__))

    terminal_torsions, non_terminal_torsions = find_terminal_torsions(autotst_object)

    for individual in range(population_size):
        dihedrals = []
        for torsion in non_terminal_torsions:
            dihedral = np.random.choice(possible_dihedrals)
            dihedrals.append(dihedral)
            i, j, k, l = torsion.indices
            right_mask = torsion.right_mask

            ase_object.set_dihedral(
                a1=i,
                a2=j,
                a3=k,
                a4=l,
                angle=float(dihedral),
                mask=right_mask
            )

        constrained_energy = get_energy(autotst_object)

        population.append(
            [constrained_energy] + dihedrals)

    if len(population) > 0:
        logging.info("Creating a dataframe of the initial population")
        columns = ["constrained_energy"] + \
            ["torsion_" + str(i) for i in range(len(non_terminal_torsions))]
        df = pd.DataFrame(population, columns=columns)
        # Sort on the energy column that actually exists in this frame.
        df = df.sort_values("constrained_energy")

    return df

In [None]:
# Attach a Hotbit calculator to the test molecule, then build and display
# a random initial population for it.
mol.ase_molecule.set_calculator(Hotbit())
init_pop = create_initial_population(mol)
init_pop

In [None]:
# Select the top individuals of the initial population (second argument 0.3,
# the same value perform_ga uses as its default top_percent).
top = select_top_population(init_pop, 0.3)

In [None]:
# Display the selected top population.
top

# Add this to utilities

In [None]:
from ase.constraints import FixBondLengths
from ase.optimize import BFGS

def partial_optimize_mol(autotst_object, fmax=0.01, steps=5):
    """
    Partially relaxes a copy of the ASE geometry of an AutoTST object with BFGS.

    All pairwise distances between the labeled (reacting) atoms are held fixed
    with `FixBondLengths`; a plain molecule has no labeled atoms, so the
    constraint list is empty. The AutoTST object itself is not modified, only
    a copy of its ASE object is optimized.

    :param autotst_object: an `AutoTST_Molecule`, `AutoTST_Reaction`, or `AutoTST_TS`
       * the ASE object's calculator is reused for the copy
    :param fmax: force convergence criterion passed to the optimizer's `run`
    :param steps: maximum number of optimizer steps passed to the optimizer's `run`

    :return relaxed_energy: potential energy of the copy after the optimizer has run
    :return ase_copy: the partially optimized copy of the ASE object
    :raises TypeError: if `autotst_object` is none of the supported types
    """
    ase_object = None
    labels = []

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        ase_object = autotst_object.ase_molecule
        labels = []

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        ase_object = autotst_object.ts.ase_ts
        labels = [atom.sortingLabel
                  for atom in autotst_object.ts.rmg_ts.getLabeledAtoms().values()]

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        ase_object = autotst_object.ase_ts
        # A TS object carries `rmg_ts` directly (there is no nested `.ts`),
        # mirroring how `ase_ts` is accessed on the line above.
        labels = [atom.sortingLabel
                  for atom in autotst_object.rmg_ts.getLabeledAtoms().values()]

    if ase_object is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(autotst_object).__name__))

    ase_copy = ase_object.copy()
    ase_copy.set_calculator(ase_object.get_calculator())

    # Freeze every pairwise distance between the labeled atoms.
    ase_copy.set_constraint(FixBondLengths(
        list(itertools.combinations(labels, 2))))

    opt = BFGS(ase_copy)
    opt.run(fmax=fmax, steps=steps)

    relaxed_energy = ase_copy.get_potential_energy()
    return relaxed_energy, ase_copy

# Add this to ga.py and simple_es.py

In [None]:
# Prototype of a single GA generation, run by hand on the test molecule.
r = []
_, non_terminal_torsions = find_terminal_torsions(mol)
autotst_mol = mol
mutation_probability = 0.2
ase_object = mol.ase_molecule
autotst_object = mol
results = top
delta = 30
possible_dihedrals = np.arange(0, 360, delta)

########## Specifically this chunk
# Partially relax every individual of the current top population and record
# the relaxed energy together with the dihedrals of the relaxed geometry.
relaxed_top = []
for combo in top.iloc[:,1:].values:
    for index, torsion in enumerate(non_terminal_torsions):
        i, j, k, l = torsion.indices
        right_mask = torsion.right_mask

        dihedral = combo[index]

        ase_object.set_dihedral(a1=i,
                            a2=j,
                            a3=k,
                            a4=l,
                            angle=float(dihedral),
                            mask=right_mask)
    update_from_ase(autotst_object)

    relaxed_e, relaxed_object = partial_optimize_mol(autotst_object)

    new_dihedrals = []

    for index, torsion in enumerate(non_terminal_torsions):
        i, j, k, l = torsion.indices

        # Read the angle from the relaxed copy: `ase_object` still holds the
        # unrelaxed geometry, so its dihedrals would not match `relaxed_e`.
        d = relaxed_object.get_dihedral(i, j, k, l)

        new_dihedrals.append(d)

    relaxed_top.append([relaxed_e]+ new_dihedrals)

columns = top.columns
top = pd.DataFrame(relaxed_top, columns=columns)


##########

# Breed a new population of 20 individuals from the relaxed top population.
new_pop = []
for individual in range(20):
    # list(...) hands random.sample a plain sequence; Python 3 rejects
    # populations that are not sequences.
    parent_0, parent_1 = random.sample(list(top.index), 2)
    dihedrals = []
    for index, torsion in enumerate(non_terminal_torsions):

        if random.random() < mutation_probability:
            # Mutation: pick any allowed dihedral value.
            dihedral = np.random.choice(possible_dihedrals)
        else:
            # Crossover: inherit this torsion from one parent at random.
            if 0.5 > random.random():
                dihedral = top["torsion_" +
                                   str(index)].loc[parent_0]
            else:
                dihedral = top["torsion_" +
                                   str(index)].loc[parent_1]

        i, j, k, l = torsion.indices
        right_mask = torsion.right_mask

        dihedrals.append(dihedral)
        ase_object.set_dihedral(a1=i,
                                a2=j,
                                a3=k,
                                a4=l,
                                angle=float(dihedral),
                                mask=right_mask)

    # Updating the molecule
    update_from_ase(autotst_object)

    constrained_energy = get_energy(autotst_object)

    new_pop.append([constrained_energy] + dihedrals)

population = pd.DataFrame(new_pop, columns=columns)


top = select_top_population(population)
# Torsion columns (everything after the first column) of the first and last rows.
best = top.iloc[0,1:]
worst= top.iloc[-1,1:]

# Mean of the squared differences between the two rows (no square root is taken).
rms = ((best - worst)**2).mean()
rms

In [None]:
# Display the new population ordered by its "constrained_energy" column.
population.sort_values("constrained_energy")

In [None]:
# Display the current top population.
top

In [None]:
# Grab the ASE object of the test molecule and display it.
asemol = mol.ase_molecule
asemol

In [None]:
# Dihedral angle defined by atom indices 0, 1, 2, 3 of the ASE object.
asemol.get_dihedral(0,1,2,3)

In [None]:
# Re-select the top individuals from the initial population, using the
# helper's default top fraction.
top = select_top_population(init_pop)
# Torsion columns (everything after the first column) of the first and last rows.
best = top.iloc[0,1:]
worst= top.iloc[-1,1:]

# Mean of the squared differences between the two rows (no square root is taken).
rms = ((best - worst)**2).mean()
rms

In [None]:


# Recompute the mean squared difference from whatever `best` and `worst`
# currently hold in the kernel (no square root is taken).
rms = ((best - worst)**2).mean()
rms

In [None]:
# Range of positions 1 .. len(top) - 1, i.e. every row of `top` except the first.
range(1, len(top))

In [None]:
# Draw two distinct parent labels from the top population. list(...) hands
# random.sample a plain sequence; Python 3 rejects populations that are not sequences.
random.sample(list(top.index), 2)

In [None]:
# Display the current top population.
top

In [83]:
def perform_ga(autotst_object,
               initial_pop=None,
               top_percent=0.3,
               min_rms=10,
               max_generations=500,
               store_generations=False,
               store_directory=".",
               mutation_probability=0.2,
               delta=30):
    """
    Performs a genetic algorithm to determine the lowest energy conformer of a TS or molecule.

    Each generation first partially relaxes the current top population
    (`partial_optimize_mol`), then breeds a new population from the relaxed
    dihedrals. Only the non-terminal torsions (see `find_terminal_torsions`)
    are varied. The first column of the population DataFrame is treated as the
    energy column, whatever its name.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param initial_pop: a DataFrame containing the initial population; created with
       `create_initial_population` when None
    :param top_percent: float of the top percentage of conformers you want to select
    :param min_rms: float cut off; the GA stops once the mean squared difference
       between the torsions of the best and worst top individuals drops below it
       (note: no square root is taken). Also forwarded to `get_unique_conformers`.
    :param max_generations: int cut off; the GA stops after this many generations
    :param store_generations: do you want to store a CSV file of each generation
    :param store_directory: the directory where you want the CSV files stored
    :param mutation_probability: float of the chance of mutation
    :param delta: the degree change in dihedral angle between each possible dihedral angle

    :return results: a DataFrame containing the final generation
    :return unique_conformers: the object returned by `get_unique_conformers` for the final generation
    :raises TypeError: if `autotst_object` is none of the supported types
    """
    assert autotst_object, "No AutoTST object provided..."
    if initial_pop is None:
        logging.info(
            "No initial population provided, creating one using base parameters...")
        initial_pop = create_initial_population(autotst_object)

    possible_dihedrals = np.arange(0, 360, delta)
    top = select_top_population(initial_pop,
                                top_percent=top_percent
                                )

    population_size = initial_pop.shape[0]

    results = initial_pop

    # Column 0 is the energy column throughout (the torsions are read from
    # `iloc[:, 1:]` below), so sort on it by position instead of hard-coding
    # a name the incoming DataFrame may not use.
    energy_column = top.columns[0]

    ase_object = None
    label = None

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        logging.info("The object given is a `AutoTST_Molecule` object")
        ase_object = autotst_object.ase_molecule
        label = autotst_object.smiles

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        logging.info("The object given is a `AutoTST_Reaction` object")
        ase_object = autotst_object.ts.ase_ts
        label = autotst_object.label

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        logging.info("The object given is a `AutoTST_TS` object")
        ase_object = autotst_object.ase_ts
        label = autotst_object.label

    if ase_object is None:
        raise TypeError(
            "Expected an AutoTST_Molecule, AutoTST_Reaction or AutoTST_TS, got {}".format(
                type(autotst_object).__name__))

    assert ase_object.get_calculator(
    ), "To use GA, you must attach an ASE calculator to the `ase_molecule`."
    gen_number = 0
    complete = False
    unique_conformers = {}

    terminal_torsions, non_terminal_torsions = find_terminal_torsions(autotst_object)

    while not complete:

        # Stage 1: partially relax each top individual and keep the relaxed
        # energy together with the dihedrals of the relaxed geometry.
        relaxed_top = []
        for combo in top.iloc[:,1:].values:
            for index, torsion in enumerate(non_terminal_torsions):
                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask

                dihedral = combo[index]

                ase_object.set_dihedral(a1=i,
                                    a2=j,
                                    a3=k,
                                    a4=l,
                                    angle=float(dihedral),
                                    mask=right_mask)
            update_from_ase(autotst_object)

            relaxed_e, relaxed_object = partial_optimize_mol(autotst_object)

            new_dihedrals = []

            for torsion in non_terminal_torsions:
                i, j, k, l = torsion.indices

                d = relaxed_object.get_dihedral(i, j, k, l)

                new_dihedrals.append(d)

            relaxed_top.append([relaxed_e]+ new_dihedrals)

        columns = top.columns
        top = pd.DataFrame(relaxed_top, columns=columns)

        # Stage 2: breed the next generation from the relaxed top population.
        gen_number += 1
        logging.info("Performing GA on generation {}".format(gen_number))

        r = []
        for individual in range(population_size):
            # list(...) hands random.sample a plain sequence; Python 3 rejects
            # populations that are not sequences.
            parent_0, parent_1 = random.sample(list(top.index), 2)
            dihedrals = []
            for index, torsion in enumerate(non_terminal_torsions):

                if random.random() < mutation_probability:
                    # Mutation: pick any allowed dihedral value.
                    dihedral = np.random.choice(possible_dihedrals)
                else:
                    # Crossover: inherit this torsion from one parent at random.
                    if 0.5 > random.random():
                        dihedral = top["torsion_" +
                                           str(index)].loc[parent_0]
                    else:
                        dihedral = top["torsion_" +
                                           str(index)].loc[parent_1]

                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask

                dihedrals.append(dihedral)
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=right_mask)

            # Updating the molecule
            update_from_ase(autotst_object)

            energy = get_energy(autotst_object)

            r.append([energy] + dihedrals)

        logging.info(
            "Creating the DataFrame of results for the {}th generation".format(gen_number))
        results = pd.DataFrame(r, columns=columns)
        results = results.sort_values(energy_column)

        unique_conformers = get_unique_conformers(results, unique_conformers, min_rms)

        if store_generations == True:
            # This portion stores each generation if desired
            logging.info("Saving the results DataFrame")

            generation_name = "{0}_ga_generation_{1}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, generation_name)
            results.to_csv(f)

        top = select_top_population(results, top_percent)

        # Torsion columns of the first and last rows of the top population.
        best = top.iloc[0,1:]
        worst= top.iloc[-1,1:]

        # Mean squared difference (no square root is taken, despite the name).
        rms = ((best - worst)**2).mean()

        if gen_number >= max_generations:
            complete = True
            logging.info("Max generations reached. GA complete.")
        if rms < min_rms:
            complete = True
            logging.info("Cutoff criteria reached. GA complete.")

    return results, unique_conformers


In [96]:
# End-to-end run of perform_ga on a small molecule built from the string "OO".
mol = AutoTST_Molecule("OO")
from hotbit.aseinterface import Hotbit
# NOTE(review): this import rebinds find_terminal_torsions, partial_optimize_mol
# and get_energy to the package versions, replacing any notebook-local
# definitions of the same names from this point on.
from autotst.conformer.utilities import find_terminal_torsions, partial_optimize_mol, get_energy
# perform_ga asserts that the ASE object has a calculator attached.
mol.ase_molecule.set_calculator(Hotbit())
data, confs = perform_ga(mol, min_rms=10)
data

Solved 940 times; Iterations: avg 2.1, max 0, min 3

Timing:
            label                    time     calls    %sub  %covered   %tot
-------------------------------------------------------------------------------
Hotbit                              39.322         1 (100.0 %, 35.1 %) 100.0 % |000000000000000000000000000000|
../initialization                    0.201         1 (  0.5 %        )   0.5 % ||
../geometry                          1.338       940 (  3.4 %, 24.1 %)   3.4 % |1|
../../operations                     0.034       940 (  2.5 %        )   0.1 % ||
../../displacements                  0.289       940 ( 21.6 %        )   0.7 % ||
../solve                            11.011       940 ( 28.0 %, 77.8 %)  28.0 % |11111111|
../../matrix construction            2.193       940 ( 19.9 %        )   5.6 % |22|
../../gamma matrix                   0.953       940 (  8.7 %        )   2.4 % |2|
../../h1                             1.823      2933 ( 16.6 %        )   4.6 % |2|
.

<ipython-input-83-3db3bd62b346>:30 perform_ga INFO No initial population provided, creating one using base parameters...
utilities.py:86 create_initial_population INFO Creating initial population of 30 individuals from random guesses
utilities.py:93 create_initial_population INFO The object given is a `AutoTST_Molecule` object








 _           _    _     _ _
| |__   ___ | |_ | |__ |_| |_
|  _ \ / _ \|  _||  _ \| |  _|
| | | | ( ) | |_ | ( ) | | |_
|_| |_|\___/ \__|\____/|_|\__|  ver. 0.1 (svn=Unversioned directory)
Distributed under GNU GPL; see /Users/nathan/Code/hotbit/lib/python/LICENSE
Date: Tue Jul 31 13:25:04 2018
Nodename: vagelos-ve701-2030.eduroam.wlan.private.upenn.edu
Arch: x86_64
Dir: /Users/nathan/Code/conformer/ipython_notebooks
System: H2O2
       Charge= 0.0
       Container Bravais: pbc:[0,0,0], cell:[0.00,0.00,0.00] Ang, 
   cosines(12,13,23):[nan,nan,nan]
Symmetry operations (if any):
       0: pbc= False , kappa-points=1, M=1
       1: pbc= False , kappa-points=1, M=1
       2: pbc= False , kappa-points=1, M=1
Electronic temperature: 0.02 eV
Mixer: Anderson with memory = 3 , mixing parameter = 0.2
4 atoms, 10 states, 14.0 electrons (7.0 filled states)
Initial n ranges:[0,0] [0,0] [0,0] 
Element O in /Users/nathan/Code/hotbit/param/O.elm
    *no comment
Element H in /Users/nathan/Code/ho

utilities.py:133 create_initial_population INFO Creating a dataframe of the initial population
utilities.py:155 select_top_population INFO Selecting the top population
<ipython-input-83-3db3bd62b346>:43 perform_ga INFO The object given is a `AutoTST_Molecule` object


      Step     Time          Energy         fmax
BFGS:    0 13:25:04      -21.306539        7.9468
BFGS:    1 13:25:04      -21.706273        3.2093
BFGS:    2 13:25:04      -21.823123        1.1461
BFGS:    3 13:25:04      -21.854323        0.4768
BFGS:    4 13:25:04      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:04      -21.306539        7.9468
BFGS:    1 13:25:04      -21.706273        3.2093
BFGS:    2 13:25:04      -21.823123        1.1461
BFGS:    3 13:25:04      -21.854323        0.4768
BFGS:    4 13:25:04      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:04      -21.306539        7.9468
BFGS:    1 13:25:04      -21.706273        3.2093
BFGS:    2 13:25:04      -21.823123        1.1461
BFGS:    3 13:25:05      -21.854323        0.4768
BFGS:    4 13:25:05      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:05      -21.306539        7.9468
BFGS

<ipython-input-83-3db3bd62b346>:111 perform_ga INFO Performing GA on generation 1
<ipython-input-83-3db3bd62b346>:150 perform_ga INFO Creating the DataFrame of results for the 1th generation
utilities.py:155 select_top_population INFO Selecting the top population


      Step     Time          Energy         fmax
BFGS:    0 13:25:06      -21.306539        7.9468
BFGS:    1 13:25:06      -21.706274        3.2093
BFGS:    2 13:25:06      -21.823123        1.1461
BFGS:    3 13:25:06      -21.854323        0.4768
BFGS:    4 13:25:06      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:06      -21.304985        7.9975
BFGS:    1 13:25:06      -21.708059        3.2456
BFGS:    2 13:25:06      -21.826822        1.1578
BFGS:    3 13:25:06      -21.858052        0.4775
BFGS:    4 13:25:06      -21.863843        0.3656
      Step     Time          Energy         fmax
BFGS:    0 13:25:06      -21.304985        7.9975
BFGS:    1 13:25:06      -21.708059        3.2456
BFGS:    2 13:25:06      -21.826822        1.1578
BFGS:    3 13:25:06      -21.858052        0.4775
BFGS:    4 13:25:06      -21.863843        0.3656
      Step     Time          Energy         fmax
BFGS:    0 13:25:06      -21.304985        7.9975
BFGS

<ipython-input-83-3db3bd62b346>:111 perform_ga INFO Performing GA on generation 2
<ipython-input-83-3db3bd62b346>:150 perform_ga INFO Creating the DataFrame of results for the 2th generation
utilities.py:155 select_top_population INFO Selecting the top population


      Step     Time          Energy         fmax
BFGS:    0 13:25:07      -21.306539        7.9468
BFGS:    1 13:25:07      -21.706273        3.2093
BFGS:    2 13:25:07      -21.823123        1.1461
BFGS:    3 13:25:07      -21.854323        0.4768
BFGS:    4 13:25:07      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:07      -21.306539        7.9468
BFGS:    1 13:25:07      -21.706273        3.2093
BFGS:    2 13:25:07      -21.823123        1.1461
BFGS:    3 13:25:07      -21.854323        0.4768
BFGS:    4 13:25:08      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:08      -21.306539        7.9468
BFGS:    1 13:25:08      -21.706273        3.2093
BFGS:    2 13:25:08      -21.823123        1.1461
BFGS:    3 13:25:08      -21.854323        0.4768
BFGS:    4 13:25:08      -21.860356        0.3756
      Step     Time          Energy         fmax
BFGS:    0 13:25:08      -21.304985        7.9975
BFGS

<ipython-input-83-3db3bd62b346>:111 perform_ga INFO Performing GA on generation 3
<ipython-input-83-3db3bd62b346>:150 perform_ga INFO Creating the DataFrame of results for the 3th generation
utilities.py:155 select_top_population INFO Selecting the top population


      Step     Time          Energy         fmax
BFGS:    0 13:25:09      -21.304985        7.9975
BFGS:    1 13:25:09      -21.708059        3.2456
BFGS:    2 13:25:09      -21.826822        1.1578
BFGS:    3 13:25:09      -21.858052        0.4775
BFGS:    4 13:25:09      -21.863843        0.3656
      Step     Time          Energy         fmax
BFGS:    0 13:25:09      -21.304985        7.9975
BFGS:    1 13:25:09      -21.708059        3.2456
BFGS:    2 13:25:09      -21.826822        1.1578
BFGS:    3 13:25:09      -21.858052        0.4775
BFGS:    4 13:25:09      -21.863843        0.3656
      Step     Time          Energy         fmax
BFGS:    0 13:25:09      -21.304985        7.9975
BFGS:    1 13:25:09      -21.708059        3.2456
BFGS:    2 13:25:09      -21.826822        1.1578
BFGS:    3 13:25:09      -21.858052        0.4775
BFGS:    4 13:25:09      -21.863843        0.3656
      Step     Time          Energy         fmax
BFGS:    0 13:25:09      -21.304985        7.9975
BFGS

<ipython-input-83-3db3bd62b346>:111 perform_ga INFO Performing GA on generation 4
<ipython-input-83-3db3bd62b346>:150 perform_ga INFO Creating the DataFrame of results for the 4th generation
utilities.py:155 select_top_population INFO Selecting the top population
<ipython-input-83-3db3bd62b346>:179 perform_ga INFO Cutoff criteria reached. GA complete.


Unnamed: 0,energy,torsion_0
15,-21.299597,100.456188
17,-21.299597,100.456188
11,-21.299597,100.456188
10,-21.299597,100.456188
16,-21.299597,100.456188
8,-21.299597,100.456188
21,-21.299597,100.456188
29,-21.299597,100.456188
28,-21.299597,100.456188
26,-21.299597,100.456188


In [97]:
# Re-display the results DataFrame returned by perform_ga (energy plus one column per torsion).
data

Unnamed: 0,energy,torsion_0
15,-21.299597,100.456188
17,-21.299597,100.456188
11,-21.299597,100.456188
10,-21.299597,100.456188
16,-21.299597,100.456188
8,-21.299597,100.456188
21,-21.299597,100.456188
29,-21.299597,100.456188
28,-21.299597,100.456188
26,-21.299597,100.456188


In [98]:
# Display the unique-conformer dict returned by perform_ga:
# keys are tuples of torsion values, values are the corresponding energies.
confs

{(69.72755079481787,): -21.269209742304696,
 (90.0,): -21.306539325651244,
 (95.57409976934338,): -21.304984595119663,
 (100.45619346063391,): -21.29959730778205,
 (104.78381727096053,): -21.29187534006158,
 (259.54379597616554,): -21.29959729222466,
 (264.425888245565,): -21.30498458641833,
 (270.0,): -21.306539325633878}

In [80]:
# Insert an artificial entry whose energy (-60) is far above the other stored
# energies (about -87), so that the energy-window pruning tried out in the
# following cell has something to remove.
# NOTE(review): this mutates `confs` in place and assumes a two-torsion `confs`
# left over from an earlier run -- confirm before re-running on a fresh kernel.
confs[(100,100)] = -60
confs

{(30.307266519206912, 330.40513929197624): -87.05492949215255,
 (30.471579297618195, 180.81456827014455): -87.07344266825447,
 (30.597914639774164, 60.0): -87.08379932102338,
 (30.940495364401006, 299.43434142854113): -87.07959824913223,
 (58.44927191215866, 185.0191338871137): -87.0776675435481,
 (58.449394653627735, 299.42840236082446): -87.08530260367462,
 (60.0, 181.61835743315382): -87.07594186444067,
 (60.0, 330.81493263877167): -87.0598946645153,
 (100, 100): -60,
 (180.0, 298.29683515472146): -87.0560581017711,
 (300.0, 300.0): -87.09770143803212,
 (300.3101909169961, 181.6002917371194): -87.09234671952117,
 (300.3101909169961, 330.0): -87.08401451292491,
 (300.6133460953844, 270.0): -87.05566361412053,
 (300.61334621628066, 30.0): -87.05820680566603,
 (300.6137328598944, 185.0191338871137): -87.0928286389352,
 (300.8597253250664, 210.0): -87.0720114117143,
 (329.8644323418929, 180.81456827014455): -87.06038771565068}

In [81]:
# Prototype of the pruning step: drop every conformer in `confs` whose energy
# lies more than 1 kcal/mol (expressed in eV via ase.units) above the lowest
# stored energy.
# Requires `confs` (dict: torsion tuple -> energy) and `units`
# (`from ase import units`) to already exist in the kernel.
# Written with single-argument print(...) calls and dict.items() so the cell
# runs unchanged, and prints the same text, on both Python 2 and Python 3.
min_e = min(confs.values())
# The cutoff does not change inside the loop, so compute it once.
cutoff = min_e + ((units.kcal / units.mol) / units.eV)
print(min_e)
print(cutoff)
print("")

# Collect the keys first: deleting from a dict while iterating over it is an error.
to_delete = []
for key, value in confs.items():
    print(value)
    if value > cutoff:
        to_delete.append(key)

for delete in to_delete:
    del confs[delete]

confs
    

-87.09770143803212
-87.05433733413153

-87.06038771565068
-87.08401451292491
-87.08379932102338
-87.0720114117143
-87.07959824913223
-87.07344266825447
-87.0560581017711
-87.05820680566603
-87.09770143803212
-87.09234671952117
-87.07594186444067
-87.08530260367462
-60
-87.0598946645153
-87.05492949215255
-87.05566361412053
-87.0928286389352
-87.0776675435481


{(30.307266519206912, 330.40513929197624): -87.05492949215255,
 (30.471579297618195, 180.81456827014455): -87.07344266825447,
 (30.597914639774164, 60.0): -87.08379932102338,
 (30.940495364401006, 299.43434142854113): -87.07959824913223,
 (58.44927191215866, 185.0191338871137): -87.0776675435481,
 (58.449394653627735, 299.42840236082446): -87.08530260367462,
 (60.0, 181.61835743315382): -87.07594186444067,
 (60.0, 330.81493263877167): -87.0598946645153,
 (180.0, 298.29683515472146): -87.0560581017711,
 (300.0, 300.0): -87.09770143803212,
 (300.3101909169961, 181.6002917371194): -87.09234671952117,
 (300.3101909169961, 330.0): -87.08401451292491,
 (300.6133460953844, 270.0): -87.05566361412053,
 (300.61334621628066, 30.0): -87.05820680566603,
 (300.6137328598944, 185.0191338871137): -87.0928286389352,
 (300.8597253250664, 210.0): -87.0720114117143,
 (329.8644323418929, 180.81456827014455): -87.06038771565068}

In [30]:
get_unique_conformers??

In [82]:
from ase import units
def get_unique_conformers(df, unique_torsions=None, min_rms=10, energy_window=None):
    """
    Collect the distinct low-energy conformers present in a population.

    Rows of `df` whose energy lies within `energy_window` of the lowest energy
    in `df` are visited in order of increasing energy. A row is recorded as a
    new conformer when its torsion values differ from EVERY conformer already
    stored by a mean squared difference greater than `min_rms`. Afterwards,
    every stored conformer whose energy is more than `energy_window` above the
    lowest stored energy is discarded.

    :param:
     df: a DataFrame with an `energy` column and at least one column whose
         name contains `torsion` (e.g. `torsion_0`)
     unique_torsions: dict mapping a tuple of torsion values to an energy,
         holding conformers found so far. It is updated IN PLACE and also
         returned. When omitted, a fresh dict is created for each call.
     min_rms: threshold applied to the mean of the squared torsion
         differences (note: no square root is taken, despite the name)
     energy_window: width of the accepted energy range, in the same units as
         `df.energy`. Defaults to 1 kcal/mol expressed in eV (via `ase.units`).

    :return:
     unique_torsions: the dict of unique conformers (torsion tuple -> energy)

    NOTE(review): torsion values are compared by plain subtraction, so angular
    periodicity is not taken into account (if these are degrees, 359 and 1 are
    treated as far apart) -- confirm this is intended.
    """
    # A `{}` default would be shared by every call that omits the argument,
    # silently carrying conformers over from one molecule/run to the next.
    if unique_torsions is None:
        unique_torsions = {}

    columns = [c for c in df.columns if "torsion" in c]

    assert len(columns) > 0
    assert "energy" in df.columns

    if energy_window is None:
        energy_window = (units.kcal / units.mol) / units.eV

    # Candidates: everything within the window of this population's minimum,
    # best (lowest energy) first so the best representative of a basin wins.
    mini = df[df.energy < (df.energy.min() + energy_window)].sort_values("energy")

    for combo, energy in zip(mini[columns].values, mini.energy.values):
        combo = np.array(combo)

        # New only if it is far enough from every conformer stored so far
        # (vacuously true when nothing has been stored yet).
        is_new = all(
            ((np.array(key) - combo) ** 2).mean() > min_rms
            for key in unique_torsions
        )
        if is_new:
            unique_torsions[tuple(combo)] = energy

    # Nothing stored (e.g. an empty DataFrame and no prior conformers):
    # min() below would raise on an empty sequence.
    if not unique_torsions:
        return unique_torsions

    # Prune entries that are no longer competitive with the best energy seen.
    cutoff = min(unique_torsions.values()) + energy_window
    # Materialise the keys first; a dict cannot be resized while iterating it.
    # `.items()` (rather than `.iteritems()`) works on both Python 2 and 3.
    stale = [key for key, value in unique_torsions.items() if value > cutoff]
    for key in stale:
        del unique_torsions[key]

    return unique_torsions

In [44]:
# Sanity check: ndarray.all() is False as soon as a single element is False.
# get_unique_conformers relies on this to require that a candidate differs
# from every stored conformer before it is recorded.
a = np.array([False, True]).all()
a


False

In [37]:
a.