In [None]:
from main import polygonize
import numpy as np

def get_directions(vecs):
    '''
    Build the direction along which each molecule should be aligned in the
    cyclical TS, pointing towards the center of the polygon.

    vecs is indexed as vecs[i, 1] to read the end point of the i-th vector
    and must hold two or three vectors.

    Returns a (2,3) array with the fixed pair (0,1,0), (0,-1,0) for two
    vectors, or a (3,3) array (one row per direction) for three vectors.
    '''
    n_vecs = len(vecs)
    assert n_vecs in (2,3)

    if n_vecs == 2:
        return np.array([[0,1,0],
                         [0,-1,0]])

    a = vecs[0,1,0] # first vec, end, x
    b = vecs[1,1,0] # second vec, end, x
    c = vecs[1,1,1] # second vec, end, y

    # Coordinates of the triangle circumcenter. This is the closed form for a
    # triangle with vertices (0,0), (a,0) and (b,c).
    # NOTE(review): assumes the polygon is laid out that way - confirm against polygonize.
    circumcenter = np.array([a/2, (b**2 + c**2 - a*b)/(2*c), 0])

    # Each row goes from the midpoint of two end points to the circumcenter
    # (the rows are not normalized).
    end_pairs = ((0,2), (1,0), (2,1))
    rows = [circumcenter - np.mean((vecs[i,1], vecs[j,1]), axis=0) for i, j in end_pairs]

    return np.vstack(rows)

# get_directions(polygonize([1,1,1])[0])
# polygonize([1,1,1])[0]

from linalg_tools import cartesian_product
rotation_steps = 2
# One range of integer steps in [-rotation_steps, rotation_steps] per axis (three axes),
# combined by cartesian_product and then rescaled by 45/rotation_steps.
# NOTE(review): the return type of cartesian_product is not visible here - the
# arithmetic below presumes it supports * and / (e.g. a numpy array).
a = cartesian_product(*(range(-rotation_steps, rotation_steps+1) for _ in range(3)))*45/rotation_steps
# peek at the first ten entries
a[:10]

In [None]:
# Three segments, each stored as a pair of 3D points.
x = np.array([
    [[0.0,        0.0,        0.0], [1.97332738, 0.0, 0.0]],
    [[2.83868969, 4.15092487, 0.0], [1.97332738, 0.0, 0.0]],
    [[2.83868969, 4.15092487, 0.0], [0.0,        0.0, 0.0]],
])

# Length of each segment (distance between its two points).
[np.linalg.norm(first - second) for first, second in x]

## Dataclass

In [None]:
from main import Options
options = Options()
# Not the last expression of the cell, so this listing is evaluated but not displayed.
dir(options)
# Map every attribute whose name does not start with a double underscore to its current value.
{name: getattr(options, name) for name in dir(options) if not name.startswith('__')}

## Options with numbers

In [None]:
keywords_list = ['STEPS=5']
line = 'NOOPT STEPS=7 BYPASS'
# Keep only the keyword name of each whitespace-separated token, dropping any '=value' part.
keywords = [token.partition('=')[0] for token in line.split()]
keywords

## Options with commas/equals

In [None]:
d = {0:[[1,'a']], 1:[[9,'a']]}
def _set_custom_orbs(orb_string):
    '''
    Impose user-specified distances on the reactive atoms paired under a given letter.

    orb_string looks like 'a=2.345,b=3.456,c=2.22'

    NOTE(review): the body reads `self`, which is neither a parameter nor defined
    in this cell - presumably this was drafted as a method. Confirm before calling.
    '''
    # Parse the whole string first, so a malformed piece raises before anything is updated.
    pairs = []
    for piece in orb_string.split(','):
        fields = piece.split('=')
        pairs.append((fields[0], float(fields[1])))

    # For each pairing specified by the user, check each pairing recorded
    # in the pairing_dict on that molecule.
    for letter, dist in pairs:
        for index in range(len(self.objects)):
            for pairing in self.pairing_dict[index]:

                if pairing[1] != letter:
                    continue

                # The letter matches: look for the corresponding reactive atom on that
                # molecule. When the indexes match, re-initialize it with the imposed
                # distance. The imposed distance is half the user-specified one, as the
                # final atomic distance will be given by two halves of this length.
                molecule = self.objects[index]
                for reactive_atom, reactive_index in zip(molecule.reactive_atoms_classes, molecule.reactive_indexes):
                    if reactive_index != pairing[0]:
                        continue
                    reactive_atom.init(molecule, reactive_index, update=True, orb_dim=dist/2)


# _set_custom_orbs('a=2.345,b=3.456')
# d = {0:[[1,'a']], 1:[[9,'a']]}

# Slice out the text between the parentheses of a 'CLASHES(...)' keyword.
'CLASHES(a=3.5)'[8:-1]

## SUPRAFAC Keyword

In [None]:
l = [0,2,3,1]

for n in l:
    # elements of l that are not greater than n
    keep = [item for item in l if item <= n]
    if len(keep) != 2:
        continue
    # boolean mask over l flagging the elements that were kept
    b = [item in keep for item in l]
    print(keep)
    print(n)
    print(b)

## Rodrigues Formula

In [None]:
import numpy as np
from linalg_tools import norm, rot_mat_from_pointer
def rodrigues(pivot, angle):
    '''
    Rotation matrix obtained by applying Rodrigues' rotation formula
    to each of the three cartesian unit vectors.

    Pivot is a shape (3,) array, passed through norm() before use
    (NOTE(review): norm comes from linalg_tools and presumably returns
    the unit vector - confirm).
    Angle in degrees

    Returns a (3,3) array whose columns are the rotated x, y and z vectors.
    '''

    axis = norm(pivot)
    theta = angle/180*np.pi
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)

    def _rotated(vec):
        # v*cos + (k x v)*sin + k*(k.v)*(1 - cos)
        return vec*cos_t + np.cross(axis, vec) * sin_t + axis*(np.dot(axis, vec))*(1-cos_t)

    basis = (np.array([1,0,0]), np.array([0,1,0]), np.array([0,0,1]))

    # turn every rotated vector into a (3,1) column and place them side by side
    columns = [_rotated(vec)[..., None] for vec in basis]

    return np.hstack(columns)
pivot = np.array([1,0,0])

In [None]:
%timeit -r 100 rodrigues(pivot, 180)

In [None]:
%timeit -r 100 rot_mat_from_pointer(pivot, 180)

In [None]:
# Print the result of both functions for the same pivot and the same angle (55),
# one after the other, so the two matrices can be compared by eye.
print(rodrigues(pivot, 55))
print(rot_mat_from_pointer(pivot, 55))

## Align vectors

In [None]:
from scipy.spatial.transform import Rotation as R
v1 = np.array((1,0,0))
v2 = np.array((0,1,0))
# align_vectors(a, b) returns a tuple whose first element is the Rotation that best
# maps the second set of vectors (v1, -v2) onto the first (v1, v2); show it as a matrix.
R.align_vectors(np.array([v1, v2]), np.array([v1, -v2]))[0].as_matrix()

## RMSD threshold
Looking for the best RMSD threshold to use when obtaining the optimized structures.

In [None]:
from prune import prune_conformers
import numpy as np
import matplotlib.pyplot as plt
import os
from cclib.io import ccread
# Folder holding the .xyz test files, relative to the directory the notebook runs in.
# The files are addressed through this path instead of calling os.chdir(): changing
# the process working directory made the cell fail when re-run and shifted every
# relative path used by the cells that follow.
rmsd_test_dir = os.path.join('Resources', 'RMSD_test')

fig = plt.figure(figsize=(8,5))
plt.ylabel(r'% structures kept')
plt.xlabel('RMSD')

# sorted() keeps the curve and legend order reproducible (os.listdir order is arbitrary)
for filename in sorted(os.listdir(rmsd_test_dir)):
    if filename.endswith('.xyz'):
        data = ccread(os.path.join(rmsd_test_dir, filename))
        x, y = [], []
        for RMSD in np.arange(0.5,3,0.05):
            # the second element returned by prune_conformers is used as the keep-mask
            # NOTE(review): assumed to be a boolean mask, one entry per structure - confirm.
            mask = prune_conformers(data.atomcoords, data.atomnos, max_rmsd=RMSD)[1]
            # percentage (not fraction) of structures kept, to agree with the y axis label
            kept = 100*np.count_nonzero(mask)/len(mask)
            y.append(kept)
            x.append(RMSD)
        plt.plot(x, y, label=filename)
plt.legend()

## MOPAC Berny
### Full list of KEYWORDS
        & - TURN NEXT LINE INTO KEYWORDS
        + - ADD ANOTHER LINE OF KEYWORDS
        0SCF - READ IN DATA, THEN STOP
        1ELECTRON- PRINT FINAL ONE-ELECTRON MATRIX
        1SCF - DO ONE SCF AND THEN STOP
        AIDER - READ IN AB INITIO DERIVATIVES
        AIGIN - GEOMETRY MUST BE IN GAUSSIAN FORMAT
        AIGOUT - IN ARC FILE, INCLUDE AB-INITIO GEOMETRY
        ANALYT - USE ANALYTICAL DERIVATIVES OF ENERGY WRT GEOMETRY
        AM1 - USE THE AM1 HAMILTONIAN
        BAR=n.n - REDUCE BAR LENGTH BY A MAXIMUM OF n.n
        BIRADICAL- SYSTEM HAS TWO UNPAIRED ELECTRONS
        BONDS - PRINT FINAL BOND-ORDER MATRIX
        C.I. - A MULTI-ELECTRON CONFIGURATION INTERACTION SPECIFIED
        CHARGE=n - CHARGE ON SYSTEM = n (e.g. NH4 => CHARGE=1)
        COMPFG - PRINT HEAT OF FORMATION CALCULATED IN COMPFG
        CONNOLLY - USE CONNOLLY SURFACE
        DEBUG - DEBUG OPTION TURNED ON
        DENOUT - DENSITY MATRIX OUTPUT (CHANNEL 10)
        DENSITY - PRINT FINAL DENSITY MATRIX
        DEP - GENERATE FORTRAN CODE FOR PARAMETERS FOR NEW ELEMENTS
        DEPVAR=n - TRANSLATION VECTOR IS A MULTIPLE OF BOND-LENGTH
        DERIV - PRINT PART OF WORKING IN DERIV
        DFORCE - FORCE CALCULATION SPECIFIED, ALSO PRINT FORCE MATRIX.
        DFP - USE DAVIDON-FLETCHER-POWELL METHOD TO OPTIMIZE GEOMETRIES
        DIPOLE - FIT THE ESP TO THE CALCULATED DIPOLE
        DIPX - X COMPONENT OF DIPOLE TO BE FITTED
        DIPY - Y COMPONENT OF DIPOLE TO BE FITTED
        DIPZ - Z COMPONENT OF DIPOLE TO BE FITTED
        DMAX - MAXIMUM STEPSIZE IN EIGENVECTOR FOLLOWING
        DOUBLET - DOUBLET STATE REQUIRED
        DRC - DYNAMIC REACTION COORDINATE CALCULATION
        DUMP=n - WRITE RESTART FILES EVERY n SECONDS
        ECHO - DATA ARE ECHOED BACK BEFORE CALCULATION STARTS
        EF - USE EF ROUTINE FOR MINIMUM SEARCH
        EIGINV -
        EIGS - PRINT ALL EIGENVALUES IN ITER
        ENPART - PARTITION ENERGY INTO COMPONENTS
        ESP - ELECTROSTATIC POTENTIAL CALCULATION
        ESPRST - RESTART OF ELECTROSTATIC POTENTIAL
        ESR - CALCULATE RHF UNPAIRED SPIN DENSITY
        EXCITED - OPTIMIZE FIRST EXCITED SINGLET STATE
        EXTERNAL - READ PARAMETERS OFF DISK
        FILL=n - IN RHF OPEN AND CLOSED SHELL, FORCE M.O. n TO BE FILLED
        FLEPO - PRINT DETAILS OF GEOMETRY OPTIMIZATION
        FMAT - PRINT DETAILS OF WORKING IN FMAT
        FOCK - PRINT LAST FOCK MATRIX
        FORCE - FORCE CALCULATION SPECIFIED
        GEO-OK - OVERRIDE INTERATOMIC DISTANCE CHECK
        GNORM=n.n- EXIT WHEN GRADIENT NORM DROPS BELOW n.n
        GRADIENTS- PRINT ALL GRADIENTS
        GRAPH - GENERATE FILE FOR GRAPHICS
        HCORE - PRINT DETAILS OF WORKING IN HCORE
        HESS=N - OPTIONS FOR CALCULATING HESSIAN MATRICES IN EF
        H-PRIO - HEAT OF FORMATION TAKES PRIORITY IN DRC
        HYPERFINE- HYPERFINE COUPLING CONSTANTS TO BE CALCULATED
        IRC - INTRINSIC REACTION COORDINATE CALCULATION
        ISOTOPE - FORCE MATRIX WRITTEN TO DISK (CHANNEL 9 )
        ITER - PRINT DETAILS OF WORKING IN ITER
        ITRY=N - SET LIMIT OF NUMBER OF SCF ITERATIONS TO N.
        IUPD - MODE OF HESSIAN UPDATE IN EIGENVECTOR FOLLOWING
        K=(N,N) - BRILLOUIN ZONE STRUCTURE TO BE CALCULATED
        KINETIC - EXCESS KINETIC ENERGY ADDED TO DRC CALCULATION
        LINMIN - PRINT DETAILS OF LINE MINIMIZATION
        LARGE - PRINT EXPANDED OUTPUT
        LET - OVERRIDE CERTAIN SAFETY CHECKS
        LOCALIZE - PRINT LOCALIZED ORBITALS
        MAX - PRINTS MAXIMUM GRID SIZE (23*23)
        MECI - PRINT DETAILS OF MECI CALCULATION
        MICROS - USE SPECIFIC MICROSTATES IN THE C.I.
        MINDO/3 - USE THE MINDO/3 HAMILTONIAN
        MMOK - USE MOLECULAR MECHANICS CORRECTION TO CONH BONDS
        MODE=N - IN EF, FOLLOW HESSIAN MODE NO. N
        MOLDAT - PRINT DETAILS OF WORKING IN MOLDAT
        MS=N - IN MECI, MAGNETIC COMPONENT OF SPIN
        MULLIK - PRINT THE MULLIKEN POPULATION ANALYSIS
        NLLSQ - MINIMIZE GRADIENTS USING NLLSQ
        NOANCI - DO NOT USE ANALYTICAL C.I. DERIVATIVES
        NODIIS - DO NOT USE DIIS GEOMETRY OPTIMIZER
        NOINTER - DO NOT PRINT INTERATOMIC DISTANCES
        NOLOG - SUPPRESS LOG FILE TRAIL, WHERE POSSIBLE
        NOMM - DO NOT USE MOLECULAR MECHANICS CORRECTION TO CONH BONDS
        NONR -
        NOTHIEL - DO NOT USE THIEL’S FSTMIN TECHNIQUE
        NSURF=N - NUMBER OF SURFACES IN AN ESP CALCULATION
        NOXYZ - DO NOT PRINT CARTESIAN COORDINATES
        NSURF - NUMBER OF LAYERS USED IN ELECTROSTATIC POTENTIAL
        OLDENS - READ INITIAL DENSITY MATRIX OFF DISK
        OLDGEO - PREVIOUS GEOMETRY TO BE USED
        OPEN - OPEN-SHELL RHF CALCULATION REQUESTED
        ORIDE -
        PARASOK - IN AM1 CALCULATIONS SOME MNDO PARAMETERS ARE TO BE USED
        PI - RESOLVE DENSITY MATRIX INTO SIGMA AND PI BONDS
        PL - MONITOR CONVERGENCE OF DENSITY MATRIX IN ITER
        PM3 - USE THE MNDO-PM3 HAMILTONIAN
        POINT=N - NUMBER OF POINTS IN REACTION PATH
        POINT1=N - NUMBER OF POINTS IN FIRST DIRECTION IN GRID CALCULATION
        POINT2=N - NUMBER OF POINTS IN SECOND DIRECTION IN GRID CALCULATION
        POLAR - CALCULATE FIRST, SECOND AND THIRD ORDER POLARIZABILITIES
        POTWRT - IN ESP, WRITE OUT ELECTROSTATIC POTENTIAL TO UNIT 21
        POWSQ - PRINT DETAILS OF WORKING IN POWSQ
        PRECISE - CRITERIA TO BE INCREASED BY 100 TIMES
        PULAY - USE PULAY’S CONVERGER TO OBTAIN A SCF
        QUARTET - QUARTET STATE REQUIRED
        QUINTET - QUINTET STATE REQUIRED
        RECALC=N - IN EF, RECALCULATE HESSIAN EVERY N STEPS
        RESTART - CALCULATION RESTARTED
        ROOT=n - ROOT n TO BE OPTIMIZED IN A C.I. CALCULATION
        ROT=n - THE SYMMETRY NUMBER OF THE SYSTEM IS n.
        SADDLE - OPTIMIZE TRANSITION STATE
        SCALE - SCALING FACTOR FOR VAN DER WAALS DISTANCE IN ESP
        SCFCRT=n - DEFAULT SCF CRITERION REPLACED BY THE VALUE SUPPLIED
        SCINCR - INCREMENT BETWEEN LAYERS IN ESP
        SETUP - EXTRA KEYWORDS TO BE READ OF SETUP FILE
        SEXTET - SEXTET STATE REQUIRED
        SHIFT=n - A DAMPING FACTOR OF n DEFINED TO START SCF
        SIGMA - MINIMIZE GRADIENTS USING SIGMA
        SINGLET - SINGLET STATE REQUIRED
        SLOPE - MULTIPLIER USED TO SCALE MNDO CHARGES
        SPIN - PRINT FINAL UHF SPIN MATRIX
        STEP - STEP SIZE IN PATH
        STEP1=n - STEP SIZE n FOR FIRST COORDINATE IN GRID CALCULATION
        STEP2=n - STEP SIZE n FOR SECOND COORDINATE IN GRID CALCULATION
        STO-3G - DEORTHOGONALIZE ORBITALS IN STO-3G BASIS
        SYMAVG - AVERAGE SYMMETRY EQUIVALENT ESP CHARGES
        SYMMETRY - IMPOSE SYMMETRY CONDITIONS
        T=n - A TIME OF n SECONDS REQUESTED
        THERMO - PERFORM A THERMODYNAMICS CALCULATION
        TIMES - PRINT TIMES OF VARIOUS STAGES
        T-PRIO - TIME TAKES PRIORITY IN DRC
        TRANS - THE SYSTEM IS A TRANSITION STATE (USED IN THERMODYNAMICS CALCULATION)
        TRIPLET - TRIPLET STATE REQUIRED
        TS - USING EF ROUTINE FOR TS SEARCH
        UHF - UNRESTRICTED HARTREE-FOCK CALCULATION
        VECTORS - PRINT FINAL EIGENVECTORS
        VELOCITY - SUPPLY THE INITIAL VELOCITY VECTOR IN A DRC CALCULATION
        WILLIAMS - USE WILLIAMS SURFACE
        X-PRIO - GEOMETRY CHANGES TAKE PRIORITY IN DRC
        XYZ - DO ALL GEOMETRIC OPERATIONS IN CARTESIAN COORDINATES.

In [None]:
from optimization_methods import mopac_opt
from tscode import write_xyz
import numpy as np
import os
from cclib.io import ccread
# Move into the SN2 test folder. The path is relative, so it is resolved against
# whatever the working directory is when this cell runs: running the cell twice,
# or after any other os.chdir call, looks for the folder in the wrong place.
os.chdir('Resources/SN2')
# Read the test structure from the (new) working directory.
data = ccread('test_berny.xyz')
# Two pairs of indexes; not used by any of the cells visible here.
# NOTE(review): presumably atom-index pairs to be constrained during optimization - confirm.
constrained_indexes = np.array(((0,18),(6,16)))

In [None]:
# Run mopac_opt on the first geometry stored in `data`, with the method string 'PM7 TS';
# element [0] of what it returns is taken as the new coordinates.
newcoords = mopac_opt(data.atomcoords[0], data.atomnos, method='PM7 TS')[0]
# Write the new coordinates to an .xyz file in the current working directory.
with open('test_berny_out.xyz', 'w') as f:
    write_xyz(newcoords, data.atomnos, f, title='test_berny_out.xyz')
# Shell out to the `obabel` command to produce an .sdf file from the .xyz one, then to
# `gview` to open it. The exit statuses returned by os.system are not checked, so a
# missing executable or a failed conversion goes unnoticed here.
os.system('obabel test_berny_out.xyz -O test_berny_out.sdf')
os.system('gview test_berny_out.sdf')

In [None]:
from spyrmsd.rmsd import rmsd
# RMSD between the starting geometry and the optimized coordinates (same atomic
# numbers passed for both), with spyrmsd's `center` and `minimize` options switched on.
rmsd(data.atomcoords[0], newcoords, data.atomnos, data.atomnos, center=True, minimize=True)