In [1]:
import dbstep.Dbstep as db
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import os 
from rdkit.Chem import rdMolTransforms

  def parallel_grid_scan(xy_grid, angle):


# Import Data

In [3]:
# Path to the reaction dataset workbook, relative to this notebook.
input_file = '../Data/IDPI_DataSet_Final_JPR.xlsx'

# Read the workbook into the frame that the later cells refer to as `df1`.
df1 = pd.read_excel(input_file)
# Bare expression so the notebook renders the frame as the cell output.
df1

Unnamed: 0,reaction,starting electrophile SMILES,input electrophile SMILES,nucleophile SMILES,"3,3 Catalyst Substituent",N Catalyst Substituent,solvent,Temperature (Celsius),Temperature (Kelvin),yield (%),ee (%),e.r. 1,e.r. 2,ddG,reference,Link
0,1a,O=Cc2ccc1ccccc1c2,O=Cc2ccc1ccccc1c2,C=CC[Si](C)(C)C,c2ccc1ccccc1c2,NS(=O)(=O)C(F)(F)F,toluene,-78,195.15,92.0,92,96.0,4.0,1.231712,"Angew. Chem. Int. Ed. 2016, 55, 13200–13203",https://onlinelibrary.wiley.com/doi/pdfdirect/...
1,1b,COc2ccc1cc(C=O)ccc1c2,COc2ccc1cc(C=O)ccc1c2,C=CC[Si](C)(C)C,c2ccc1ccccc1c2,NS(=O)(=O)C(F)(F)F,toluene,-78,195.15,80.0,88,94.0,6.0,1.066407,"Angew. Chem. Int. Ed. 2016, 55, 13200–13204",
2,1c,O=Cc2ccc1cc(Br)ccc1c2,O=Cc2ccc1cc(Br)ccc1c2,C=CC[Si](C)(C)C,c2ccc1ccccc1c2,NS(=O)(=O)C(F)(F)F,toluene,-78,195.15,57.0,86,93.0,7.0,1.002518,"Angew. Chem. Int. Ed. 2016, 55, 13200–13205",
3,1d,O=Cc1ccccc1,O=Cc1ccccc1,C=CC[Si](C)(C)C,c2ccc1ccccc1c2,NS(=O)(=O)C(F)(F)F,toluene,-78,195.15,85.0,82,91.0,9.0,0.896691,"Angew. Chem. Int. Ed. 2016, 55, 13200–13206",
4,1e,O=Cc1ccccc1F,O=Cc1ccccc1F,C=CC[Si](C)(C)C,c1ccc3c(c1)CCc2ccccc23,NS(=O)(=O)C(F)(F)F,toluene,-60,213.15,82.0,84,92.0,8.0,1.033884,"Angew. Chem. Int. Ed. 2016, 55, 13200–13207",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,3e,CC/C=C(C)/C=O,CC/C=C(C)/C=O,C1=CCC=C1,c3ccc2ccc1ccccc1c2c3,NS(=O)(=O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F,CH2Cl2,-100,173.15,93.0,96,98.0,2.0,1.338303,"Nat. Commun. 2019, 10, 770",
334,3f,O=CC1=CCCCC1,O=CC1=CCCCC1,C1=CCC=C1,c3ccc2ccc1ccccc1c2c3,NS(=O)(=O)C(F)(F)C(F)(F)F,CH2Cl2,-100,173.15,93.0,96,98.0,2.0,1.338303,"Nat. Commun. 2019, 10, 770",
335,3u,C/C=C(C)/C=O,C/C=C(C)/C=O,C1=CCC=C1,c3ccc2ccc1ccccc1c2c3,NS(=O)(=O)C(F)(F)C(F)(F)F,CH2Cl2,-100,173.15,90.0,92,96.0,4.0,1.092856,"Nat. Commun. 2019, 10, 770",
336,3v,O=CC1=CCCC1,O=CC1=CCCC1,C1=CCC=C1,c3ccc2ccc1ccccc1c2c3,NS(=O)(=O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F,CH2Cl2,-100,173.15,78.0,94,97.0,3.0,1.195347,"Nat. Commun. 2019, 10, 770",


# Functions for Conformer Creation

In [4]:




def calc_Boltzman_weights(mol, conformerId, minimizeIts=1000,T=293):
    """
    Return the unnormalised Boltzmann factor of one conformer from its MMFF energy.

    An MMFF force field is built for the requested conformer, the conformer is
    optionally relaxed with it, and exp(-E / (T * 1.988e-3)) is returned.
    The value is NOT normalised over conformers; callers must divide by the
    sum over all conformers to obtain populations.

    Parameters:
        mol: RDKit molecule holding the conformer (explicit hydrogens expected by MMFF).
        conformerId: id of the conformer to evaluate.
        minimizeIts: maximum number of minimisation iterations; values <= 0 skip
            the minimisation and use the conformer as embedded.
        T: temperature used in the exponent (presumably Kelvin, with 1.988e-3
            standing for the gas constant in kcal/(mol*K) -- confirm).

    Returns:
        The Boltzmann factor as a numpy float.

    Raises:
        ValueError: if RDKit cannot set up an MMFF force field for `mol`
            (previously this surfaced as an AttributeError on None).
    """
    mmff_props = AllChem.MMFFGetMoleculeProperties(mol)
    ff = None
    if mmff_props is not None:
        ff = AllChem.MMFFGetMoleculeForceField(mol, mmff_props, confId=conformerId)
    if ff is None:
        raise ValueError("MMFF force field could not be set up for this molecule")

    ff.Initialize()
    if minimizeIts > 0:
        # The convergence flag is not used; the energy below is taken from
        # whatever geometry the minimiser reached.
        ff.Minimize(maxIts=minimizeIts)
    energy = ff.CalcEnergy()

    return np.exp(-energy/(T*1.988*10**(-3)))


def _write_xyz_file(output_file_path, mol, conf, comment):
    """Write the coordinates of `conf` for every atom of `mol` as an XYZ file."""
    with open(output_file_path, 'w') as xyz_file:
        xyz_file.write(f"{mol.GetNumAtoms()}\n")
        xyz_file.write(f"{comment}\n")
        for atom in mol.GetAtoms():
            pos = conf.GetAtomPosition(atom.GetIdx())
            xyz_file.write(f"{atom.GetSymbol()} {pos.x:.16f} {pos.y:.16f} {pos.z:.16f}\n")


def smiles_to_xyz(smiles, output_folder,mode='basic'):
    """
    Generate 3D structure(s) for a SMILES string and write them as .xyz files.

    Modes:
        "basic"            - one conformer from RDKit's EmbedMolecule, written to
                             `<smiles>.xyz`.
        "conformer_sample" - several conformers from RDKit's EmbedMultipleConfs
                             (called with its default settings), written to
                             `<smiles>_<i>conf.xyz`.
        "conformer_energy" - as "conformer_sample", and each conformer is also
                             passed through `calc_Boltzman_weights` (which may
                             relax it) to obtain a Boltzmann weight.

    Every conformer is canonicalised with `CanonicalizeConformer` before being
    written. "/" characters in the SMILES are replaced by "_" in file names.

    Parameters:
        smiles: SMILES string of the molecule.
        output_folder: existing directory that receives the .xyz files.
        mode: one of the modes listed above.

    Returns:
        "basic":            path of the written file, or None for an unparsable SMILES.
        "conformer_sample": list of written file paths (empty for an unparsable SMILES).
        "conformer_energy": (list of file paths, list of weights normalised to sum to 1);
                            two empty lists for an unparsable SMILES.
        any other mode:     None.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Unparsable SMILES: hand back an empty result of the shape the
        # requested mode normally returns instead of crashing inside AddHs.
        if mode == 'conformer_sample':
            return []
        if mode == 'conformer_energy':
            return [], []
        return None

    mol = Chem.AddHs(mol)
    file_stem = smiles.replace("/", "_")

    if mode == 'basic':
        AllChem.EmbedMolecule(mol)
        conf = mol.GetConformer()
        rdMolTransforms.CanonicalizeConformer(conf)
        output_file_path = os.path.join(output_folder, f'{file_stem}.xyz')
        _write_xyz_file(output_file_path, mol, conf, smiles)
        return output_file_path

    if mode not in ('conformer_sample', 'conformer_energy'):
        return None

    AllChem.EmbedMultipleConfs(mol)
    conf_list = []
    conf_weights = []
    for i, conf in enumerate(mol.GetConformers()):
        if mode == 'conformer_energy':
            # Evaluate the weight before canonicalising, matching the order in
            # which the conformer is relaxed and then re-oriented.
            conf_weights.append(calc_Boltzman_weights(mol, conf.GetId(), minimizeIts=1000, T=293))
        rdMolTransforms.CanonicalizeConformer(conf)
        output_file_path = os.path.join(output_folder, f'{file_stem}_{i}conf.xyz')
        _write_xyz_file(output_file_path, mol, conf, f"{smiles}_{i}conf")
        conf_list.append(output_file_path)

    if mode == 'conformer_sample':
        return conf_list

    # Normalise exactly once, after every raw Boltzmann factor is known.
    # Normalising inside the loop mixes already-normalised values with raw
    # factors and distorts the resulting populations.
    total = sum(conf_weights)
    conf_weights = [float(weight) / total for weight in conf_weights]
    return conf_list, conf_weights


def write_smiles_to_file(smiles_list, output_file):
    """Write every entry of `smiles_list` to `output_file`, one entry per line.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(output_file, 'w') as handle:
        handle.writelines(f"{entry}\n" for entry in smiles_list)



# Identify Reacting Atoms in Electrophiles

In [7]:

def find_c_o_atom_ids(smiles):
    """
    Locate one C=O double bond in the molecule described by `smiles`.

    The bonds are scanned twice, in bond order:
    1) first for a C=O whose carbon has degree 2 in the hydrogen-free graph
       (the pattern an aldehyde carbon shows without explicit hydrogens);
    2) if none matched, for any C=O double bond.
    The first match ends the search.

    Returns a dict with keys 'smiles', 'oxygen_atom_ids' and 'carbon_atom_ids'
    (each id list holding the single matched atom index). Returns None when the
    SMILES cannot be parsed; when no C=O is present the SMILES is printed and
    None is returned.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    def carbonyl_atoms(bond):
        """Return (carbon, oxygen) when `bond` is a C=O double bond, else None."""
        if bond.GetBondTypeAsDouble() != 2.0:
            return None
        begin, end = bond.GetBeginAtom(), bond.GetEndAtom()
        if begin.GetAtomicNum() == 6 and end.GetAtomicNum() == 8:
            return begin, end
        if end.GetAtomicNum() == 6 and begin.GetAtomicNum() == 8:
            return end, begin
        return None

    # First sweep insists on a degree-2 carbon, second sweep accepts any C=O.
    for need_degree_two in (True, False):
        for bond in mol.GetBonds():
            pair = carbonyl_atoms(bond)
            if pair is None:
                continue
            carbon, oxygen = pair
            if need_degree_two and carbon.GetDegree() != 2:
                continue
            return {
                'smiles': smiles,
                'oxygen_atom_ids': [oxygen.GetIdx()],
                'carbon_atom_ids': [carbon.GetIdx()],
            }

    # Nothing matched in either sweep: report the offending SMILES.
    print(smiles)
    return None




# Keep only unique SMILES entries
unique_df = df1.drop_duplicates(subset='input electrophile SMILES')
# Find atom IDs for oxygen and carbon in the C=O group with a double bond for each unique SMILES
results = []
for index, row in unique_df.iterrows():
    result = find_c_o_atom_ids(row['input electrophile SMILES'])
    # find_c_o_atom_ids returns None when parsing fails or no C=O is found;
    # those rows are left out of `results`.
    if result:
        results.append(result)
   

# Display results
for result in results:
    # Only entries with exactly one matched oxygen are printed.
    if len(result['oxygen_atom_ids'])==1:
        print(f"SMILES: {result['smiles']}, Oxygen Atom IDs: {result['oxygen_atom_ids']}, Carbon Atom IDs: {result['carbon_atom_ids']}")


SMILES: O=Cc2ccc1ccccc1c2, Oxygen Atom IDs: [0], Carbon Atom IDs: [1]
SMILES: COc2ccc1cc(C=O)ccc1c2, Oxygen Atom IDs: [9], Carbon Atom IDs: [8]
SMILES: O=Cc2ccc1cc(Br)ccc1c2, Oxygen Atom IDs: [0], Carbon Atom IDs: [1]
SMILES: O=Cc1ccccc1, Oxygen Atom IDs: [0], Carbon Atom IDs: [1]
SMILES: O=Cc1ccccc1F, Oxygen Atom IDs: [0], Carbon Atom IDs: [1]
SMILES: Cc1ccccc1C=O, Oxygen Atom IDs: [8], Carbon Atom IDs: [7]
SMILES: Cc1cccc(C=O)c1, Oxygen Atom IDs: [7], Carbon Atom IDs: [6]
SMILES: Cc1ccc(C=O)cc1, Oxygen Atom IDs: [6], Carbon Atom IDs: [5]
SMILES: CC(C)(C)c1ccc(C=O)cc1, Oxygen Atom IDs: [9], Carbon Atom IDs: [8]
SMILES: O=C/C=C/c1ccccc1, Oxygen Atom IDs: [0], Carbon Atom IDs: [1]
SMILES: CCCCCCCCCC=O, Oxygen Atom IDs: [10], Carbon Atom IDs: [9]
SMILES: CCCCC=O, Oxygen Atom IDs: [5], Carbon Atom IDs: [4]
SMILES: O=CCCc1ccccc1, Oxygen Atom IDs: [0], Carbon Atom IDs: [1]
SMILES: CC(C)CC=O, Oxygen Atom IDs: [5], Carbon Atom IDs: [4]
SMILES: C1CC=[O+]C1, Oxygen Atom IDs: [3], Carbon Atom ID

# Identify Reacting Atoms in Nucleophiles

In [8]:


def find_terminal_double_bonded_carbons(smiles):
    """
    Pick the carbon atoms of C=C double bonds in the molecule given by `smiles`.

    Three searches are tried in order, and the first that finds anything wins:
    1) 'terminal': C=C bonds where exactly one carbon has degree 1 (hydrogen-free
       graph); that carbon is stored as terminal, its partner as non-terminal.
    2) 'oxygen': C=C bonds where one carbon has degree 3 and exactly two oxygen
       neighbours; the OTHER carbon is stored as terminal.
    3) 'carbon nonterminal': every C=C bond, each carbon stored once as terminal
       and once as non-terminal.

    Returns a dict with keys 'smiles', 'terminal_atoms', 'non_terminal_atoms'
    (index-aligned lists of atom ids) and 'returned_from' (name of the search
    that produced the result, '' when nothing was found or parsing failed).
    """
    outcome = {'smiles': smiles, 'terminal_atoms': [], 'non_terminal_atoms': [], 'returned_from': ''}
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return outcome

    # All carbon-carbon double bonds as (begin atom, end atom), in bond order.
    cc_double_bonds = []
    for bond in mol.GetBonds():
        if bond.GetBondTypeAsDouble() != 2.0:
            continue
        begin, end = bond.GetBeginAtom(), bond.GetEndAtom()
        if begin.GetAtomicNum() == 6 and end.GetAtomicNum() == 6:
            cc_double_bonds.append((begin, end))

    def record(reacting, partner, origin):
        """Store one (terminal, non-terminal) pair and tag the search used."""
        outcome['terminal_atoms'].append(reacting.GetIdx())
        outcome['non_terminal_atoms'].append(partner.GetIdx())
        outcome['returned_from'] = origin

    def has_two_oxygens(atom):
        """True for a degree-3 atom with exactly two oxygen neighbours."""
        if atom.GetDegree() != 3:
            return False
        oxygen_count = sum(1 for nbr in atom.GetNeighbors() if nbr.GetAtomicNum() == 8)
        return oxygen_count == 2

    # Search 1: one end of the double bond is a degree-1 carbon.
    for begin, end in cc_double_bonds:
        if begin.GetDegree() == 1 and end.GetDegree() > 1:
            record(begin, end, 'terminal')
        elif begin.GetDegree() > 1 and end.GetDegree() == 1:
            record(end, begin, 'terminal')
    if outcome['terminal_atoms']:
        return outcome

    # Search 2: one end carries two oxygens; its partner is the reported atom.
    for begin, end in cc_double_bonds:
        if has_two_oxygens(begin):
            record(end, begin, 'oxygen')
        elif has_two_oxygens(end):
            record(begin, end, 'oxygen')
    if outcome['terminal_atoms']:
        return outcome

    # Search 3: fall back to every C=C carbon, in both roles.
    for begin, end in cc_double_bonds:
        record(begin, end, 'carbon nonterminal')
        record(end, begin, 'carbon nonterminal')
    return outcome



# One row per distinct nucleophile SMILES.
unique_df = df1.drop_duplicates(subset='nucleophile SMILES')

results = []
for index, row in unique_df.iterrows():
    result = find_terminal_double_bonded_carbons(row['nucleophile SMILES'])
    # The function always returns a dict with four keys, so this check passes
    # even when its atom lists are empty.
    if result:
        results.append(result)

# Display results
for result in results:
    #if not result['terminal_atoms']:
        print(f"SMILES: {result['smiles']}, terminal_atoms: {result['terminal_atoms']},non_terminal_atoms: {result['non_terminal_atoms']}")


SMILES: C=CC[Si](C)(C)C, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C(OC)O[Si](C)(C)C(C)(C)C, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C(OCc1ccccc1)O[Si](C)(C)C(C)(C)C, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C(OC1CCCCC1)O[Si](C)(C)C(C)(C)C, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C(O[Si](C)(C)C(C)(C)C)c1ccccc1, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C(/C=C/c1ccccc1)O[Si](C)(C)C(C)(C)C, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C/C=C(OC(C)C)\O[Si](C)(C)C(C)(C)C, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: C=C1C=C(O[Si](C)(C)C)OC(C)(C)O1, terminal_atoms: [0],non_terminal_atoms: [1]
SMILES: CC/C=C(C)/C(=O)/C=C/C(C)C, terminal_atoms: [2, 3, 7, 8],non_terminal_atoms: [3, 2, 8, 7]
SMILES: CC/C=C(C)/C(=O)/C=C/CCC, terminal_atoms: [2, 3, 7, 8],non_terminal_atoms: [3, 2, 8, 7]
SMILES: CC/C=C(C)/C(=O)/C=C/CC(C)C, terminal_atoms: [2, 3, 7, 8],non_terminal_atoms: [3, 2, 8, 7]
SMILES: CC/C=C(C)/C(=O)/C=C/C(CC)CC

In [9]:

from rdkit.Chem.rdmolfiles import  MolFromXYZFile
import itertools
import math

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two points indexable as [0], [1], [2]."""
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    dz = point1[2] - point2[2]
    return math.sqrt(dx**2 + dy**2 + dz**2)

def calculate_max_distance(coordinates):
    """Return the largest pairwise distance among `coordinates`.

    Every unordered pair of points is measured with `calculate_distance`.
    With fewer than two points the result is 0.0.
    """
    pair_distances = (
        calculate_distance(first, second)
        for first, second in itertools.combinations(coordinates, 2)
    )
    # Seeding the sequence with 0.0 reproduces a running maximum that starts at zero.
    return max(itertools.chain((0.0,), pair_distances))


def calculate_bounding_box_dimensions(coordinates):
    """
    Return the edge lengths of the axis-aligned box enclosing `coordinates`.

    Input: iterable of points, each unpackable into (x, y, z).
    The box is taken along the Cartesian axes as given; the molecule is not
    re-oriented here.
    Returns a list of the three extents sorted from largest to smallest.
    """
    lows = [float('inf')] * 3
    highs = [float('-inf')] * 3

    for x, y, z in coordinates:
        for axis, value in enumerate((x, y, z)):
            lows[axis] = min(lows[axis], value)
            highs[axis] = max(highs[axis], value)

    extents = (high - low for high, low in zip(highs, lows))
    return sorted(extents, reverse=True)

def get_coord(coord_filepath,id):
    """
    Read an XYZ file and derive size descriptors from its atom positions.

    Input: path of the .xyz file and the (0-based RDKit) index `id` of the atom
    of interest.

    Returns a tuple (max_distance, bounding_box_dimensions, displace):
        max_distance            - largest pairwise distance between atoms (float)
        bounding_box_dimensions - box extents sorted from largest to smallest (list)
        displace                - distance from atom `id` to the point whose
                                  coordinates are half of each sorted extent (float)
    Returns (None, None, None) when RDKit cannot read the file.

    NOTE(review): the reference point for `displace` is built from the SORTED
    half-extents, not from the box midpoint in the molecule's frame -- confirm
    this is the intended descriptor.
    """
    # Parse the file once; the original read it twice.
    mol = MolFromXYZFile(coord_filepath)
    if mol is None:
        return None, None, None

    conformer = mol.GetConformer()
    positions = [conformer.GetAtomPosition(i) for i in range(mol.GetNumAtoms())]

    max_distance = calculate_max_distance(positions)
    bounding_box_dimensions = calculate_bounding_box_dimensions(positions)
    half_extents = [x / 2 for x in bounding_box_dimensions]
    displace = calculate_distance(conformer.GetAtomPosition(id), half_extents)
    return max_distance, bounding_box_dimensions, displace







# Add Catalyst Parameters to the Dataset

In [5]:
# Catalyst descriptor table (CSV), relative to this notebook.
input_file2 = '../Data/catalyst_sterimol_data.csv'
df2 = pd.read_csv(input_file2)


# Left join: every reaction row of df1 is kept and the catalyst columns are
# attached where df2's 'old_name' matches the 3,3' substituent.
# NOTE(review): the left key ends with a trailing space; confirm it matches the
# actual column header in the workbook, otherwise the merge raises a KeyError.
result = pd.merge(df1 ,df2, left_on='3,3 Catalyst Substituent ', right_on='old_name', how='left')

# index=False is not passed, so the frame's index is written as an extra leading column.
result.to_csv("../Data/merged.csv")

# Parameter Acquisition Pipeline

In [12]:
def run_dbstep(file_path, atom1, atom2):
    """
    Compute the steric descriptors of one conformer stored in an .xyz file.

    Input: path of the .xyz file and the two identified atom indices.
    Two dbstep runs are made (Sterimol and buried volume) and the geometric
    descriptors from `get_coord` are added.

    Returns the tuple
        (L, Bmin, Bmax, bur_shell, bur_vol, max_distance, tot_V, disp,
         max_axis, A2, min_axis)
    where tot_V is the bounding-box volume, max_axis the longest box edge and
    A2 / min_axis the middle and shortest edges divided by max_axis.
    Returns None when `get_coord` could not read the file.
    """
    # Indices are shifted by one before being handed to dbstep (presumably
    # because dbstep numbers atoms from 1 while the ids come from RDKit -- confirm).
    sterimol_job = db.dbstep(file_path, atom1=atom1 + 1, atom2=atom2 + 1, commandline=True, sterimol=True, measure='classic')
    # NOTE(review): only atom2 is passed here, so atom1 falls back to dbstep's
    # default; confirm the buried volume is centred on the intended atom.
    volume_job = db.dbstep(file_path, atom2=atom2 + 1, commandline=True, volume=True, measure='classic')

    sterimol_values = (sterimol_job.L, sterimol_job.Bmin, sterimol_job.Bmax)
    burial_values = (volume_job.bur_shell, volume_job.bur_vol)

    max_distance, box, disp = get_coord(file_path, atom2)
    if max_distance is None:
        return None

    tot_V = box[0] * box[1] * box[2]
    ascending = np.sort(box)
    max_axis = np.max(box)
    A2 = ascending[-2] / max_axis
    min_axis = np.min(box) / max_axis
    return (*sterimol_values, *burial_values, max_distance, tot_V, disp, max_axis, A2, min_axis)


def calculate_average(properties,mode='default',weights=None):
    """
    Combine per-conformer properties into one set of values per atom.

    Input:
        properties: list over conformers; each entry is a list over atoms; each
            atom entry is a sequence of numeric properties (in this notebook the
            tuples returned by run_dbstep). All conformers are assumed to have
            the same number of atoms and properties as the first one.
        mode:
            'default'           - plain arithmetic mean over conformers
            'boltzman_weighted' - weighted mean using `weights`, divided by sum(weights)
            'boltzman_max'      - values of the conformer with the largest weight
                                  (the last one if several share the maximum)
        weights: one weight per conformer, aligned with `properties`; only used
            by the two Boltzmann modes.

    Returns a list over atoms of lists over properties. An empty `properties`
    (or conformers without atoms) yields an empty list. An unrecognised mode
    yields all zeros.
    """
    if weights is None:
        weights = []
    if len(properties) == 0 or len(properties[0]) == 0:
        return []

    num_conformers = len(properties)
    num_atoms = len(properties[0])
    num_properties = len(properties[0][0])
    averages = [[0] * num_properties for _ in range(num_atoms)]

    if mode=='default':
        for prop in properties:
            for atom_idx, atom_props in enumerate(prop):
                for prop_idx, val in enumerate(atom_props):
                    averages[atom_idx][prop_idx] += val / num_conformers
    elif mode=="boltzman_weighted":
        for prop, weight in zip(properties, weights):
            for atom_idx, atom_props in enumerate(prop):
                for prop_idx, val in enumerate(atom_props):
                    averages[atom_idx][prop_idx] += val * weight
        # The normaliser does not change inside the loop, so compute it once.
        total_weight = sum(weights)
        for atom_avg in averages:
            for prop_idx in range(num_properties):
                atom_avg[prop_idx] /= total_weight
    elif mode=="boltzman_max":
        if len(weights) > 0:
            best_weight = max(weights)
            for prop, weight in zip(properties, weights):
                if weight == best_weight:
                    for atom_idx, atom_props in enumerate(prop):
                        for prop_idx, val in enumerate(atom_props):
                            averages[atom_idx][prop_idx] = val

    return averages


# Lowest energy conformer method

In [19]:


unique_df = df1.drop_duplicates(subset='input electrophile SMILES')

# Order of the values returned by run_dbstep.
property_names = ['L', 'Bmin', 'Bmax', 'bur_shell', 'bur_vol', 'max_distance',
                  'tot_V', 'disp', 'max_axis', 'A2', 'min_axis']

# Find Sterimol parameters for each unique SMILES
results = []
for index, row in unique_df.iterrows():
    c_o_info = find_c_o_atom_ids(row['input electrophile SMILES'])
    if not c_o_info:
        continue
    smiles = c_o_info['smiles']
    oxygen_atom_id = c_o_info['oxygen_atom_ids']
    carbon_atom_id = c_o_info['carbon_atom_ids']

    # Create .xyz files, one per conformer
    output_folder = '../Data/exyz_files_conf_sample'
    os.makedirs(output_folder, exist_ok=True)
    conformers, weights = smiles_to_xyz(smiles, output_folder,'conformer_energy')

    properties = []
    kept_weights = []  # weights of the conformers that made it into `properties`
    for conf, weight in zip(conformers, weights):
        conformer_properties = []
        for i in range(len(oxygen_atom_id)):
            print(oxygen_atom_id[i])
            #L, Bmin,Bmax,bur_shell,bur_vol, max_distance, tot_V, disp,max_axis,A2,min_axis
            props  = run_dbstep(conf, oxygen_atom_id[i], carbon_atom_id[i])
            if props is not None:
                conformer_properties.append(props)
        # Keep a conformer only when every atom pair produced descriptors, and
        # keep its weight alongside so properties and weights stay aligned.
        if conformer_properties and len(conformer_properties) == len(oxygen_atom_id):
            properties.append(conformer_properties)
            kept_weights.append(weight)

    if not properties:
        print(f"No descriptors computed for {smiles}; skipping")
        continue

    # Descriptors of the highest-weight conformer, first (only) atom pair.
    average_properties_array = calculate_average(properties,mode="boltzman_max",weights=kept_weights)[0]

    results.append({'smiles': smiles, **dict(zip(property_names, average_properties_array))})


# Display Sterimol results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_csv_file = '../Data/boltz_max_sterimol_results.csv'
results_df.to_csv(results_csv_file, index=False)

0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_0conf.xyz / Bmin:  1.70 / Bmax:  6.85 / L:  8.50
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_1conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_2conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_3conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_4conf.xyz / Bmin:  1.70 / Bmax:  6.85 / L:  8.50
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_5conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_

In [20]:

unique_df = df1.drop_duplicates(subset='nucleophile SMILES')

# Order of the values returned by run_dbstep.
property_names = ['L', 'Bmin', 'Bmax', 'bur_shell', 'bur_vol', 'max_distance',
                  'tot_V', 'disp', 'max_axis', 'A2', 'min_axis']

# Find Sterimol parameters for each unique SMILES
results = []
for index, row in unique_df.iterrows():
    terminal_double_bonded_carbons = find_terminal_double_bonded_carbons(row['nucleophile SMILES'])
    if not terminal_double_bonded_carbons:
        continue
    smiles = terminal_double_bonded_carbons['smiles']
    terminal_atom_ids = terminal_double_bonded_carbons['terminal_atoms']
    non_terminal_atoms = terminal_double_bonded_carbons['non_terminal_atoms']

    # Create .xyz files, one per conformer
    output_folder = '../Data/nxyz_files_conf_sample'
    os.makedirs(output_folder, exist_ok=True)
    conformers, weights = smiles_to_xyz(smiles, output_folder,'conformer_energy')

    properties = []
    kept_weights = []  # weights of the conformers that made it into `properties`
    for conf, weight in zip(conformers, weights):
        conformer_properties = []
        for i in range(len(terminal_atom_ids)):
            print(terminal_atom_ids[i])
            #L, Bmin,Bmax,bur_shell,bur_vol, max_distance, tot_V, disp,max_axis,A2,min_axis
            props  = run_dbstep(conf, terminal_atom_ids[i], non_terminal_atoms[i])
            if props is not None:
                conformer_properties.append(props)
        # Keep a conformer only when every atom pair produced descriptors, and
        # keep its weight alongside so properties and weights stay aligned.
        if conformer_properties and len(conformer_properties) == len(terminal_atom_ids):
            properties.append(conformer_properties)
            kept_weights.append(weight)

    if not properties:
        print(f"No descriptors computed for {smiles}; skipping")
        continue

    average_properties = calculate_average(properties,mode="boltzman_max",weights=kept_weights)
    average_properties_array = np.array(average_properties)

    # Among the candidate atom pairs, report the one with the largest L.
    max_L_index = np.argmax(average_properties_array[:, 0])
    filtered_properties =  average_properties[max_L_index]

    results.append({'smiles': smiles, **dict(zip(property_names, filtered_properties))})

# Display Sterimol results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_csv_file = '../Data/n_boltz_max_sterimol_results.csv'
results_df.to_csv(results_csv_file, index=False)

0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_0conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_1conf.xyz / Bmin:  1.70 / Bmax:  6.21 / L:  4.96
      R/Å     %V_Bur     %S_Bur
     3.50      33.65       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_2conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_3conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_4conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_5conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_6conf.xyz / Bm

In [6]:

# Electrophile descriptors written by the lowest-energy-conformer cell.
results_csv_file = '../Data/boltz_max_sterimol_results.csv'
sterimol_df = pd.read_csv(results_csv_file) # load electrophile data
# Copy the SMILES into a column named like the dataset's key so it can be used for the join.
sterimol_df["input electrophile SMILES"]=  sterimol_df["smiles"] 
# Load additional information from the second CSV file
nresults_csv_file = '../Data/n_boltz_max_sterimol_results.csv'
nsterimol_df = pd.read_csv(nresults_csv_file)  # load nucleophile data
# Same trick for the nucleophile join key.
nsterimol_df["nucleophile SMILES"]=  nsterimol_df["smiles"]


# Reaction table already merged with the catalyst descriptors.
additional_info_csv_file = '../Data/merged.csv'
additional_info_df = pd.read_csv(additional_info_csv_file)
# Number of distinct nucleophile SMILES in the reaction table.
print(len(np.unique(additional_info_df["nucleophile SMILES"])))

# Merge DataFrames based on the 'SMILES' column
# Left joins keep every reaction row; descriptor columns are empty where no match exists.
merged_df = pd.merge( additional_info_df, sterimol_df, on='input electrophile SMILES', how='left')
# Column names shared by the two descriptor tables get the suffix _e (electrophile) or _n (nucleophile).
merged_df = pd.merge( merged_df, nsterimol_df, on='nucleophile SMILES', how='left',suffixes=('_e', '_n'))
# Save the merged DataFrame to a new CSV file
merged_csv_file = '../Data/merged_max.csv'
merged_df.to_csv(merged_csv_file, index=False)

print(f"Merged results saved to {merged_csv_file}")

49
Merged results saved to ../Data/merged_max.csv


# Boltzmann-average conformer method

In [15]:
unique_df = df1.drop_duplicates(subset='nucleophile SMILES')

# Order of the values returned by run_dbstep.
property_names = ['L', 'Bmin', 'Bmax', 'bur_shell', 'bur_vol', 'max_distance',
                  'tot_V', 'disp', 'max_axis', 'A2', 'min_axis']

# Find Sterimol parameters for each unique SMILES
results = []
for index, row in unique_df.iterrows():
    terminal_double_bonded_carbons = find_terminal_double_bonded_carbons(row['nucleophile SMILES'])
    if not terminal_double_bonded_carbons:
        continue
    smiles = terminal_double_bonded_carbons['smiles']
    terminal_atom_ids = terminal_double_bonded_carbons['terminal_atoms']
    non_terminal_atoms = terminal_double_bonded_carbons['non_terminal_atoms']

    # Create .xyz files, one per conformer
    output_folder = '../Data/nxyz_files_conf_sample'
    os.makedirs(output_folder, exist_ok=True)
    conformers, weights = smiles_to_xyz(smiles, output_folder,'conformer_energy')

    properties = []
    kept_weights = []  # weights of the conformers that made it into `properties`
    for conf, weight in zip(conformers, weights):
        conformer_properties = []
        for i in range(len(terminal_atom_ids)):
            print(terminal_atom_ids[i])
            #L, Bmin,Bmax,bur_shell,bur_vol, max_distance, tot_V, disp,max_axis,A2,min_axis
            props  = run_dbstep(conf, terminal_atom_ids[i], non_terminal_atoms[i])
            if props is not None:
                conformer_properties.append(props)
        # Keep a conformer only when every atom pair produced descriptors, and
        # keep its weight alongside so properties and weights stay aligned.
        if conformer_properties and len(conformer_properties) == len(terminal_atom_ids):
            properties.append(conformer_properties)
            kept_weights.append(weight)

    if not properties:
        print(f"No descriptors computed for {smiles}; skipping")
        continue

    # Boltzmann-weighted mean over the retained conformers.
    average_properties = calculate_average(properties,mode="boltzman_weighted",weights=kept_weights)
    average_properties_array = np.array(average_properties)

    # Among the candidate atom pairs, report the one with the largest L.
    max_L_index = np.argmax(average_properties_array[:, 0])
    filtered_properties =  average_properties[max_L_index]
    print(filtered_properties )

    results.append({'smiles': smiles, **dict(zip(property_names, filtered_properties))})

# Display Sterimol results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_csv_file = '../Data/n_boltz_weighted_sterimol_results.csv'
results_df.to_csv(results_csv_file, index=False)

0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_0conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_1conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_2conf.xyz / Bmin:  1.70 / Bmax:  6.21 / L:  4.96
      R/Å     %V_Bur     %S_Bur
     3.50      33.65       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_3conf.xyz / Bmin:  1.70 / Bmax:  6.21 / L:  4.96
      R/Å     %V_Bur     %S_Bur
     3.50      33.65       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_4conf.xyz / Bmin:  1.70 / Bmax:  6.21 / L:  4.96
      R/Å     %V_Bur     %S_Bur
     3.50      33.65       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_5conf.xyz / Bmin:  1.70 / Bmax:  5.44 / L:  6.17
      R/Å     %V_Bur     %S_Bur
     3.50      29.09       0.00
0
   nxyz_files_conf_sample/C=CC[Si](C)(C)C_6conf.xyz / Bm

In [16]:

# Boltzmann-weighted Sterimol / buried-volume descriptors for every unique electrophile.
# For each conformer, run_dbstep is evaluated once per (oxygen, carbon) atom pair;
# calculate_average combines the per-conformer results and its first entry is
# written to the results CSV.
unique_df = df1.drop_duplicates(subset='input electrophile SMILES')

# Column names for the 11 values of a property tuple, in index order
# (L, Bmin, Bmax, bur_shell, bur_vol, max_distance, tot_V, disp, max_axis, A2, min_axis).
property_names = ['L', 'Bmin', 'Bmax', 'bur_shell', 'bur_vol', 'max_distance',
                  'tot_V', 'disp', 'max_axis', 'A2', 'min_axis']

# All conformer .xyz files share one folder, so create it once instead of per molecule.
output_folder = '../Data/exyz_files_conf_sample'
os.makedirs(output_folder, exist_ok=True)

# Find Sterimol parameters for each unique SMILES
results = []
for _, row in unique_df.iterrows():
    c_o_info = find_c_o_atom_ids(row['input electrophile SMILES'])
    if not c_o_info:
        continue
    smiles = c_o_info['smiles']
    oxygen_atom_id = c_o_info['oxygen_atom_ids']
    carbon_atom_id = c_o_info['carbon_atom_ids']

    conformers, weights = smiles_to_xyz(smiles, output_folder, 'conformer_energy')

    properties = []        # one entry per usable conformer: [props for each atom pair]
    kept_conformers = []   # positions (within `conformers`) of the usable conformers
    any_dropped = False
    for conf_idx, conf in enumerate(conformers):
        conformer_properties = []
        for i in range(len(oxygen_atom_id)):
            print(oxygen_atom_id[i])
            props = run_dbstep(conf, oxygen_atom_id[i], carbon_atom_id[i])
            if props is not None:
                conformer_properties.append(props)
        # Keep a conformer only when every atom pair produced a result. Testing the
        # collected list (rather than the last `props`) avoids reading a stale or
        # unbound variable and keeps `properties` rectangular.
        if conformer_properties and len(conformer_properties) == len(oxygen_atom_id):
            properties.append(conformer_properties)
            kept_conformers.append(conf_idx)
        else:
            any_dropped = True

    if not properties:
        print(f"Skipping {smiles}: no conformer produced a complete set of properties")
        continue

    if any_dropped:
        # Keep the weights aligned with the conformers that were actually retained.
        # NOTE(review): assumes `weights` holds one entry per conformer, in the same
        # order as `conformers` - confirm against smiles_to_xyz / calculate_average.
        weights = np.asarray(weights)[kept_conformers]

    # Only the first entry of the averaged result is used.
    # NOTE(review): presumably this is the first C=O atom pair - confirm against
    # calculate_average.
    average_properties_array = calculate_average(properties, mode="boltzman_weighted", weights=weights)[0]

    results.append({'smiles': smiles,
                    **{name: average_properties_array[k] for k, name in enumerate(property_names)}})


# Display Sterimol results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_csv_file = '../Data/boltz_weighted_sterimol_results.csv'
results_df.to_csv(results_csv_file, index=False)

0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_0conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_1conf.xyz / Bmin:  1.70 / Bmax:  6.85 / L:  8.50
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_2conf.xyz / Bmin:  1.70 / Bmax:  6.85 / L:  8.50
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_3conf.xyz / Bmin:  1.70 / Bmax:  6.85 / L:  8.50
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_4conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_5conf.xyz / Bmin:  1.70 / Bmax:  8.16 / L:  6.44
      R/Å     %V_Bur     %S_Bur
     3.50      26.32       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2_

In [8]:

# Join the Boltzmann-weighted descriptor tables onto the reaction table and save the result.

# Electrophile descriptors: duplicate `smiles` under the reaction table's key name.
results_csv_file = '../Data/boltz_weighted_sterimol_results.csv'
sterimol_df = pd.read_csv(results_csv_file).assign(
    **{"input electrophile SMILES": lambda frame: frame["smiles"]}
)

# Nucleophile descriptors: same treatment with the nucleophile key name.
nresults_csv_file = '../Data/n_boltz_weighted_sterimol_results.csv'
nsterimol_df = pd.read_csv(nresults_csv_file).assign(
    **{"nucleophile SMILES": lambda frame: frame["smiles"]}
)

# Reaction-level table that receives the descriptor columns.
additional_info_csv_file = '../Data/merged.csv'
additional_info_df = pd.read_csv(additional_info_csv_file)

# Left joins keep every reaction row; columns present in both sides of the second
# join are disambiguated with the _e (electrophile) / _n (nucleophile) suffixes.
merged_df = (
    additional_info_df
    .merge(sterimol_df, on='input electrophile SMILES', how='left')
    .merge(nsterimol_df, on='nucleophile SMILES', how='left', suffixes=('_e', '_n'))
)

# Write the combined table to disk.
merged_csv_file = '../Data/merged_weighted.csv'
merged_df.to_csv(merged_csv_file, index=False)

print(f"Merged results saved to {merged_csv_file}")

Merged results saved to ../Data/merged_weighted.csv


# Default conformer Method

In [26]:
# Single-conformer ("basic") Sterimol / buried-volume descriptors for every unique
# electrophile. run_dbstep is evaluated once per (oxygen, carbon) atom pair and the
# result of the last pair that succeeded is written to the results CSV.
unique_df = df1.drop_duplicates(subset='input electrophile SMILES')

# Column names for the 11 values of a property tuple, in index order
# (L, Bmin, Bmax, bur_shell, bur_vol, max_distance, tot_V, disp, max_axis, A2, min_axis).
property_names = ['L', 'Bmin', 'Bmax', 'bur_shell', 'bur_vol', 'max_distance',
                  'tot_V', 'disp', 'max_axis', 'A2', 'min_axis']

# All .xyz files share one folder, so create it once instead of per molecule.
output_folder = '../Data/exyz_file'
os.makedirs(output_folder, exist_ok=True)

# Find Sterimol parameters for each unique SMILES
results = []
for _, row in unique_df.iterrows():
    c_o_info = find_c_o_atom_ids(row['input electrophile SMILES'])
    if not c_o_info:
        continue
    smiles = c_o_info['smiles']
    oxygen_atom_id = c_o_info['oxygen_atom_ids']
    carbon_atom_id = c_o_info['carbon_atom_ids']

    conf = smiles_to_xyz(smiles, output_folder, 'basic')

    # Reset per molecule so a result from a previous molecule can never be reused.
    conformer_properties = None
    for i in range(len(oxygen_atom_id)):
        print(oxygen_atom_id[i])
        props = run_dbstep(conf, oxygen_atom_id[i], carbon_atom_id[i])
        # The sibling cells treat a None return from run_dbstep as "no result";
        # do the same here instead of indexing into None below.
        if props is not None:
            conformer_properties = props

    if conformer_properties is None:
        print(f"Skipping {smiles}: run_dbstep returned no properties")
        continue

    results.append({'smiles': smiles,
                    **{name: conformer_properties[k] for k, name in enumerate(property_names)}})


# Display Sterimol results
results_df = pd.DataFrame(results)

# Save the results to a CSV file. The merge cell reads this file from ../Data/,
# so it has to be written there rather than into the working directory.
results_csv_file = '../Data/basic_sterimol_results.csv'
results_df.to_csv(results_csv_file, index=False)

0
   exyz_files_conf_sample/O=Cc2ccc1ccccc1c2.xyz / Bmin:  1.70 / Bmax:  8.17 / L:  6.24
      R/Å     %V_Bur     %S_Bur
     3.50      26.27       0.00
9
   exyz_files_conf_sample/COc2ccc1cc(C=O)ccc1c2.xyz / Bmin:  1.70 / Bmax: 10.42 / L:  6.16
      R/Å     %V_Bur     %S_Bur
     3.50      28.44       0.00
0
   exyz_files_conf_sample/O=Cc2ccc1cc(Br)ccc1c2.xyz / Bmin:  1.70 / Bmax:  9.66 / L:  6.14
      R/Å     %V_Bur     %S_Bur
     3.50      26.48       0.00
0
   exyz_files_conf_sample/O=Cc1ccccc1.xyz / Bmin:  1.70 / Bmax:  5.65 / L:  6.37
      R/Å     %V_Bur     %S_Bur
     3.50      26.07       0.00
0
   exyz_files_conf_sample/O=Cc1ccccc1F.xyz / Bmin:  1.70 / Bmax:  5.71 / L:  6.32
      R/Å     %V_Bur     %S_Bur
     3.50      28.61       0.00
8
   exyz_files_conf_sample/Cc1ccccc1C=O.xyz / Bmin:  1.70 / Bmax:  5.66 / L:  6.25
      R/Å     %V_Bur     %S_Bur
     3.50      37.35       0.00
7
   exyz_files_conf_sample/Cc1cccc(C=O)c1.xyz / Bmin:  1.70 / Bmax:  6.83 / L:  6.34
    

In [25]:
# Single-conformer ("basic") Sterimol / buried-volume descriptors for every unique
# nucleophile. run_dbstep is evaluated once per (terminal, non-terminal) atom pair
# and the pair with the largest L is written to the results CSV.
unique_df = df1.drop_duplicates(subset='nucleophile SMILES')

# Column names for the 11 values of a property tuple, in index order
# (L, Bmin, Bmax, bur_shell, bur_vol, max_distance, tot_V, disp, max_axis, A2, min_axis).
property_names = ['L', 'Bmin', 'Bmax', 'bur_shell', 'bur_vol', 'max_distance',
                  'tot_V', 'disp', 'max_axis', 'A2', 'min_axis']

# All .xyz files share one folder, so create it once instead of per molecule.
output_folder = '../Data/nxyz_files'
os.makedirs(output_folder, exist_ok=True)

# Find Sterimol parameters for each unique SMILES
results = []
for _, row in unique_df.iterrows():
    terminal_double_bonded_carbons = find_terminal_double_bonded_carbons(row['nucleophile SMILES'])
    if not terminal_double_bonded_carbons:
        continue
    smiles = terminal_double_bonded_carbons['smiles']
    terminal_atom_ids = terminal_double_bonded_carbons['terminal_atoms']
    non_terminal_atoms = terminal_double_bonded_carbons['non_terminal_atoms']

    conf = smiles_to_xyz(smiles, output_folder, 'basic')

    # Collect the property tuple of every atom pair for which run_dbstep succeeded.
    conformer_properties = []
    for i in range(len(terminal_atom_ids)):
        print(terminal_atom_ids[i])
        props = run_dbstep(conf, terminal_atom_ids[i], non_terminal_atoms[i])
        if props is not None:
            conformer_properties.append(props)

    # Without any successful pair the 2-D column slice below would raise, so skip
    # the molecule explicitly instead.
    if not conformer_properties:
        print(f"Skipping {smiles}: run_dbstep returned no properties")
        continue

    average_properties_array = np.array(conformer_properties)

    # Report the atom pair whose L (column 0) is largest.
    max_L_index = np.argmax(average_properties_array[:, 0])
    filtered_properties = conformer_properties[max_L_index]
    print(filtered_properties)
    results.append({'smiles': smiles,
                    **{name: filtered_properties[k] for k, name in enumerate(property_names)}})

# Display Sterimol results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_csv_file = '../Data/n_basic_sterimol_results.csv'
results_df.to_csv(results_csv_file, index=False)

0
   nxyz_files/C=CC[Si](C)(C)C.xyz / Bmin:  1.70 / Bmax:  5.67 / L:  6.61
      R/Å     %V_Bur     %S_Bur
     3.50      29.87       0.00
(6.61280843, 1.7, 5.674361181787232, 0.0, 29.87146271820041, 6.990867154258461, 127.74720455310258, 3.829167174786352, 6.8668205784888805, 0.685035546402, 0.5759319846448588)
0
   nxyz_files/C=C(OC)O[Si](C)(C)C(C)(C)C.xyz / Bmin:  1.70 / Bmax:  6.19 / L:  7.36
      R/Å     %V_Bur     %S_Bur
     3.50      38.77       0.00
(7.36371876, 1.7, 6.191926025981468, 0.0, 38.76955960217641, 8.497158235703777, 217.3873928309288, 4.752916893179425, 8.447058309515983, 0.6027463376540259, 0.5983885811707568)
0
   nxyz_files/C=C(OCc1ccccc1)O[Si](C)(C)C(C)(C)C.xyz / Bmin:  2.64 / Bmax:  7.80 / L:  5.69
      R/Å     %V_Bur     %S_Bur
     3.50      42.69       0.00
(5.69083997, 2.6396692615506563, 7.79745767447763, 0.0, 42.694696913785684, 13.202731379948649, 373.88185694713394, 7.445038499445073, 13.186831969859647, 0.4220527424611285, 0.38631915941278727)
0
   

In [7]:

# Join the single-conformer ("basic") descriptor tables onto the reaction table and save the result.

# Electrophile descriptors: duplicate `smiles` under the reaction table's key name.
results_csv_file = '../Data/basic_sterimol_results.csv'
sterimol_df = pd.read_csv(results_csv_file).assign(
    **{"input electrophile SMILES": lambda frame: frame["smiles"]}
)

# Nucleophile descriptors: same treatment with the nucleophile key name.
nresults_csv_file = '../Data/n_basic_sterimol_results.csv'
nsterimol_df = pd.read_csv(nresults_csv_file).assign(
    **{"nucleophile SMILES": lambda frame: frame["smiles"]}
)

# Reaction-level table that receives the descriptor columns.
additional_info_csv_file = '../Data/merged.csv'
additional_info_df = pd.read_csv(additional_info_csv_file)

# Left joins keep every reaction row; columns present in both sides of the second
# join are disambiguated with the _e (electrophile) / _n (nucleophile) suffixes.
merged_df = (
    additional_info_df
    .merge(sterimol_df, on='input electrophile SMILES', how='left')
    .merge(nsterimol_df, on='nucleophile SMILES', how='left', suffixes=('_e', '_n'))
)

# Write the combined table to disk.
merged_csv_file = '../Data/merged_default.csv'
merged_df.to_csv(merged_csv_file, index=False)

print(f"Merged results saved to {merged_csv_file}")

Merged results saved to ../Data/merged_default.csv
