In [None]:
# Run the cells in the order provided
import sys
import os
from rdkit import Chem
import random
import sys
import pandas as pd
import numpy as np
import progressbar
from IPython.display import display
import matplotlib as mpl
from matplotlib import pyplot as plt
import pylab as pl
import numpy as np
from matplotlib_venn import venn3, venn3_circles, venn3_unweighted
import plotly.graph_objects as go
import dash
import dash_table
import dash_html_components as html
import re
from urllib.request import urlopen
import pubchempy

# Class to obtain information from SMILES

In [114]:
class PAH:
    """Element counts and halogen-substitution helpers for one SMILES string.

    Everything here works on the raw text of the SMILES: elements are counted
    with ``str.count`` and substitutions replace individual ``H`` (or ``X``)
    characters, so hydrogens have to be written explicitly (e.g. ``[H]``) for
    the generators to have anything to replace.

    Attributes:
        _smile: the SMILES string passed to the constructor.
        _numCl, _numBr, _numF, _numC, _numH, _numO, _numN, _numS: number of
            occurrences of each (uppercase) symbol in ``_smile``.
        _inchiKey: the InChIKey passed to the constructor (stored as given).
        positions: character indices substituted by ``generate`` and
            ``replaceX_Halo_Hydro``; accumulates across calls.
    """

    # group name -> (permutation keyword, one-letter marker used in permutations)
    _SINGLE_GROUPS = {
        "Cl": ("nCl", 'C'),
        "F": ("nF", 'F'),
        "Br": ("nBr", 'B'),
    }

    # group name -> ((keyword, marker, halogen) for h1, (keyword, marker, halogen) for h2)
    _PAIR_GROUPS = {
        "ClBr": (("nCl", 'C', "Cl"), ("nBr", 'B', "Br")),
        "BrCl": (("nCl", 'C', "Cl"), ("nBr", 'B', "Br")),
        "ClF": (("nCl", 'C', "Cl"), ("nF", 'F', "F")),
        "FCl": (("nCl", 'C', "Cl"), ("nF", 'F', "F")),
        "FBr": (("nBr", 'B', "Br"), ("nF", 'F', "F")),
        "BrF": (("nBr", 'B', "Br"), ("nF", 'F', "F")),
    }

    def __init__(self, smile='',inchiKey=''):
        """Store the SMILES / InChIKey and count the element symbols in the SMILES."""
        self._smile=smile
        self._numCl=smile.count("Cl")
        self._numBr=smile.count("Br")
        self._numF=smile.count("F")
        # Every "Cl" also contains a "C", so those are removed from the carbon count.
        self._numC=smile.count("C") - self._numCl
        self._numH=smile.count("H")
        self._numO=smile.count("O")
        self._numN=smile.count("N")
        self._numS=smile.count("S")
        self._inchiKey=inchiKey
        self.positions=[]

    def split(self,word):
        """Return the characters of ``word`` as a list."""
        return list(word)

    def permutation(self,nCl=0,nF=0,nBr=0):
        """Return the orderings of the hydrogen slots for the given halogen counts.

        The multiset that is permuted holds ``_numH - (nCl+nF+nBr)`` ``'H'``
        markers followed by ``nCl`` ``'C'``, ``nF`` ``'F'`` and ``nBr`` ``'B'``
        markers (one letter per halogen). Each returned item is a list of
        markers, one per hydrogen slot.
        """
        numHalo=nCl+nF+nBr
        # Multiplying a list by a non-positive number yields an empty list,
        # which matches "append nothing" for zero/negative counts.
        elementsList=(['H']*(self._numH-numHalo)
                      + ['C']*nCl
                      + ['F']*nF
                      + ['B']*nBr)
        perms = [[]]
        for n in elementsList:
            new_perm = []
            for perm in perms:
                for i in range(len(perm) + 1):
                    new_perm.append(perm[:i] + [n] + perm[i:])
                    # Stop inserting once an equal element is reached; inserting
                    # after it would repeat an ordering already produced.
                    if i < len(perm) and perm[i] == n: break
            perms = new_perm
        return perms

    def duplicates(self,seq,item):
        """Return every index at which ``item`` is found in ``seq`` (via ``seq.index``)."""
        start_at = -1
        locs = []
        while True:
            try:
                loc = seq.index(item,start_at+1)
            except ValueError:
                break
            else:
                locs.append(loc)
                start_at = loc
        return locs

    def replaceSmileFromPermutation(self,positions_1=None,positions_2=None,positions_3=None,maxH=0, halo_1="",halo_2="", halo_3="",smile="",cont=0):
        """Replace selected ``H`` characters of ``smile`` with halogen symbols.

        The ``H`` characters are numbered 0, 1, 2, ... in order of appearance.
        The ``H`` whose number is in ``positions_k`` is replaced by ``halo_k``.

        Args:
            positions_1, positions_2, positions_3: collections of hydrogen
                numbers; ``None`` or empty means "no replacement".
            halo_1, halo_2, halo_3: replacement text for the matching positions.
            smile: the SMILES text to edit.
            maxH, cont: accepted for backward compatibility; not used.

        Returns:
            The edited SMILES string.
        """
        chars=list(smile)
        replacements=((positions_1,halo_1),(positions_2,halo_2),(positions_3,halo_3))
        h_number=-1
        for index, char in enumerate(chars):
            if char != 'H':
                continue
            h_number+=1
            for positions, halo in replacements:
                if positions and h_number in positions:
                    chars[index]=halo
        return "".join(chars)

    def generateGroupOfOne(self,group=""):
        """Return every SMILES obtained by replacing 1.._numH hydrogens with one halogen.

        Args:
            group: ``"Cl"``, ``"F"`` or ``"Br"``.

        Raises:
            ValueError: if ``group`` is not one of the three names above.
        """
        if group not in self._SINGLE_GROUPS:
            raise ValueError("Group not recognized: %r" % (group,))
        count_keyword, elementToLook = self._SINGLE_GROUPS[group]
        numH=self._numH
        allPosibleSmiles=[]

        for h in range(1,numH+1):
            perms=self.permutation(**{count_keyword: h})
            for p in perms:
                elementInPerms=''.join(p)
                positions=self.duplicates(elementInPerms,elementToLook)
                allPosibleSmiles.append(
                    self.replaceSmileFromPermutation(positions_1=positions,maxH=numH,halo_1=group,smile=self._smile))

        return allPosibleSmiles

    def generateGroupOfTwo(self,group=""):
        """Return every SMILES carrying at least one of each of two halogens.

        Args:
            group: ``"ClBr"``/``"BrCl"``, ``"ClF"``/``"FCl"`` or ``"FBr"``/``"BrF"``.

        Raises:
            ValueError: if ``group`` is not one of the names above.
        """
        if group not in self._PAIR_GROUPS:
            raise ValueError("Group not recognized: %r" % (group,))
        (keyword_1, elementToLook_1, halo_1), (keyword_2, elementToLook_2, halo_2) = self._PAIR_GROUPS[group]
        numH=self._numH
        allPosibleSmiles=[]

        for h1 in range(1,numH+1):
            # h1 + h2 never exceeds the number of hydrogens.
            for h2 in range(1,(numH+1)-h1):
                perms=self.permutation(**{keyword_1: h1, keyword_2: h2})
                for p in perms:
                    elementInPerms=''.join(p)
                    positions_1=self.duplicates(elementInPerms,elementToLook_1)
                    positions_2=self.duplicates(elementInPerms,elementToLook_2)
                    allPosibleSmiles.append(
                        self.replaceSmileFromPermutation(positions_1=positions_1,positions_2=positions_2,maxH=numH,halo_1=halo_1,halo_2=halo_2,smile=self._smile))

        return allPosibleSmiles

    def generateGroupOfThree(self,group=""):
        """Return every SMILES carrying at least one Cl, one Br and one F.

        ``group`` is accepted for symmetry with the other generators but is
        not consulted; the three halogens are always Cl, Br and F.
        """
        numH=self._numH
        allPosibleSmiles=[]

        for h1 in range(1,numH+1):
            for h2 in range(1,(numH+1)-h1):
                # h1 + h2 + h3 never exceeds the number of hydrogens.
                for h3 in range(1,(numH+1)-(h1+h2)):
                    perms=self.permutation(nCl=h1,nBr=h2,nF=h3)
                    for p in perms:
                        elementInPerms=''.join(p)
                        positions_1=self.duplicates(elementInPerms,'C')
                        positions_2=self.duplicates(elementInPerms,'B')
                        positions_3=self.duplicates(elementInPerms,'F')
                        allPosibleSmiles.append(
                            self.replaceSmileFromPermutation(positions_1=positions_1,positions_2=positions_2,positions_3=positions_3,maxH=numH,halo_1="Cl",halo_2="Br",halo_3="F",smile=self._smile))

        return allPosibleSmiles

    def generateAllPosible(self, group=""):
        """Dispatch to the one-, two- or three-halogen generator for ``group``.

        Returns the list of generated SMILES, or prints a message and returns
        ``None`` when ``group`` is not a recognized name.
        """
        if group in self._SINGLE_GROUPS:
            return self.generateGroupOfOne(group)
        if group in self._PAIR_GROUPS:
            return self.generateGroupOfTwo(group)
        if group == "BrClF":
            return self.generateGroupOfThree(group)

        print("Group not recognized")
        return None

    def generateAllPosible_AllGroups(self):
        """Return the concatenated results of ``generateAllPosible`` for all seven groups."""
        all_Posible=[]
        # Same order as the original concatenation: Cl, Br, F, BrF, BrCl, ClF, BrClF.
        for group in ("Cl","Br","F","BrF","BrCl","ClF","BrClF"):
            all_Posible+=self.generateAllPosible(group)
        return all_Posible

    def replaceMultiple(self, mainString, toBeReplaces, newString):
        """Replace, in list order, every occurrence of each item of ``toBeReplaces`` with ``newString``."""
        for elem in toBeReplaces :
            if elem in mainString :
                mainString = mainString.replace(elem, newString)

        return  mainString

    def convertSMILE_to_CX(self):
        """Return the SMILES with every ``Cl``, ``H``, ``Br`` and ``F`` replaced by ``X``.

        When the stored SMILES is ``None``/NaN a string is built from the
        element counts instead: ``_numC`` copies of ``"C("`` followed by
        ``"X)(X)"`` groups and a closing ``"(X)X"``.
        """
        smileStr=self._smile

        if smileStr is None or pd.isna(smileStr):
            total_X=self._numH+self._numCl+self._numF+self._numBr
            mult=int((total_X - 2)/2)
            return "C("*self._numC + "X)(X)"*mult + "(X)X"

        return self.replaceMultiple(smileStr,["Cl","H","Br","F"],"X")

    def convertSMILE_CH(self):
        """Return ``convertSMILE_to_CX()`` with every ``X`` written as ``H``."""
        return self.convertSMILE_to_CX().replace("X","H")

    def _substitute_random(self, smile, num, halogen, target):
        """Replace ``num`` randomly chosen ``target`` characters of ``smile`` with ``halogen``.

        If ``num`` is at least the number of ``target`` characters, all of them
        are replaced. Every substituted index is appended to ``self.positions``.
        """
        chars=list(smile)
        sites=[index for index, char in enumerate(chars) if char == target]

        if num <= 0:
            chosen=[]
        elif num >= len(sites):
            # Saturate: there is nothing to choose between.
            chosen=sites
        else:
            chosen=random.sample(sites, num)

        for index in chosen:
            chars[index]=halogen
            self.positions.append(index)

        return "".join(chars)

    def generate(self,smile,num,halogen):
        """Replace ``num`` randomly chosen ``H`` characters of ``smile`` with ``halogen``.

        Asking for more replacements than there are ``H`` characters replaces
        all of them (the previous implementation either never returned or
        overwrote every character of the string in that situation).

        Returns:
            The edited SMILES string; substituted indices are also appended to
            ``self.positions``.
        """
        return self._substitute_random(smile, num, halogen, "H")

    def add_IndividualList(self,smile,group,numCl,numF,numBr,numH):
        """Return one output row: ``[smile, group, numCl, numF, numBr, self._numC, numH]``."""
        return [smile, group, numCl, numF, numBr, self._numC, numH]

    def replaceH_Halo(self):
        """Return one randomly substituted SMILES row per halogen-count combination.

        For every count 1.._numH a Cl-, F- and Br-only row is produced; then one
        row per (h1, h2) pair for the groups ClF, BrCl and BrF; then one row per
        (h1, h2, h3) triple for BrClF. The total number of halogens never
        exceeds ``_numH``. Rows have the layout of ``add_IndividualList``.
        """
        all_Possible=[]
        numH=self._numH
        base=self._smile

        for x in range(1,numH+1):
            all_Possible.append(self.add_IndividualList(self.generate(base,x,"Cl"),"Cl",x,0,0,numH-x))
            all_Possible.append(self.add_IndividualList(self.generate(base,x,"F"),"F",0,x,0,numH-x))
            all_Possible.append(self.add_IndividualList(self.generate(base,x,"Br"),"Br",0,0,x,numH-x))

        for h1 in range(1,numH+1):
            for h2 in range(1,(numH+1)-h1):
                left=numH-(h1+h2)

                smile=self.generate(self.generate(base,h1,"Cl"),h2,"F")
                all_Possible.append(self.add_IndividualList(smile,"ClF",h1,h2,0,left))

                smile=self.generate(self.generate(base,h1,"Cl"),h2,"Br")
                all_Possible.append(self.add_IndividualList(smile,"BrCl",h1,0,h2,left))

                smile=self.generate(self.generate(base,h1,"F"),h2,"Br")
                all_Possible.append(self.add_IndividualList(smile,"BrF",0,h1,h2,left))

        for h1 in range(1,numH+1):
            for h2 in range(1,(numH+1)-h1):
                for h3 in range(1,(numH+1)-(h1+h2)):
                    smile=self.generate(base,h1,"Cl")
                    smile=self.generate(smile,h2,"F")
                    smile=self.generate(smile,h3,"Br")
                    all_Possible.append(self.add_IndividualList(smile,"BrClF",h1,h2,h3,numH-(h1+h2+h3)))

        return all_Possible

    def replaceX_Halo_Hydro(self,smile,num,halogen):
        """Same as ``generate`` but the characters being replaced are ``X`` instead of ``H``."""
        return self._substitute_random(smile, num, halogen, "X")

    def generateRandomSMILE(self):
        """Return the all-hydrogen form of the SMILES with the original halogen counts re-placed at random."""
        smile_CH=self.convertSMILE_CH()

        # Same order as before: Cl, then Br, then F.
        for count, halogen in ((self._numCl,"Cl"),(self._numBr,"Br"),(self._numF,"F")):
            if count > 0:
                smile_CH=self.generate(smile_CH,count,halogen)
        return smile_CH

    @staticmethod
    def _formula_term(symbol, count):
        """Return ``''`` for a zero count, ``symbol`` for one, ``symbol + count`` otherwise."""
        if count == 0:
            return ''
        return symbol if count == 1 else symbol + str(count)

    def getGeneral_Formula(self):
        """Return the formula with H and all halogens merged into a single ``H`` term.

        The term order is C, H, N, O, S. The ``H`` symbol is emitted whenever
        the merged count is non-zero; previously it was tied to the hydrogen
        count alone, so a fully halogenated compound lost the symbol while
        keeping the number (e.g. ``"C4"`` instead of ``"CH4"``).
        """
        numX=self._numH + self._numF + self._numCl + self._numBr
        terms=(("C",self._numC),("H",numX),("N",self._numN),("O",self._numO),("S",self._numS))
        return "".join(self._formula_term(symbol,count) for symbol, count in terms)

    def getFormula(self):
        """Return the molecular formula in the order C, H, Cl, F, Br, N, O, S."""
        terms=(("C",self._numC),("H",self._numH),("Cl",self._numCl),("F",self._numF),
               ("Br",self._numBr),("N",self._numN),("O",self._numO),("S",self._numS))
        return "".join(self._formula_term(symbol,count) for symbol, count in terms)

    def getGroup(self):
        """Return the halogen group label for the SMILES.

        One of ``"Cl"``, ``"F"``, ``"Br"``, ``"ClBr"``, ``"ClF"``, ``"FBr"``,
        ``"BrClF"``, or ``""`` when the SMILES contains no halogen (that case
        used to fail with an unbound local variable).
        """
        # Key is (has Cl, has F, has Br).
        labels={
            (True,False,False): "Cl",
            (False,True,False): "F",
            (False,False,True): "Br",
            (True,False,True): "ClBr",
            (True,True,False): "ClF",
            (False,True,True): "FBr",
            (True,True,True): "BrClF",
        }
        key=(self._numCl > 0, self._numF > 0, self._numBr > 0)
        return labels.get(key, "")

    def get_All_positions_Halogen_Substituded_information(self):
        """Return a 22-item row describing which fixed token positions carry a halogen.

        The SMILES is tokenized by dropping brackets, turning parentheses into
        separators and splitting before every uppercase letter. Tokens 23, 22,
        21, 20, 19, 28, 27, 26, 25, 24 map, in that order, to row items 1..10.
        NOTE(review): these indices presumably match one specific scaffold
        written in a fixed atom order - confirm against the input data.

        Row layout: 0 SMILES; 1-10 halogen symbol found at each position (or
        ``''``); 11-15 Cl, Br, F, C, H counts; 16-18 number of Cl, Br, F found
        at tokens 22, 21, 27, 26; 19 InChIKey; 20 formula; 21 group.

        Raises:
            IndexError: if the SMILES yields fewer than 29 tokens.
        """
        smile=self._smile.replace(')',' ').replace('(',' ').replace('[','').replace(']','')
        arr_smile=re.sub( r"([A-Z])", r" \1", smile).split()
        arr_positions= [23,22,21,20,19,28,27,26,25,24]
        counted_positions=(22, 21, 27, 26)
        count_column={'Cl': 16, 'Br': 17, 'F': 18}

        arr_columns=['']*22
        for item in ('F','Cl','Br'):
            subst_cont=0
            for slot, pos in enumerate(arr_positions, start=1):
                if arr_smile[pos] == item:
                    arr_columns[slot] = item
                    if pos in counted_positions:
                        subst_cont+=1
            arr_columns[count_column[item]]= subst_cont

        arr_columns[0]=self._smile
        arr_columns[11]=self._numCl
        arr_columns[12]=self._numBr
        arr_columns[13]=self._numF
        arr_columns[14]=self._numC
        arr_columns[15]=self._numH
        arr_columns[19]=self._inchiKey
        arr_columns[20]=self.getFormula()
        arr_columns[21]=self.getGroup()

        return arr_columns


# Congeners

In [None]:
def eliminate_by_symmetry():
    """Drop rows of 'C10H14_practice.csv' that repeat an InchiKey.

    The first row of every InchiKey is kept. The result is written to
    'C10H14_Symmetry.csv' (without the index) and returned. As a side effect
    the pandas display option ``display.max_colwidth`` is set to unlimited.
    """
    congeners = pd.read_csv('C10H14_practice.csv')
    pd.set_option('display.max_colwidth', None)

    unique_congeners = congeners.drop_duplicates(subset=['InchiKey'], keep="first")
    unique_congeners.to_csv('C10H14_Symmetry.csv', index=False)

    return unique_congeners
        

In [116]:
def halogen_Substituted_information():
    """Rewrite the position-info CSV with one substitution row per compound.

    Reads the "SMILE" and "InchiKey" columns of the CSV, builds a ``PAH`` for
    each row and stores the result of
    ``PAH.get_All_positions_Halogen_Substituded_information`` under the header
    listed below. The same file is overwritten with the new table, which is
    also returned (the header is its first row). As a side effect the pandas
    display option ``display.max_colwidth`` is set to unlimited.
    """
    # Raw string: "\L" and "\g" are not valid escape sequences in a normal literal.
    csv_path=r'PAHs\Latest Data\Latest Substituted practice\get_pos_info_practice.csv'

    df_c=pd.read_csv(csv_path)
    total_rows = len(df_c.index)
    pd.set_option('display.max_colwidth', None)
    bar = progressbar.ProgressBar(max_value=total_rows)
    all_Substituded=[["SMILE","Pos_1","Pos_2","Pos_3","Pos_4","Pos_5","Pos_6","Pos_7","Pos_8","Pos_9","Pos_10","Cl","Br","F","C","H","Subst_Cl","Subst_Br","Subst_F","InchiKey","Formula","Group"]]

    for row in range(0,total_rows):
        smile=df_c.loc[row,"SMILE"]
        inchikey=df_c.loc[row,"InchiKey"]
        pah=PAH(smile=smile,inchiKey=inchikey)
        all_Substituded.append(pah.get_All_positions_Halogen_Substituded_information())
        bar.update(row)

    data=pd.DataFrame(all_Substituded)
    # The header is already the first row of the data, so pandas must not add its own.
    data.to_csv(csv_path, index=False, header = False)
    return data

In [93]:
def get_Compounds_at_least_one_Substituted():
    """Keep only the rows with a value in Pos_2, Pos_3, Pos_7 or Pos_8.

    The position-info CSV is read, filtered and written back to the same path
    (without the index). Nothing is returned.
    """
    # Raw string: "\L" and "\g" are not valid escape sequences in a normal literal.
    csv_path=r'PAHs\Latest Data\Latest Substituted practice\get_pos_info_practice.csv'
    columns_to_check = ["Pos_2", "Pos_3", "Pos_7", "Pos_8"]

    df=pd.read_csv(csv_path)
    # True for rows where at least one of the checked positions is not null.
    has_substituent = df[columns_to_check].notna().any(axis=1)
    df_filtered = df[has_substituent]

    df_filtered.to_csv(csv_path, index=False)

In [None]:
def get_Amount_Compounds_Substituted_table():
    """Count, per halogen, how many rows carry it at Pos_2, Pos_3, Pos_7 and Pos_8.

    Reads 'Aromatic_Substituted_Output.csv' and writes the count table to
    'Aromatic_Substituted_Amount_Output.csv'. The table's first row is
    ``HALO, Pos_2, Pos_3, Pos_7, Pos_8`` followed by one row each for Cl, F
    and Br; pandas additionally writes its default numeric header line.

    Every data row is counted. The earlier loop started at row 1 and so left
    out the first compound; its ``!= np.nan`` guards were always true and are
    not needed, because a missing value never equals a halogen symbol.

    Returns:
        The input DataFrame as read from disk (not the count table).
        As a side effect ``display.max_colwidth`` is set to unlimited.
    """
    df=pd.read_csv('Aromatic_Substituted_Output.csv')
    pd.set_option('display.max_colwidth', None)

    position_columns=["Pos_2","Pos_3","Pos_7","Pos_8"]
    dataframe=[["HALO"]+position_columns]

    for halogen in ("Cl","F","Br"):
        counts=[int((df[column] == halogen).sum()) for column in position_columns]
        dataframe.append([halogen]+counts)

    data=pd.DataFrame(dataframe)
    data.to_csv("Aromatic_Substituted_Amount_Output.csv", index=False)
    return df

# Isomers

In [None]:
def generate_isomers_Aromatic_Smiles():
    """Generate randomly halogenated SMILES for every general structure.

    Reads the "General Smiles" column of 'Aromatic_GeneralStructures.csv',
    collects the rows of ``PAH.replaceH_Halo`` for each structure and writes
    them to "Aromatic_Output.csv" under the header
    ``SMILE, Group, Cl, F, Br, C, H``.

    Returns:
        The written table as a DataFrame whose first row is the header.
        As a side effect ``display.max_colwidth`` is set to unlimited.
    """
    df=pd.read_csv('Aromatic_GeneralStructures.csv')
    total_rows = len(df.index)
    # None means "unlimited"; -1 is no longer accepted by pandas.
    pd.set_option('display.max_colwidth', None)
    bar = progressbar.ProgressBar(max_value=total_rows)
    all_Possible=[["SMILE","Group","Cl","F","Br","C","H"]]

    for row in range(0,total_rows):
        smile=df.loc[row,"General Smiles"]

        # PAH counts the atoms itself; its constructor takes no numH/numC keywords.
        pah=PAH(smile)
        all_Possible+=pah.replaceH_Halo()
        bar.update(row)

    data=pd.DataFrame(all_Possible)
    # The header is already the first row of the data; without header=False pandas
    # would put a numeric header line above it and a later read_csv would not
    # find the "SMILE" column.
    data.to_csv("Aromatic_Output.csv", index=False, header=False)

    return data
    

In [None]:
def get_aromatic_compounds_other_information():
    """Add element counts, formulas, molar mass and halogen mass percentages.

    For every row of the Gaussian compounds CSV the "SMILE" column is parsed
    with ``PAH`` and these columns are filled in: "N", "O", "S", "X"
    (H + halogens), "Halo" (halogens only), "General Formula", "Formula",
    "SMILE_NOT_HYDROGEN", "Molar Mass", "Clpercent", "Brpercent", "Fpercent"
    and "Halopercent". The file is overwritten in place.

    Returns:
        The updated DataFrame. As a side effect ``display.max_colwidth`` is
        set to unlimited.
    """
    # Raw string: "\P" and "\H" are not valid escape sequences in a normal literal.
    csv_path=r'Gaussian\PAHs\Hundred\PAHs_Compounds_Gaussian.csv'

    df_des=pd.read_csv(csv_path)
    total_rows = len(df_des.index)
    pd.set_option('display.max_colwidth', None)
    bar = progressbar.ProgressBar(max_value=total_rows)

    for row_1 in range(0,total_rows):
        smile=df_des.loc[row_1,"SMILE"]

        pah=PAH(smile)
        # The counts come from the PAH instance; they were previously read from
        # names that were never defined in this function.
        numC, numH = pah._numC, pah._numH
        numCl, numBr, numF = pah._numCl, pah._numBr, pah._numF
        numN, numO, numS = pah._numN, pah._numO, pah._numS

        df_des.loc[row_1,"N"]=numN
        df_des.loc[row_1,"O"]=numO
        df_des.loc[row_1,"S"]=numS
        df_des.loc[row_1,"X"]=numF+numBr+numCl+numH
        df_des.loc[row_1,"Halo"]=numF+numBr+numCl
        df_des.loc[row_1,"General Formula"]=pah.getGeneral_Formula()
        df_des.loc[row_1,"Formula"]=pah.getFormula()
        df_des.loc[row_1,"SMILE_NOT_HYDROGEN"]=smile.replace('H','')

        cl_mass=numCl*(35.453)
        f_mass=numF*(19.00)
        br_mass=numBr*(79.90)
        molarMass=(1.008)*(numH)+(12.01)*(numC)+cl_mass+f_mass+br_mass+(14.01)*(numN)+(16.00)*(numO)+(32.07)*(numS)

        # A SMILES without any counted atom has zero mass; report NaN percentages
        # instead of dividing by zero.
        divisor=molarMass if molarMass else np.nan

        df_des.loc[row_1,"Molar Mass"]=molarMass
        df_des.loc[row_1,"Clpercent"]=(cl_mass/divisor)*100
        df_des.loc[row_1,"Brpercent"]=(br_mass/divisor)*100
        df_des.loc[row_1,"Fpercent"]=(f_mass/divisor)*100
        df_des.loc[row_1,"Halopercent"]=((cl_mass+br_mass+f_mass)/divisor)*100

        bar.update(row_1)

    df_des.to_csv(csv_path, index=False)
    return df_des

# Other

In [None]:
def strip_Hydrogens_Smile():
    """Add a "NEW SMILE" column holding each SMILES with every 'H' character removed.

    The selected-compounds CSV is read, extended and overwritten in place.

    Returns:
        The updated DataFrame. As a side effect ``display.max_colwidth`` is
        set to unlimited.
    """
    # Raw string: "\D", "\B" and "\C" are not valid escape sequences in a normal literal.
    csv_path=r'Gaussian\DFT_Hundred_1402\Batch_2_with_BrClF\Compounds_selected_information.csv'

    df=pd.read_csv(csv_path)
    pd.set_option('display.max_colwidth', None)
    total_rows = len(df.index)
    bar = progressbar.ProgressBar(max_value=total_rows)

    for row in range(0,total_rows):
        smile=df.loc[row,"SMILE"]
        df.loc[row,"NEW SMILE"]=smile.replace('H','')
        bar.update(row)

    df.to_csv(csv_path, index=False)
    return df

In [None]:
def generateFormula():
    """Fill the "Formula" column of 'C10H14_Symmetry.csv' from each row's SMILES.

    The formula comes from ``PAH.getFormula``. The file is overwritten in
    place (without the index).

    Returns:
        The updated DataFrame. As a side effect ``display.max_colwidth`` is
        set to unlimited.
    """
    df=pd.read_csv('C10H14_Symmetry.csv')
    total_rows = len(df.index)
    # None means "unlimited"; -1 is no longer accepted by pandas.
    pd.set_option('display.max_colwidth', None)
    bar = progressbar.ProgressBar(max_value=total_rows)

    for row in range(0,total_rows):
        smile=df.loc[row,"SMILE"]
        df.loc[row,"Formula"] = PAH(smile).getFormula()
        bar.update(row)

    df.to_csv("C10H14_Symmetry.csv", index=False)

    return df

In [96]:
def replace_Bracket_from_smile():
    """Add a "Smile" column with the square brackets removed from each "SMILE".

    'Aromatic_Output.csv' is read, extended and overwritten in place (without
    the index). The original "SMILE" column is left untouched.

    Returns:
        The updated DataFrame. As a side effect ``display.max_colwidth`` is
        set to unlimited.
    """
    df=pd.read_csv('Aromatic_Output.csv')
    total_rows = len(df.index)
    # None means "unlimited"; -1 is no longer accepted by pandas.
    pd.set_option('display.max_colwidth', None)
    bar = progressbar.ProgressBar(max_value=total_rows)

    for row in range(0,total_rows):
        smile=df.loc[row,"SMILE"]
        df.loc[row,"Smile"] = smile.replace('[', '').replace(']', '')
        bar.update(row)

    df.to_csv("Aromatic_Output.csv", index=False)

    return df

# InChIKey

In [None]:
import numpy as np
import pandas as pd

# Load 'Output9.txt' as a one-column frame, one row per non-blank line.
# read_csv rejects sep="\n" in current pandas, so the lines are read directly.
with open('Output9.txt', encoding='utf-8') as output_file:
    df = pd.DataFrame([line for line in output_file.read().splitlines() if line.strip()])

In [None]:
# Keep only the lines that contain an InChIKey. The text is matched literally,
# and na=False treats missing values as "no match" instead of breaking the mask.
df = df[df.iloc[:,0].str.contains("InChIKey=", regex=False, na=False)]

In [None]:
# Rebuild the frame from the first column so the index restarts at 0,
# then remove the 'InChIKey=' text from every row.
inchikey_lines = df.iloc[:,0]
df = pd.DataFrame(inchikey_lines.values.tolist())
total_rows = len(df.index)
if total_rows:
    df[0] = df[0].str.replace('InChIKey=', '', regex=False)
df.tail(15)