# Packages/modules


In [1]:
# import packages/modules
import numpy as np
import pandas as pd
import os
import re

# Load gaussian log file


In [4]:
# target file: path of the log file analysed by the cells below
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook only runs as-is where this file exists; point it at your own file.
file_src = 'D:\\Google Cloud\\Python Source\\Gaussian\\limonene.log'
file_src

'D:\\Google Cloud\\Python Source\\Gaussian\\limonene.log'

# Util


In [5]:
def CheckFileFormat(filePath):
    '''
    split the path of an existing file into its parts

    args:
      filePath: path of the file to inspect

    return:
      file directory, file name (without extension), file extension (with dot)

    raises:
      Exception: when filePath does not point to an existing file
    '''
    # guard clause: anything that is not an existing file is rejected up front
    if not os.path.isfile(filePath):
        raise Exception('file path is not valid.')

    # directory / "name.ext"
    directory, base_name = os.path.split(filePath)
    # "name" / ".ext"
    stem, extension = os.path.splitext(base_name)
    return directory, stem, extension

In [6]:
# sanity check: split the source path into (directory, name, extension)
CheckFileFormat(file_src)

('D:\\Google Cloud\\Python Source\\Gaussian', 'limonene', '.log')

# Periodic table


In [7]:
# download the PubChem periodic table as CSV (this cell needs network access)
url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/periodictable/CSV?response_type=display"
pt_df = pd.read_csv(url)
# preview the first rows
pt_df.head()

Unnamed: 0,AtomicNumber,Symbol,Name,AtomicMass,CPKHexColor,ElectronConfiguration,Electronegativity,AtomicRadius,IonizationEnergy,ElectronAffinity,OxidationStates,StandardState,MeltingPoint,BoilingPoint,Density,GroupBlock,YearDiscovered
0,1,H,Hydrogen,1.008,FFFFFF,1s1,2.2,120.0,13.598,0.754,"+1, -1",Gas,13.81,20.28,9e-05,Nonmetal,1766
1,2,He,Helium,4.0026,D9FFFF,1s2,,140.0,24.587,,0,Gas,0.95,4.22,0.000179,Noble gas,1868
2,3,Li,Lithium,7.0,CC80FF,[He]2s1,0.98,182.0,5.392,0.618,+1,Solid,453.65,1615.0,0.534,Alkali metal,1817
3,4,Be,Beryllium,9.012183,C2FF00,[He]2s2,1.57,153.0,9.323,,+2,Solid,1560.0,2744.0,1.85,Alkaline earth metal,1798
4,5,B,Boron,10.81,FFB5B5,[He]2s2 2p1,2.04,192.0,8.298,0.277,+3,Solid,2348.0,4273.0,2.37,Metalloid,1808


In [None]:
# look up the symbol of element number 3 with a single label-based selection
_atomic_symbol = pt_df.loc[pt_df['AtomicNumber'] == 3, 'Symbol'].values[0]
_atomic_symbol

'Li'

In [8]:
def FindAtomicSymbol(atomic_number, df):
    '''
    Find atomic symbol

    args:
      atomic_number {number/str}: atomic number, e.g. 6 or '6'
      df {dataframe}: periodic table data with the columns
        'AtomicNumber' and 'Symbol'

    return:
      atomic symbol of the first row whose 'AtomicNumber' matches

    raises:
      IndexError: when no row of df matches atomic_number
    '''
    # first try the value exactly as given
    _symbols = df.loc[df['AtomicNumber'] == atomic_number, 'Symbol']

    if _symbols.empty:
        # a str such as '6' never equals a numeric column entry, so retry
        # with both sides converted to numbers
        try:
            _number = int(atomic_number)
        except (TypeError, ValueError):
            _number = None
        if _number is not None:
            _numeric_column = pd.to_numeric(df['AtomicNumber'], errors='coerce')
            _symbols = df.loc[_numeric_column == _number, 'Symbol']

    if _symbols.empty:
        # same exception type the bare `.values[0]` used to produce, but with
        # a message that says what went wrong
        raise IndexError(
            f'atomic number {atomic_number!r} not found in periodic table data')

    return _symbols.values[0]

In [9]:
# example: look up the symbol for atomic number 2
FindAtomicSymbol(2, pt_df)

'He'

In [None]:
# round-trip check: drop the symbol column and rebuild it via FindAtomicSymbol
df2 = pt_df.copy()
df3 = df2.drop(columns=['Symbol'])
df3['Symbol2'] = df3['AtomicNumber'].map(
    lambda _number: FindAtomicSymbol(_number, pt_df))
df3

Save a dictionary that maps each atomic number to its symbol.


In [10]:
# keep only the two columns needed for the number -> symbol lookup
_df = pt_df.loc[:, ['AtomicNumber', 'Symbol']]
_df

Unnamed: 0,AtomicNumber,Symbol
0,1,H
1,2,He
2,3,Li
3,4,Be
4,5,B
...,...,...
113,114,Fl
114,115,Mc
115,116,Lv
116,117,Ts


In [12]:
# Build {row index as str: {'AtomicNumber': int, 'Symbol': str}} in one pass
# over the two-column frame
transformed_dict = {
    str(_row_index): {'AtomicNumber': int(_number), 'Symbol': _symbol}
    for _row_index, _number, _symbol in zip(
        _df.index, _df['AtomicNumber'], _df['Symbol'])
}

# Analyze file


In [13]:
def ReadXYZLogFile(filePath, pt_dict):
    '''
    read the last coordinate table found in a log file

    The file content is searched for the last occurrence of the table header
    "Center / Atomic / Atomic / Coordinates (Angstroms)"; every row between
    the separator below that header and the next identical separator is parsed.

    args:
        filePath {str}: full name of file with directory and format
        pt_dict {dict}: periodic table dict; each value is a dict with the
            keys 'AtomicNumber' and 'Symbol'

    return:
        tuple (fileName, item_conv, item_loc, column_names):
            fileName {str}: file name without directory and extension
            item_conv {list}: always an empty list in this function
            item_loc {list of dict}: one dict per atom keyed by column_names;
                numbers and coordinates are kept as the strings read from
                the file
            column_names {list of str}: the keys used in item_loc

    raises:
        Exception: when filePath is not an existing file (from CheckFileFormat)
        ValueError: when the coordinate table header is missing or truncated,
            or when a row carries an atomic number that pt_dict does not contain
    '''
    # file info (also validates that the file exists)
    fileDir, fileName, fileExtension = CheckFileFormat(filePath)

    # results
    item_loc = []
    item_conv = []
    column_names = ['Center Number', 'Atomic Number',
                    'Atomic Symbol', 'X', 'Y', 'Z']

    # atomic number -> symbol, taken from the entries themselves so the lookup
    # does not depend on how the keys of pt_dict happen to be numbered
    symbol_by_number = {
        int(entry['AtomicNumber']): entry['Symbol']
        for entry in pt_dict.values()
    }

    # file open
    with open(filePath, "r") as f:
        fContent = f.read()

    # find
    # ---------------------------------------------------------------------
    # Center     Atomic      Atomic             Coordinates (Angstroms)
    # Number     Number       Type             X           Y           Z
    # ---------------------------------------------------------------------
    str_sub_1 = 'Center     Atomic      Atomic             Coordinates (Angstroms)'
    # rfind: use the LAST table in the file
    i_sub_1 = fContent.rfind(str_sub_1)
    if i_sub_1 == -1:
        raise ValueError(
            f'no coordinate table header found in {filePath!r}')

    # split str by lines: [0] header line 1, [1] header line 2, [2] separator
    str_sub_lines = fContent[i_sub_1:].splitlines()
    if len(str_sub_lines) < 3:
        raise ValueError(
            f'coordinate table in {filePath!r} is truncated')

    # separator that also terminates the table
    sep_1 = str_sub_lines[2]

    # coordinate row: center number, atomic number, atomic type, X, Y, Z
    row_pattern = re.compile(
        r"\s+(\d+)\s+(\d+)\s+(\w+)\s+([-+]?\d*\.\d+)\s+([-+]?\d*\.\d+)\s+([-+]?\d*\.\d+)")

    # rows start right after the three header lines
    for item in str_sub_lines[3:]:
        _res = row_pattern.search(item)
        if _res:
            _center_number = _res.group(1)
            _atomic_number = _res.group(2)
            _X = _res.group(4)
            _Y = _res.group(5)
            _Z = _res.group(6)

            # atomic symbol
            _atomic_symbol = symbol_by_number.get(int(_atomic_number))
            if _atomic_symbol is None:
                raise ValueError(
                    f'atomic number {_atomic_number} (center {_center_number}) '
                    'is not in the periodic table dict')

            # store
            item_loc.append({
                'Center Number': _center_number,
                'Atomic Number': _atomic_number,
                'Atomic Symbol': _atomic_symbol,
                'X': _X,
                'Y': _Y,
                'Z': _Z
            })

        # the second separator closes the table
        if item == sep_1:
            break

    return fileName, item_conv, item_loc, column_names

In [14]:
# parse the log file; the result is the tuple
# (file name, item_conv, list of per-atom dicts, column names)
xyz_data = ReadXYZLogFile(file_src, transformed_dict)
print(xyz_data)

('limonene', [], [{'Center Number': '1', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '0.711585', 'Y': '0.035352', 'Z': '-0.301777'}, {'Center Number': '2', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '-0.061757', 'Y': '1.300928', 'Z': '0.062906'}, {'Center Number': '3', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '0.098166', 'Y': '-1.165339', 'Z': '0.441734'}, {'Center Number': '4', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '-1.518987', 'Y': '1.189382', 'Z': '-0.381726'}, {'Center Number': '5', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '-2.149119', 'Y': '-0.126629', 'Z': '0.005706'}, {'Center Number': '6', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '-1.407352', 'Y': '-1.166378', 'Z': '0.391990'}, {'Center Number': '7', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '2.204711', 'Y': '0.106169', 'Z': '-0.060168'}, {'Center Number': '8', 'Atomic Number': '6', 'Atomic Symbol': 'C', 'X': '-3.648719', 'Y': '-0.187311', 'Z': '-0.076825'}, {'Center Number':

multiple files


In [None]:
def AnalyzeXYZLogFiles(targetPath, fileList, pt_dict=None):
    '''
    analyze each file

    args:
        targetPath: target folder
        fileList: list of selected files (names relative to targetPath)
        pt_dict {dict}: periodic table dict passed on to ReadXYZLogFile;
            when omitted the notebook-level `transformed_dict` is used

    output:
        res {list}: one ReadXYZLogFile result per entry of fileList,
            in the same order

    raises:
        Exception: when fileList is empty
    '''
    # check
    if len(fileList) == 0:
        raise Exception("file list is empty!")

    # fall back to the notebook-level table only when none is passed in
    if pt_dict is None:
        pt_dict = transformed_dict

    res = []
    for item in fileList:
        # os.path.join inserts the separator when targetPath lacks a trailing
        # one; plain concatenation glued folder and file name together
        _file_full_path = os.path.join(str(targetPath), str(item))
        # read file and save
        res.append(ReadXYZLogFile(_file_full_path, pt_dict))

    # return
    return res

single file


# Save as str format (txt)


In [15]:
def save_data_to_txt(d, file_name='res', df_format='string'):
    '''
    save xyz coordinate data to a txt file in the current working directory

    args:
        d: input data, an iterable of tuples
            (file name, item_conv, item_loc, column_names) where item_loc is
            a list of per-atom dicts; only the file name and item_loc are used
        file_name: output file name without extension, default: res (res.txt)
        df_format: 'string' writes a fixed-width table under a fixed header;
            any other value writes a tab-separated table

    return:
        (True, path of the written text file)
    '''

    # text file path
    text_file_path = os.getcwd() + '/' + file_name + '.txt'

    # sep line (also written after the last row of each table)
    sep_line = '---------------------------------------------------------------------------------'

    # fixed table header used instead of the header pandas generates
    _header = """---------------------------------------------------------------------------------
 Center      Atomic      Atomic                   Coordinates (Angstroms)
 Number      Number      Symbol                  X           Y           Z
---------------------------------------------------------------------------------
"""

    # open the text file in write mode
    with open(text_file_path, 'w') as f:
        # add each df to the text file
        for item in d:
            file_name_full = item[0]
            item_loc = item[2]

            # df
            df = pd.DataFrame.from_dict(item_loc)

            # check df format
            if df_format == 'string':
                if df.empty:
                    # to_string() of an empty frame is an "Empty DataFrame"
                    # notice, not a table, so there are no rows to write
                    rows = []
                else:
                    df_string = df.to_string(
                        index=False, col_space=10, justify='left')
                    # drop the first line (pandas' own column header) and
                    # add 6 spaces indent to each remaining line
                    rows = ['      ' + line
                            for line in df_string.split('\n')[1:]]

                # write file name as heading
                f.write("Input orientation: "+file_name_full+'\n')

                # header
                f.write(_header)

                # content
                if rows:
                    f.write('\n'.join(rows) + '\n')

                # last line
                f.write(sep_line + '\n')
            else:
                # write file name as heading
                f.write(file_name_full + '\n')
                f.write('-' * len(file_name_full) + '\n')

                # write the header
                f.write('\t'.join(df.columns) + '\n')

                # write the rest of the data frame
                for i in range(df.shape[0]):
                    f.write('\t'.join(map(str, df.iloc[i])) + '\n')
                f.write('\n')
    # res
    return (True, text_file_path)

In [16]:
# write the parsed coordinates to res.txt (default file name) in the current
# working directory and keep the path of the written file
_, res_src = save_data_to_txt([xyz_data])
res_src

'e:\\Python Projects\\GaussParse\\GaussParse\\notebook/res.txt'

In [17]:
# read the generated text file back to inspect what was written
with open(res_src, "r") as f:
    content = f.read()
print(content)

Input orientation: limonene
---------------------------------------------------------------------------------
 Center      Atomic      Atomic                   Coordinates (Angstroms)
 Number      Number      Symbol                  X           Y           Z
---------------------------------------------------------------------------------
       1            6             C              0.711585   0.035352  -0.301777 
       2            6             C             -0.061757   1.300928   0.062906 
       3            6             C              0.098166  -1.165339   0.441734 
       4            6             C             -1.518987   1.189382  -0.381726 
       5            6             C             -2.149119  -0.126629   0.005706 
       6            6             C             -1.407352  -1.166378   0.391990 
       7            6             C              2.204711   0.106169  -0.060168 
       8            6             C             -3.648719  -0.187311  -0.076825 
       9   

load txt file


# Save str as xyz format to be opened in Avogadro


Load the txt file, then convert it to XYZ format.


In [18]:
def generate_xyz_file(atom_list, file_name="molecule"):
    """
    Generates an XYZ file from a list of atom positions with nicely formatted columns.

    The file holds the number of atoms on the first line, the title on the
    second line and one "symbol x y z" line per atom.

    args:
      atom_list {list of tuples}: Each tuple contains an element symbol and its x, y, z coordinates.
        Coordinates may be numbers or numeric strings.
      file_name {str}: The name of the output XYZ file, with or without the
        ".xyz" extension; the name without extension is used as the title line.

    return:
      file name (always ending in ".xyz")

    raises:
      ValueError/TypeError: when a record does not have four fields or a
        coordinate cannot be converted to float; nothing is written then.

    Example input for methane (CH4):
    atom_list = [
        ("C", 0.000000, 0.000000, 0.000000),
        ("H", 0.000000, 0.000000, 1.089000),
        ("H", 1.026719, 0.000000, -0.363000),
        ("H", -0.513360, -0.889165, -0.363000),
        ("H", -0.513360, 0.889165, -0.363000)
    ]
    """
    # accept "name" as well as "name.xyz" without producing "name.xyz.xyz"
    if file_name.lower().endswith(".xyz"):
        _title = file_name[:-len(".xyz")]
        _file = file_name
    else:
        _title = file_name
        _file = file_name + ".xyz"

    # validate and convert every record BEFORE opening the file, so a bad
    # record cannot leave a half-written file behind
    _rows = [(element, float(x), float(y), float(z))
             for element, x, y, z in atom_list]

    # save
    with open(_file, 'w') as file:
        # number of atoms
        file.write(f"{len(_rows)}\n")
        # title line
        file.write(f"{_title}\n")
        for element, x, y, z in _rows:
            file.write(f"{element:<2} {x:> 10.6f} {y:> 10.6f} {z:> 10.6f}\n")
    # file name
    return _file

In [None]:
# Example usage for methane (CH4): (element symbol, x, y, z) per atom
methane_atoms = [
    ("C", 0.000000, 0.000000, 0.000000),
    ("H", 0.000000, 0.000000, 1.089000),
    ("H", 1.026719, 0.000000, -0.363000),
    ("H", -0.513360, -0.889165, -0.363000),
    ("H", -0.513360, 0.889165, -0.363000)
]

# pass the name without extension: generate_xyz_file appends ".xyz" itself
# generate_xyz_file(methane_atoms, "methane")

In [19]:
def SaveToXYZ(file_path):
    '''
    read a coordinate txt file and write its first table as an xyz file

    Expected layout of the txt file (as written by save_data_to_txt with
    df_format='string'): a first line "<label>: <title>", a separator line,
    two column-header lines, a separator line, one row per atom and a closing
    separator line. The xyz file is named "<title>.xyz".

    args:
        file_path {str}: full name of file with directory and format

    return:
        tuple (xyz file name, item_conv, item_loc, column_names):
            item_conv {list}: always an empty list in this function
            item_loc {list of tuple}: (atomic symbol, x, y, z) per atom with
                float coordinates
            column_names {list of str}: column names of the txt table

    raises:
        Exception: when file_path is not an existing file (from CheckFileFormat)
        ValueError: when the title line or the separator line is missing
    '''
    # file info (also validates that the file exists)
    fileDir, fileName, fileExtension = CheckFileFormat(file_path)

    # results
    item_loc = []
    item_conv = []
    column_names = ['Center Number', 'Atomic Number',
                    'Atomic Symbol', 'X', 'Y', 'Z']

    # file open
    with open(file_path, "r") as f:
        lines = f.read().split('\n')

    if len(lines) < 2 or ':' not in lines[0]:
        raise ValueError(
            f'{file_path!r} does not start with a "<label>: <title>" line '
            'followed by a separator line')

    # title: everything after the FIRST colon, so a title that itself contains
    # a colon is not truncated
    title = lines[0].partition(":")[2].strip()
    # separator, stripped once so both sides of the comparison below agree
    sep_1 = lines[1].strip()

    # coordinate row: center number, atomic number, atomic symbol, X, Y, Z
    row_pattern = re.compile(
        r"\s+(\d+)\s+(\d+)\s+(\w+)\s+([-+]?\d*\.\d+)\s+([-+]?\d*\.\d+)\s+([-+]?\d*\.\d+)")

    # lines[4] is the separator that closes the header; rows follow it
    for k, item in enumerate(lines[4:]):
        _res = row_pattern.search(item)
        if _res:
            _atomic_symbol = _res.group(3)
            _X = _res.group(4)
            _Y = _res.group(5)
            _Z = _res.group(6)

            # store
            item_loc.append(
                (_atomic_symbol, float(_X), float(_Y), float(_Z)))

        # stop at the first separator after the header one (k == 0), so the
        # rows of a following table are never mixed in, however short this
        # table is
        if k > 0 and item.strip() == sep_1:
            break

    # save xyz file
    _file_name = generate_xyz_file(item_loc, title)

    return _file_name, item_conv, item_loc, column_names

In [20]:
# save: convert the txt table to an xyz file and keep only the file name
# (the first element of the returned tuple)
xyz_file, _, _, _ = SaveToXYZ(res_src)
xyz_file

'limonene.xyz'

display result


In [21]:
# read the generated xyz file back to inspect what was written
with open(xyz_file, "r") as f:
    content = f.read()
print(content)

26
limonene
C    0.711585   0.035352  -0.301777
C   -0.061757   1.300928   0.062906
C    0.098166  -1.165339   0.441734
C   -1.518987   1.189382  -0.381726
C   -2.149119  -0.126629   0.005706
C   -1.407352  -1.166378   0.391990
C    2.204711   0.106169  -0.060168
C   -3.648719  -0.187311  -0.076825
C    2.993707  -1.090578  -0.527604
C    2.804791   1.137288   0.536148
H    0.568108  -0.145304  -1.377639
H    0.400316   2.178604  -0.395865
H   -0.023604   1.442964   1.149418
H    0.467344  -2.102062   0.014276
H    0.439450  -1.153109   1.486036
H   -1.593159   1.310528  -1.470409
H   -2.107651   2.006366   0.049264
H   -1.904337  -2.089766   0.681666
H   -3.986754   0.068177  -1.086367
H   -4.102558   0.541242   0.602209
H   -4.026775  -1.179490   0.173781
H    2.764888  -1.971431   0.079018
H    2.740027  -1.340900  -1.562117
H    4.066485  -0.905778  -0.464835
H    3.878832   1.139859   0.689967
H    2.257227   2.002790   0.889307

