# Packages


In [2]:
# import packages/modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from math import pow, exp, log, sqrt
# from scipy import optimize
import os
import re
import csv
import openpyxl
import xlsxwriter
# word
from docx import Document

### Base Dir


In [3]:
# current working directory of the notebook process
# (save_data_to_word writes its .docx file into this directory)
base_dir = os.getcwd()
print(f"base_dir {base_dir}")

base_dir e:\Python Projects\gaussian-utility\gutility\notebook


### List the files with a given extension in the target folder


In [4]:
def ListFiles(targetPath, fileExtension='txt'):
    '''
    list the files of a target folder that have a given extension

    args:
        targetPath: path of the folder to scan
        fileExtension: file extension, with or without the leading dot,
            default is 'txt'

    return:
        a sorted list of the matching entry names (names only, without
        the folder part)

    raise:
        Exception: if targetPath is not an existing folder
    '''
    # a path to a regular file passes os.path.exists but cannot be listed,
    # so require a folder explicitly
    if not os.path.isdir(targetPath):
        raise Exception("target path is not valid.")

    # accept both 'log' and '.log'
    extension = str(fileExtension)
    if extension.startswith('.'):
        extension = extension[1:]
    suffix = '.' + extension

    # os.listdir returns the entries in arbitrary order; sort them so the
    # result (and everything built from it) is reproducible
    return sorted(f for f in os.listdir(targetPath) if f.endswith(suffix))

In [5]:
def CheckFileFormat(filePath):
    '''
    split the path of an existing file into its parts

    args:
        filePath: path of the file

    return:
        file directory, file name without extension, file extension
        (the extension keeps its leading dot)

    raise:
        Exception: if filePath does not point to an existing file
    '''
    # guard: only an existing regular file is analyzed
    if not os.path.isfile(filePath):
        raise Exception('file path is not valid.')

    # directory part and last path component
    folder, base = os.path.split(filePath)
    # name and extension of the last component
    stem, ext = os.path.splitext(base)
    return folder, stem, ext

### Load file names

**_ `file_folder` _**

1. no-cal-sol
2. isolate-gas
3. isolate-sol


In [8]:
# log dir: absolute folder that holds the .log files to analyze
# (machine-specific path; adjust it before running on another computer)
target_folder = "D:\\OneDrive\\Project Analysis\\Computational Chemistry\\analysis\\terpens epoxidation\\extract data\\acetone-limonene-mechanism-1"

# list files: names of all entries of target_folder ending with ".log"
file_list = ListFiles(target_folder, "log")
print("res:", file_list)
print("count: ", len(file_list))

res: ['1,2LO.log', 'LDO-1.log', 'LDO-2.log', 'PC1.log', 'PC2-1.log', 'PC2-2.log', 'RC1.log', 'RC2-1.log', 'RC2-2.log', 'TS1-trans.log', 'TS2-1.log', 'TS2-2.log']
count:  12


In [7]:
# # file folder
# file_folder = "data\\isolate-sol"
# # target folder
# target_folder = os.path.join(base_dir, file_folder)

# # list files
# file_list = ListFiles(target_folder, "log")
# print("res:", file_list)

### Sort file list based on model


Read a log file: extract the convergence check and the atomic coordinates


In [9]:
def _parse_convergence(content):
    '''
    extract the last convergence check from the text of a log file

    args:
        content: full text of the log file

    return:
        a list of four dicts with the keys 'Item', 'Value', 'Threshold'
        and 'Converged' (all values are strings); the rows are expected to
        be Maximum Force, RMS Force, Maximum Displacement and
        RMS Displacement

    raise:
        ValueError: if fewer than four convergence rows are found
    '''
    # a row is: item name, value, threshold, YES|NO
    str_regex = r'^\s*(\w+(\s+\w+)*)\s+(\S+)\s+(\S+)\s+(YES|NO)\s*$'
    rows = re.findall(str_regex, content, re.M)

    # the last four rows form the final convergence table
    if len(rows) < 4:
        raise ValueError("convergence table not found")

    # row[1] is the inner repetition group of the item name, not a field
    return [
        {
            'Item': row[0],
            'Value': row[2],
            'Threshold': row[3],
            'Converged': row[4]
        }
        for row in rows[-4:]
    ]


def _parse_coordinates(content):
    '''
    extract the last atomic coordinates table from the text of a log file

    args:
        content: full text of the log file

    return:
        a list of dicts with the keys 'Center Number', 'Atomic Number',
        'X', 'Y' and 'Z' (all values are strings), one per atom row

    raise:
        ValueError: if the table header is missing or the table is cut off
    '''
    # layout of the table that is searched for:
    # ---------------------------------------------------------------------
    # Center     Atomic      Atomic             Coordinates (Angstroms)
    # Number     Number       Type             X           Y           Z
    # ---------------------------------------------------------------------
    header = 'Center     Atomic      Atomic             Coordinates (Angstroms)'

    # the last occurrence is used; rfind returns -1 when there is none,
    # which must not be used as a slice index
    start = content.rfind(header)
    if start == -1:
        raise ValueError("coordinates table not found")

    lines = content[start:].splitlines()
    if len(lines) < 3:
        raise ValueError("coordinates table is incomplete")

    # line 2 is the dashed separator below the header; the same line
    # closes the table
    separator = lines[2]

    # center number, atomic number, atomic type, X, Y, Z
    atom_regex = re.compile(
        r"\s+(\d+)\s+(\d+)\s+(\d+)\s+([-+]?\d*\.\d+)\s+([-+]?\d*\.\d+)\s+([-+]?\d*\.\d+)")

    atoms = []
    for index, line in enumerate(lines):
        found = atom_regex.search(line)
        if found:
            # group 3 (atomic type) is not part of the result
            atoms.append({
                'Center Number': found.group(1),
                'Atomic Number': found.group(2),
                'X': found.group(4),
                'Y': found.group(5),
                'Z': found.group(6)
            })

        # stop at the closing separator (index > 2 skips the opening one)
        if index > 2 and line == separator:
            break

    return atoms


def ReadLogFile(filePath):
    '''
    read the convergence check and the atomic coordinates of a log file

    args:
        filePath: full name of file with directory and format

    return:
        a tuple (file name, item_conv, item_loc, column_names):
            file name: name of the file without directory and extension
            item_conv: the last four convergence rows, a list of dicts
            item_loc: the atoms of the last coordinates table, a list of dicts
            column_names: the keys used in the dicts of item_loc

    raise:
        Exception: if filePath is not an existing file (from CheckFileFormat)
        ValueError: if the convergence table or the coordinates table
            cannot be found in the file
    '''
    # file info (also validates that the file exists)
    _, fileName, _ = CheckFileFormat(filePath)

    column_names = ['Center Number', 'Atomic Number', 'X', 'Y', 'Z']

    # file open
    with open(filePath, "r") as f:
        fContent = f.read()

    try:
        item_conv = _parse_convergence(fContent)
        item_loc = _parse_coordinates(fContent)
    except ValueError as err:
        # add the file to the message so a failing file of a batch is
        # easy to identify
        raise ValueError(f"{filePath}: {err}") from err

    return fileName, item_conv, item_loc, column_names

In [10]:
def AnalyzeLogFiles(targetPath, fileList):
    '''
    read every file of a list with ReadLogFile

    args:
        targetPath: folder that contains the files
        fileList: names of the files to read

    return:
        a list with one ReadLogFile result per file, in the order of
        fileList

    raise:
        Exception: if fileList is empty
    '''
    # guard: nothing to analyze
    if len(fileList) == 0:
        raise Exception("file list is empty!")

    # full path = folder + name (both cast to str before joining)
    return [
        ReadLogFile(os.path.join(str(targetPath), str(name)))
        for name in fileList
    ]

In [11]:
# target folder: show the folder that is about to be analyzed
print(target_folder)
# file list: show the file names that are about to be analyzed
print(file_list)

D:\OneDrive\Project Analysis\Computational Chemistry\analysis\terpens epoxidation\extract data\acetone-limonene-mechanism-1
['1,2LO.log', 'LDO-1.log', 'LDO-2.log', 'PC1.log', 'PC2-1.log', 'PC2-2.log', 'RC1.log', 'RC2-1.log', 'RC2-2.log', 'TS1-trans.log', 'TS2-1.log', 'TS2-2.log']


In [12]:
# read every listed log file; log_res holds one
# (file name, item_conv, item_loc, column_names) tuple per file
log_res = AnalyzeLogFiles(target_folder, file_list)
print(log_res)
# number of files that were read
print(len(log_res))

[('1,2LO', [{'Item': 'Maximum Force', 'Value': '0.000002', 'Threshold': '0.000015', 'Converged': 'YES'}, {'Item': 'RMS     Force', 'Value': '0.000000', 'Threshold': '0.000010', 'Converged': 'YES'}, {'Item': 'Maximum Displacement', 'Value': '0.000038', 'Threshold': '0.000060', 'Converged': 'YES'}, {'Item': 'RMS     Displacement', 'Value': '0.000007', 'Threshold': '0.000040', 'Converged': 'YES'}], [{'Center Number': '1', 'Atomic Number': '6', 'X': '0.907440', 'Y': '0.075107', 'Z': '-0.365896'}, {'Center Number': '2', 'Atomic Number': '6', 'X': '0.151492', 'Y': '1.336344', 'Z': '0.049125'}, {'Center Number': '3', 'Atomic Number': '6', 'X': '0.272229', 'Y': '-1.172834', 'Z': '0.282609'}, {'Center Number': '4', 'Atomic Number': '6', 'X': '-1.282978', 'Y': '1.300923', 'Z': '-0.476185'}, {'Center Number': '5', 'Atomic Number': '6', 'X': '-2.019782', 'Y': '0.038336', 'Z': '-0.089697'}, {'Center Number': '6', 'Atomic Number': '6', 'X': '-1.240156', 'Y': '-1.154346', 'Z': '0.285919'}, {'Center N

**_ `model_list` _**

model_list = ["RC1","TS1","INT1","PC1","RC2","TS2","PC2","RC3","TS3","INT2","PC3","RC4","TS4","PC4"]

model_list = ["RC1","TS1","PC1","RC2","TS2","PC2","RC3","TS3","PC3","RC4","TS4","PC4"]

model_list = ["RC1","TS1","PC1","RC2","TS2","PC2"]


In [57]:
# desired order of the models (only used by the disabled sort below)
model_list = ["RC1", "TS1", "PC1", "RC2", "TS2", "PC2"]

# Sort the original list based on the order in the model list
# NOTE: model_list.index raises ValueError for a name that is not in
# model_list, so this sort only works when every file name (without
# extension) is exactly one of the model names
# sorted_log_res = sorted(log_res, key=lambda x: model_list.index(x[0]))

# for isolated: keep the order returned by AnalyzeLogFiles
# (presumably the isolate-gas / isolate-sol data sets - TODO confirm)
sorted_log_res = log_res

print(sorted_log_res)

[('DEC', [{'Item': 'Maximum Force', 'Value': '0.000001', 'Threshold': '0.000015', 'Converged': 'YES'}, {'Item': 'RMS     Force', 'Value': '0.000000', 'Threshold': '0.000010', 'Converged': 'YES'}, {'Item': 'Maximum Displacement', 'Value': '0.000003', 'Threshold': '0.000060', 'Converged': 'YES'}, {'Item': 'RMS     Displacement', 'Value': '0.000001', 'Threshold': '0.000040', 'Converged': 'YES'}], [{'Center Number': '1', 'Atomic Number': '8', 'X': '-0.000000', 'Y': '1.081064', 'Z': '-0.409061'}, {'Center Number': '2', 'Atomic Number': '8', 'X': '-0.000000', 'Y': '-1.081064', 'Z': '-0.409061'}, {'Center Number': '3', 'Atomic Number': '8', 'X': '0.000000', 'Y': '-0.000000', 'Z': '1.577518'}, {'Center Number': '4', 'Atomic Number': '6', 'X': '-0.000000', 'Y': '2.346445', 'Z': '0.285042'}, {'Center Number': '5', 'Atomic Number': '6', 'X': '-0.000000', 'Y': '-2.346445', 'Z': '0.285042'}, {'Center Number': '6', 'Atomic Number': '6', 'X': '-0.000000', 'Y': '3.431581', 'Z': '-0.767644'}, {'Center 

### Save data to a Word document (text or table)


In [13]:
def save_data_to_word(d, file_name='res', df_format='string'):
    '''
    save the atomic coordinates read from log files to a word file

    The file <file_name>.docx is written to the current working directory.
    An existing file with the same name is overwritten.

    args:
        d: input data, an iterable of
            (name, item_conv, item_loc, column_names) items such as the
            list returned by AnalyzeLogFiles; only the name (index 0) and
            item_loc (index 2) are used
        file_name: name of the word file without extension
        df_format: 'string' writes the coordinates of each item as one
            fixed-width text paragraph, any other value writes them as a
            word table
    '''
    # word file path
    word_file_path = os.path.join(os.getcwd(), file_name + '.docx')

    # the document is always built from scratch, so an existing file is
    # replaced when the document is saved
    if os.path.exists(word_file_path):
        print("file exists")

    doc = Document()

    # add each item to the document
    for item in d:
        file_name_full = item[0]
        item_loc = item[2]

        # df
        df = pd.DataFrame.from_dict(item_loc)

        # heading with the file name, for both formats, so that every
        # block of coordinates can be attributed to its log file
        doc.add_heading(file_name_full, level=1)

        # check df format
        if df_format == 'string':
            # df to string
            df_string = df.to_string(index=False, col_space=20, justify='left')

            # df string to word
            doc.add_paragraph(df_string)
        else:
            n_rows, n_cols = df.shape

            # add a table to the end; the extra row holds the header
            t = doc.add_table(n_rows + 1, n_cols)

            # header row (cell text must be a string)
            for j, column in enumerate(df.columns):
                t.cell(0, j).text = str(column)

            # the rest of the data frame
            for i, values in enumerate(df.values):
                for j, value in enumerate(values):
                    t.cell(i + 1, j).text = str(value)

    # save the doc
    doc.save(word_file_path)

**_ `file_name` _**


In [58]:
# test: writes isolate-sol.docx into the current working directory,
# using the default 'string' format
save_data_to_word(sorted_log_res, file_name="isolate-sol")