In [3]:
import codecs
import glob
import math
import os
from decimal import Decimal
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# NOTE: despite its name, `data_dir` is a glob pattern, not a directory.
data_dir = "data/input/isolate_Fc/NH/*.out"
# The pattern with its file part removed; the trailing "/" is kept, so a file
# name can be appended by plain string concatenation.
result_dir = data_dir.replace("*.out", "")
# Sorted so that the files are processed in a reproducible order.
paths = sorted(glob.glob(data_dir))

# Substrings identifying the summary line of each excited state.
excited_state_1 = "Excited State   1"
excited_state_2 = "Excited State   2"
excited_state_3 = "Excited State   3"

# Substring identifying the line that carries the SCF energy.
SCF_done_key = "SCF Done"
# The excited-state summary lines are searched only between the first line
# containing `initial_key_1` (inclusive) and the next line containing
# `end_key_1` (exclusive).
initial_key_1 = "Excited State   1"
end_key_1 = "SavETr"

# Divisor applied to each excitation energy before it is added to the SCF energy.
# NOTE(review): presumably eV per Hartree (the CODATA value is ~27.2114); the
# original 27.2116 is kept so that previously generated results stay identical.
EV_PER_HARTREE = 27.2116

# Column order of the result table.
RESULT_COLUMNS = [
    "f_1", "S1", "nm_1",
    "f_2", "S2", "nm_2",
    "f_3", "S3", "nm_3",
    "S1_energy", "S2_energy", "S3_energy", "SCF_done_energy",
]


def torsion_angle_from_path(path):
    """Return the integer encoded in the file name of ``path``.

    The value is the text between the last "_" and the first "@" of the file
    name, e.g. ``"mol_NH_30@td.out"`` gives ``30``.

    Raises:
        ValueError: if that part of the file name is not an integer.
    """
    file_name = os.path.basename(path)
    return int(file_name.split("@")[0].split("_")[-1])


def find_first_line(lines, key, source, ignore_case=False):
    """Return the position of the first entry of ``lines`` containing ``key``.

    Args:
        lines: sequence of strings to search.
        key: substring to look for.
        source: label (e.g. the file path) used only in the error message.
        ignore_case: compare case-insensitively when True.

    Raises:
        ValueError: if no entry contains ``key``.
    """
    needle = key.lower() if ignore_case else key
    for position, line in enumerate(lines):
        haystack = line.lower() if ignore_case else line
        if needle in haystack:
            return position
    raise ValueError("{!r} not found in {}".format(key, source))


def parse_excited_state_line(line):
    """Extract ``(f, E, nm)`` from one excited-state summary line.

    The line is split on whitespace. ``E`` is the 5th token, ``nm`` is the 4th
    token from the end, and ``f`` is the text following "f=" in the 2nd token
    from the end. All three are returned as floats.
    """
    tokens = line.split()
    oscillator_strength = float(tokens[-2].split("f=")[-1])
    excitation_energy = float(tokens[4])
    wavelength = float(tokens[-4])
    return oscillator_strength, excitation_energy, wavelength


def parse_td_output(path):
    """Parse one output file into a dict keyed by ``RESULT_COLUMNS``.

    The file is read as plain text lines. A tabular reader is deliberately not
    used: it would drop the first line as a header and interpret quotes, tabs
    and NA-like lines, none of which is wanted for a free-form log.
    NOTE(review): the key strings look like Gaussian TD-DFT output -- confirm.

    Raises:
        ValueError: if one of the key strings is missing from the file.
    """
    with open(path, "r", encoding="UTF-8", errors="ignore") as file:
        lines = [line.rstrip("\r\n") for line in file]

    # Restrict the search to the first excited-state section: from the first
    # line matching `initial_key_1` (case-insensitive) up to, but excluding,
    # the next line that contains `end_key_1`.
    start = find_first_line(lines, initial_key_1, path, ignore_case=True)
    stop = start + find_first_line(lines[start:], end_key_1, path)
    state_block = lines[start:stop]

    record = {}
    state_keys = (excited_state_1, excited_state_2, excited_state_3)
    for number, key in enumerate(state_keys, start=1):
        line = state_block[find_first_line(state_block, key, path)]
        f_value, energy, wavelength = parse_excited_state_line(line)
        record["f_{}".format(number)] = f_value
        record["S{}".format(number)] = energy
        record["nm_{}".format(number)] = wavelength

    # The SCF energy is the 5th whitespace-separated token of the first
    # "SCF Done" line of the whole file.
    scf_line = lines[find_first_line(lines, SCF_done_key, path)]
    scf_energy = float(scf_line.split()[4])

    # Energy of each excited state: SCF energy plus the converted excitation energy.
    for number in (1, 2, 3):
        excitation = record["S{}".format(number)]
        record["S{}_energy".format(number)] = scf_energy + (excitation / EV_PER_HARTREE)
    record["SCF_done_energy"] = scf_energy
    return record


# ---------------------------------------------------------------------------
# Build the result table: one row per input file, indexed by torsion angle
# ---------------------------------------------------------------------------
torsion_angles = []
records = []
for path in paths:
    # The angle is taken from the file name.
    torsion_angles.append(torsion_angle_from_path(path))
    records.append(parse_td_output(path))

if records:
    # Build the frame once instead of concatenating and re-sorting per file.
    df_output = pd.DataFrame(
        records, index=torsion_angles, columns=RESULT_COLUMNS
    ).sort_index()
else:
    # No input files matched: keep an empty frame.
    df_output = pd.DataFrame()

In [4]:
# Write the summary table as "result.csv" under `result_dir`
# (`result_dir` already ends with a path separator, so the name is appended directly).
result_csv_path = "{}result.csv".format(result_dir)
df_output.to_csv(result_csv_path)