In [1]:
import codecs
import glob
import math
import os
from decimal import Decimal
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline


# --- Configuration -----------------------------------------------------------
# Glob pattern selecting the ".out" log files to parse.
data_dir = "data/input/isolate_Fc/OH/*.out"
# Directory part of the pattern with "input" swapped for "output".
result_dir = data_dir.split("*.out")[0].replace("input", "output")
# Sorted so the processing order does not depend on the file system.
paths = sorted(glob.glob(data_dir))

# Substrings that identify the summary line of each excited state.
excited_state_1 = "Excited State   1"
excited_state_2 = "Excited State   2"
excited_state_3 = "Excited State   3"

# Substring of the line holding the SCF energy.
SCF_done_key = "SCF Done"
# The excited-state section runs from the first line containing initial_key_1
# (inclusive) to the first following line containing end_key_1 (exclusive).
initial_key_1 = "Excited State   1"
end_key_1 = "SavETr"

# Divisor applied to the excitation energies before adding them to the SCF
# energy. NOTE(review): presumably eV per Hartree -- confirm the exact value
# matches the convention of the program that wrote the logs.
EV_PER_HARTREE = 27.2116


def torsion_angle_from_path(path):
    """Return the integer torsion angle encoded in a log file name.

    Within the base name, the angle is the text after the last "_" of the
    part that precedes the first "@", e.g. "dir/mol_30@job.out" -> 30.

    Raises:
        ValueError: if that piece of the name is not an integer.
    """
    file_name = os.path.basename(path)
    return int(file_name.split("@")[0].split("_")[-1])


def find_line(lines, key, start=0, ignore_case=False):
    """Return the index of the first line at or after ``start`` containing ``key``.

    Args:
        lines: list of text lines.
        key: plain substring to look for.
        start: index of the first line to inspect.
        ignore_case: compare case-insensitively when True.

    Raises:
        ValueError: if no line contains ``key``.
    """
    needle = key.lower() if ignore_case else key
    for idx in range(start, len(lines)):
        candidate = lines[idx].lower() if ignore_case else lines[idx]
        if needle in candidate:
            return idx
    raise ValueError(f"no line containing {key!r}")


def parse_excited_state(line):
    """Return ``(f, energy, wavelength)`` parsed from one excited-state line.

    The line is split on whitespace; ``f`` is the text after "f=" in the
    second-to-last token, ``energy`` is the fifth token and ``wavelength`` is
    the fourth-from-last token.
    NOTE(review): assumes the layout
    "Excited State N: <label> <E> eV <wl> nm f=<f> <S**2>=<v>" -- confirm.
    """
    tokens = line.split()
    strength = float(tokens[-2].split("f=")[-1])
    energy = float(tokens[4])
    wavelength = float(tokens[-4])
    return strength, energy, wavelength


def extract_record(lines):
    """Build one result row (a dict) from the lines of a single log file.

    Keys, in order: f_1, S1, nm_1, f_2, S2, nm_2, f_3, S3, nm_3,
    S1_energy, S2_energy, S3_energy, SCF_done_energy.

    Raises:
        ValueError: if a required key line is missing or a number is malformed.
        IndexError: if a matched line has fewer tokens than expected.
    """
    # Locate the excited-state section (start inclusive, end exclusive).
    section_start = find_line(lines, initial_key_1, ignore_case=True)
    section_end = find_line(lines, end_key_1, start=section_start)
    section = lines[section_start:section_end]

    record = {}
    excitation_energies = []
    state_keys = (excited_state_1, excited_state_2, excited_state_3)
    for number, state_key in enumerate(state_keys, start=1):
        state_line = section[find_line(section, state_key)]
        strength, energy, wavelength = parse_excited_state(state_line)
        record[f"f_{number}"] = strength
        record[f"S{number}"] = energy
        record[f"nm_{number}"] = wavelength
        excitation_energies.append(energy)

    # The SCF energy is the fifth whitespace-separated token of the first
    # "SCF Done" line anywhere in the file.
    scf_energy = float(lines[find_line(lines, SCF_done_key)].split()[4])
    for number, energy in enumerate(excitation_energies, start=1):
        record[f"S{number}_energy"] = scf_energy + (energy / EV_PER_HARTREE)
    record["SCF_done_energy"] = scf_energy
    return record


# --- Parse every log file ----------------------------------------------------
records = []
angles = []
for path in paths:
    # Read the log as plain text lines; undecodable bytes are dropped.
    with open(path, "r", encoding="utf-8", errors="ignore") as file:
        lines = file.read().splitlines()
    try:
        record = extract_record(lines)
    except (ValueError, IndexError) as exc:
        # Fail loudly and say which file is malformed.
        raise ValueError(f"{path}: could not extract excited-state data ({exc})") from exc
    # The torsion angle comes from the file name and becomes the row label.
    angles.append(torsion_angle_from_path(path))
    records.append(record)

# Build the table once (one row per file), ordered by torsion angle.
df_output = pd.DataFrame(records, index=angles).sort_index() if records else pd.DataFrame()

In [2]:
# Show the assembled results table (rows are indexed by torsion angle).
df_output

Unnamed: 0,f_1,S1,nm_1,f_2,S2,nm_2,f_3,S3,nm_3,S1_energy,S2_energy,S3_energy,SCF_done_energy
0,0.3274,3.6707,337.77,0.1445,4.0375,307.08,0.0277,4.5388,273.17,-632.01043,-631.996951,-631.978528,-632.145325
15,0.3156,3.6768,337.21,0.1608,4.0518,306.0,0.0518,4.542,272.98,-632.008801,-631.99502,-631.977006,-632.14392
30,0.2962,3.6851,336.45,0.1667,4.0712,304.54,0.0932,4.552,272.37,-632.004509,-631.99032,-631.972651,-632.139933
45,0.2654,3.7086,334.32,0.1556,4.0981,302.54,0.1434,4.5714,271.21,-631.997475,-631.983161,-631.965767,-632.133762
60,0.2058,3.7717,328.72,0.1314,4.1383,299.6,0.15,4.6083,269.05,-631.988094,-631.974622,-631.957349,-632.1267
120,0.2304,3.8648,320.8,0.0132,4.0745,304.29,0.0446,4.6747,265.22,-631.979147,-631.971441,-631.949384,-632.121175
135,0.297,3.7706,328.82,0.0094,4.0463,306.42,0.0124,4.5856,270.38,-631.98551,-631.975379,-631.95556,-632.124076
150,0.3297,3.7049,334.65,0.0222,4.0297,307.67,0.0305,4.5166,274.51,-631.990236,-631.9783,-631.960407,-632.126388
165,0.3445,3.6704,337.79,0.0381,4.0213,308.32,0.0425,4.4801,276.74,-631.992681,-631.979786,-631.962925,-632.127565
180,0.3514,3.6744,337.42,0.0471,4.0197,308.44,0.0465,4.4848,276.46,-631.992557,-631.979867,-631.962775,-632.127587
