In [1]:
import codecs
import glob
import math
import os
from decimal import Decimal
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline


# ---------------------------------------------------------------------------
# Configuration: input location and the marker strings used to locate the
# relevant lines in each "*.out" log (presumably Gaussian TD-DFT output --
# TODO confirm).
# ---------------------------------------------------------------------------
data_dir = "data/input/salhy_TD/*.out"
result_dir = data_dir.replace("*.out", "")
paths = glob.glob(data_dir)

excited_state_1 = "Excited State   1"
excited_state_2 = "Excited State   2"
excited_state_3 = "Excited State   3"

SCF_done_key = "SCF Done"
initial_key_1 = "Excited State   1"
end_key_1 = "SavETr"

# Divisor applied to the excitation energies (read from the "eV" field) before
# they are added to the SCF energy.
# NOTE(review): presumably eV per Hartree. The current CODATA value is
# 27.211386; 27.2116 is kept unchanged so results stay identical -- confirm
# which value matches the program version that produced the logs.
EV_PER_HARTREE = 27.2116


def first_index_containing(lines, key, start=0, ignore_case=False):
    """Return the index of the first line at or after ``start`` containing ``key``.

    Parameters
    ----------
    lines : list of str
        Lines to search.
    key : str
        Plain substring to look for (not a regular expression).
    start : int, optional
        Index at which the search begins.
    ignore_case : bool, optional
        Compare case-insensitively when True.

    Returns
    -------
    int or None
        Index of the first matching line, or None when no line matches.
    """
    needle = key.lower() if ignore_case else key
    for idx in range(start, len(lines)):
        haystack = lines[idx].lower() if ignore_case else lines[idx]
        if needle in haystack:
            return idx
    return None


def summarize_state(block):
    """Extract the values of one excited state from its block of lines.

    Parameters
    ----------
    block : list of str
        Lines of one state; ``block[0]`` is the state's header line, which is
        expected to contain ``... <E> eV  <lambda> nm  f=<strength> ...``.

    Returns
    -------
    tuple
        ``(configs, strength, wavelength, energy_ev)`` where ``configs`` is
        every line containing "->" joined with ", " (arrows shown as "→").
    """
    header = block[0]
    strength = float(header.split("f=")[1].split()[0])
    energy_ev = float(header.split("eV")[0].split()[-1])
    wavelength = float(header.split("nm")[0].split()[-1])
    # str.join leaves no dangling ", " after the last configuration, whether
    # the state has one configuration line or several.
    configs = ", ".join(line for line in block if "->" in line)
    return configs.replace("->", "→"), strength, wavelength, energy_ev


def parse_td_lines(lines):
    """Build one result record (column label -> value) from the lines of a log.

    The parsed section starts at the first line containing ``initial_key_1``
    (case-insensitive) and stops before the first following line containing
    ``end_key_1``.  Inside it, the first three excited states are located via
    ``excited_state_1`` .. ``excited_state_3``.

    Parameters
    ----------
    lines : list of str
        All lines of one output file, without trailing newlines.

    Returns
    -------
    dict
        Twelve per-state entries (configurations, oscillator strength,
        wavelength, transition energy for S1..S3) followed by ``S1_energy``,
        ``S2_energy``, ``S3_energy`` and ``SCF_done_energy``.

    Raises
    ------
    ValueError
        If a required marker line is missing.
    """
    start = first_index_containing(lines, initial_key_1, ignore_case=True)
    if start is None:
        raise ValueError("no line containing {!r}".format(initial_key_1))
    end = first_index_containing(lines, end_key_1, start)
    if end is None:
        raise ValueError("no line containing {!r} after the first excited state".format(end_key_1))
    section = lines[start:end]

    heads = []
    for key in (excited_state_1, excited_state_2, excited_state_3):
        pos = first_index_containing(section, key)
        if pos is None:
            raise ValueError("no line containing {!r} in the excited-state section".format(key))
        heads.append(pos)

    # The third state ends where the next state header begins (if the log
    # contains more than three states); otherwise it runs to the section end.
    # Without this bound, configurations of states 4+ would be attributed to S3.
    third_end = len(section)
    for idx in range(heads[2] + 1, len(section)):
        if section[idx].lstrip().startswith("Excited State"):
            third_end = idx
            break
    bounds = heads + [third_end]

    # NOTE(review): the first "SCF Done" line of the file is used; confirm this
    # is the intended one when a log contains several SCF cycles.
    scf_idx = first_index_containing(lines, SCF_done_key)
    if scf_idx is None:
        raise ValueError("no line containing {!r}".format(SCF_done_key))
    scf_energy = float(lines[scf_idx].split()[4])

    record = {}
    energies_ev = []
    for number, (lo, hi) in enumerate(zip(bounds, bounds[1:]), start=1):
        configs, strength, wavelength, energy_ev = summarize_state(section[lo:hi])
        record["S{} Contributing Configurations and CI coefficients".format(number)] = configs
        # "strengh" (sic) is kept so the column labels of result.csv do not change.
        record["S{} Oscillator strengh".format(number)] = strength
        record["S{} λ(calcd.)".format(number)] = wavelength
        record["S{} transition Energy (eV)".format(number)] = energy_ev
        energies_ev.append(energy_ev)

    # Energy of each excited state: SCF energy plus the converted excitation energy.
    for number, energy_ev in enumerate(energies_ev, start=1):
        record["S{}_energy".format(number)] = scf_energy + (energy_ev / EV_PER_HARTREE)
    record["SCF_done_energy"] = scf_energy
    return record


# ---------------------------------------------------------------------------
# Parse every log once, then assemble the table in a single step (one row per
# file, labelled by file name, sorted by that label).
# ---------------------------------------------------------------------------
records = []
file_names = []
for path in paths:
    # The file name (which carries the conformer label) becomes the row label.
    file_name = os.path.basename(path)

    # Read the log as plain text lines; undecodable bytes are dropped.
    with open(path, "r", encoding="utf-8", errors="ignore") as file:
        lines = [line.rstrip("\n") for line in file]

    try:
        records.append(parse_td_lines(lines))
    except ValueError as err:
        # Say which file could not be parsed.
        raise ValueError("{}: {}".format(path, err)) from err
    file_names.append(file_name)

df_output = pd.DataFrame(
    records,
    index=file_names,
    columns=list(records[0]) if records else None,
).sort_index()

In [2]:
# Write the collected table to "result.csv" inside the input directory.
result_csv_path = "{}result.csv".format(result_dir)
df_output.to_csv(result_csv_path)

In [3]:
# Display the collected results table (one row per parsed output file).
df_output

Unnamed: 0,S1 Contributing Configurations and CI coefficients,S1 Oscillator strengh,S1 λ(calcd.),S1 transition Energy (eV),S2 Contributing Configurations and CI coefficients,S2 Oscillator strengh,S2 λ(calcd.),S2 transition Energy (eV),S3 Contributing Configurations and CI coefficients,S3 Oscillator strengh,S3 λ(calcd.),S3 transition Energy (eV),S1_energy,S2_energy,S3_energy,SCF_done_energy
C2_EEEE_opt_td.out,68 → 69 0.69413,0.3655,359.5,3.4488,"67 → 69 0.60160, 68 → 70 ...",0.0422,306.56,4.0444,"66 → 69 0.63706, 68 → 71 ...",0.0143,287.96,4.3056,-841.251966,-841.230078,-841.220479,-841.378706
C2_EEEZ_opt_td.out,68 → 69 0.69353,0.355,357.22,3.4708,"67 → 69 0.61143, 68 → 70 ...",0.0481,306.27,4.0482,"66 → 69 0.65537, 68 → 71 ...",0.0491,286.02,4.3348,-841.248149,-841.22693,-841.216398,-841.375698
C2_ZEEE_opt_td.out,68 → 69 0.69451,0.2936,349.2,3.5505,"67 → 69 0.56175, 68 → 71 ...",0.0418,299.11,4.1451,"66 → 69 -0.36903, 68 → 70 ...",0.0323,286.23,4.3316,-841.242374,-841.220523,-841.213669,-841.372852
C2_ZEEZ_opt_td.out,68 → 69 0.69329,0.2975,327.36,3.7874,"67 → 69 0.56864, 68 → 71 ...",0.0357,294.82,4.2055,"64 → 69 0.13706, 65 → 69 ...",0.0012,277.64,4.4657,-841.228418,-841.213054,-841.203491,-841.367602
NpOH_C2_EEEE_td.out,64 → 65 0.69284,0.3689,353.24,3.5099,"63 → 65 0.59788, 64 → 66 ...",0.0395,303.71,4.0823,"61 → 65 0.33509, 62 → 65 ...",0.009,286.48,4.3279,-801.940878,-801.919843,-801.910817,-802.069863
NpOH_C2_EEEZ_td.out,64 → 65 0.69199,0.3532,350.35,3.5388,"63 → 65 0.60510, 64 → 66 ...",0.0445,303.18,4.0894,"62 → 65 0.65685, 64 → 67 ...",0.044,284.05,4.3649,-801.93677,-801.916536,-801.906412,-802.066818
NpOH_C2_ZEEE_syn_opt_td.out,64 → 65 0.69429,0.3291,350.78,3.5345,"63 → 65 0.62333, 64 → 67 ...",0.0766,304.96,4.0656,"62 → 65 0.21126, 64 → 66 ...",0.0206,288.42,4.2987,-801.951287,-801.931769,-801.923203,-802.081176
NpOH_C2_ZEZZ_syn_opt_td.out,64 → 65 0.69529,0.3018,350.77,3.5346,"63 → 65 0.63547, 64 → 67 ...",0.0687,305.69,4.0558,"61 → 65 0.67704, 61 → 66 ...",0.0004,284.85,4.3527,-801.951159,-801.932005,-801.921095,-802.081052
