In [57]:
import codecs
import glob
import math
import os
from decimal import Decimal
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline


# Glob pattern selecting every CIF file of the data set to summarise.
data_dir = "data/input/X-ray/0508GM/OMe/*.cif"
# Directory part of the pattern (trailing slash kept); the summary CSV is
# written next to the input files.
result_dir = data_dir.replace("*.cif", "")
# glob.glob() returns matches in an arbitrary, filesystem-dependent order, so
# sort them to make the column order of the output table reproducible.
paths = sorted(glob.glob(data_dir))

# Search patterns used to locate the wanted data items inside each CIF file.

# --- Space group ---
space_group_IT_number_key = "space_group_IT_number"
space_group_name_HM_alt_key = "space_group_name_H-M_alt"
# Z' is Z divided by the number of symmetry operations listed between this
# line and the next blank line.
space_group_symop_operation_xyz_key = "_space_group_symop_operation_xyz"

# --- Unit cell ---
cell_length_a_key = "cell_length_a"
cell_length_b_key = "cell_length_b"
cell_length_c_key = "cell_length_c"
cell_angle_alpha_key = "cell_angle_alpha"
cell_angle_beta_key = "cell_angle_beta"
cell_angle_gamma_key = "cell_angle_gamma"
cell_volume_key = "cell_volume"
cell_formula_units_Z_key = "cell_formula_units_Z"
cell_measurement_temperature_key = "cell_measurement_temperature"

# --- Crystal ---
exptl_crystal_description_key = "exptl_crystal_description"
exptl_crystal_colour_key = "exptl_crystal_colour"
exptl_crystal_density_diffrn_key = "exptl_crystal_density_diffrn"  # density in g/cm-3 (Dcalcd)

# --- Data collection ---
diffrn_reflns_av_R_equivalents_key = "diffrn_reflns_av_R_equivalents"  # Rint
diffrn_reflns_theta_max_key = "diffrn_reflns_theta_max"  # doubled and reported as 2θmax
reflns_number_total_key = "reflns_number_total"  # number of reflections

# --- Refinement ---
refine_ls_R_factor_gt_key = "refine_ls_R_factor_gt"
refine_ls_wR_factor_ref_key = "refine_ls_wR_factor_ref"
refine_ls_restrained_S_all_key = "refine_ls_restrained_S_all"  # GoFall
refine_diff_density_max_key = "refine_diff_density_max"
refine_diff_density_min_key = "refine_diff_density_min"



# Result table; it stays empty until per-file columns are added.
df_output = pd.DataFrame()

# Row labels of the result table, in the order the per-file values are listed.
index_array = (
    # space group
    ["Space group IT number", "Space group"]
    # unit cell lengths, angles and volume
    + ["a / Å", "b / Å", "c / Å", "α / °", "β / °", "γ / °", "V / Å\u00B3"]
    # density, cell contents and measurement conditions
    + ["Dcalc / g cm-3", "Z", "Z'", "T / K", "2θ"]
    # agreement factors
    + ["R", "Rw", "Rint", "GoFall"]
    # reflection count and residual density extremes
    + ["reflns_number_total", "refine_diff_density_max", "refine_diff_density_min"]
    # crystal appearance
    + ["crystal description", "crystal colour"]
)

def _first_match(cif_lines, key, source):
    """Return ``(row_label, text)`` of the first entry of ``cif_lines`` containing ``key``.

    Parameters
    ----------
    cif_lines : pandas.Series
        First column of the table produced by ``pd.read_table`` for one file.
    key : str
        Pattern searched case-insensitively with ``Series.str.contains``.
    source : str
        File name, used only to make the error message actionable.

    Raises
    ------
    IndexError
        If no entry contains ``key``.
    """
    # na=False: entries parsed as NaN count as "no match" instead of breaking
    # the boolean mask.
    hits = cif_lines[cif_lines.str.contains(key, case=False, na=False)]
    if hits.empty:
        raise IndexError(f"'{key}' not found in {source}")
    return hits.index[0], hits.iloc[0]


def _last_token(cif_lines, key, source):
    """Return the last whitespace-separated token of the first line containing ``key``."""
    return _first_match(cif_lines, key, source)[1].split()[-1]


# One single-column DataFrame per CIF file; joined once after the loop instead
# of re-concatenating the growing table on every iteration.
result_columns = []

for path in paths:
    # The bare file name labels this structure's column in the output table.
    file_name = os.path.basename(path)

    # Undecodable bytes are ignored rather than aborting the run. The file is
    # only needed while pandas parses it, so it is closed before processing.
    with open(path, "r", encoding="UTF-8", errors="ignore") as file:
        df = pd.read_table(file)
    df = df.rename(columns={df.columns[0]: 'Col_1'})
    cif_lines = df['Col_1']

    # Find the line containing each key and take its value.
    space_group_IT_number = _last_token(cif_lines, space_group_IT_number_key, file_name)
    # The space-group symbol contains spaces, so take the quoted text instead
    # of the last token.
    space_group_name_HM_alt = _first_match(cif_lines, space_group_name_HM_alt_key, file_name)[1].split("'")[-2]
    cell_length_a = _last_token(cif_lines, cell_length_a_key, file_name)
    cell_length_b = _last_token(cif_lines, cell_length_b_key, file_name)
    cell_length_c = _last_token(cif_lines, cell_length_c_key, file_name)
    cell_angle_alpha = _last_token(cif_lines, cell_angle_alpha_key, file_name)
    cell_angle_beta = _last_token(cif_lines, cell_angle_beta_key, file_name)
    cell_angle_gamma = _last_token(cif_lines, cell_angle_gamma_key, file_name)
    cell_volume = _last_token(cif_lines, cell_volume_key, file_name)
    cell_formula_units_Z = _last_token(cif_lines, cell_formula_units_Z_key, file_name)
    cell_measurement_temperature = _last_token(cif_lines, cell_measurement_temperature_key, file_name)
    refine_ls_R_factor_gt = _last_token(cif_lines, refine_ls_R_factor_gt_key, file_name)
    refine_ls_wR_factor_ref = _last_token(cif_lines, refine_ls_wR_factor_ref_key, file_name)
    # NOTE(review): only the last token is kept, so a multi-word quoted
    # description or colour would be truncated - confirm against the data.
    exptl_crystal_description = _last_token(cif_lines, exptl_crystal_description_key, file_name)
    exptl_crystal_colour = _last_token(cif_lines, exptl_crystal_colour_key, file_name)
    exptl_crystal_density_diffrn = _last_token(cif_lines, exptl_crystal_density_diffrn_key, file_name)
    diffrn_reflns_av_R_equivalents = _last_token(cif_lines, diffrn_reflns_av_R_equivalents_key, file_name)
    refine_ls_restrained_S_all = _last_token(cif_lines, refine_ls_restrained_S_all_key, file_name)
    reflns_number_total = _last_token(cif_lines, reflns_number_total_key, file_name)
    # The file stores theta_max; the table reports 2*theta_max.
    diffrn_reflns_theta_max = float(_last_token(cif_lines, diffrn_reflns_theta_max_key, file_name)) * 2
    refine_diff_density_max = _last_token(cif_lines, refine_diff_density_max_key, file_name)
    refine_diff_density_min = _last_token(cif_lines, refine_diff_density_min_key, file_name)

    # Z' = Z / (number of symmetry operations). The operations are counted as
    # the parsed rows lying strictly between the symop header line and the
    # cell_length_a line (read_table skips blank lines by default).
    cell_length_a_index = _first_match(cif_lines, cell_length_a_key, file_name)[0]
    space_group_symop_operation_xyz_index = _first_match(cif_lines, space_group_symop_operation_xyz_key, file_name)[0]
    symop_count = int(cell_length_a_index - space_group_symop_operation_xyz_index - 1)
    if symop_count <= 0:
        raise ValueError(
            f"no symmetry operations found between '{space_group_symop_operation_xyz_key}' "
            f"and '{cell_length_a_key}' in {file_name}"
        )
    z_ratio = int(cell_formula_units_Z) / symop_count
    # Keep fractional Z' values (e.g. 0.5): round() would turn 0.5 into 0 and
    # 1.5 into 2. Whole values are still reported as plain integers.
    Z_prime = int(z_ratio) if z_ratio.is_integer() else z_ratio

    # Values in the same order as the row labels in index_array.
    data_array = [space_group_IT_number,
                  space_group_name_HM_alt,
                  cell_length_a,
                  cell_length_b,
                  cell_length_c,
                  cell_angle_alpha,
                  cell_angle_beta,
                  cell_angle_gamma,
                  cell_volume,
                  exptl_crystal_density_diffrn,
                  cell_formula_units_Z,
                  Z_prime,
                  cell_measurement_temperature,
                  diffrn_reflns_theta_max,
                  refine_ls_R_factor_gt,
                  refine_ls_wR_factor_ref,
                  diffrn_reflns_av_R_equivalents,
                  refine_ls_restrained_S_all,
                  reflns_number_total,
                  refine_diff_density_max,
                  refine_diff_density_min,
                  exptl_crystal_description,
                  exptl_crystal_colour]

    result_columns.append(pd.DataFrame(data_array, index=index_array, columns=[file_name]))

# With no input files the table stays empty, matching the previous behaviour.
df_output = pd.concat(result_columns, axis=1) if result_columns else pd.DataFrame()

df_output.to_csv(result_dir + "output.csv")
        

In [58]:
# Display the summary table: one row per extracted quantity, one column per CIF file.
df_output

Unnamed: 0,NpOMeC4.cif,NpOMeC5.cif,NpOMeC6.cif,NpOMeC6_UV.cif,NpOMeC7.cif,NpOMeC7_UV.cif,NpOMeC8.cif,NpOMeC9.cif,NpOMeC9_UV.cif
Space group IT number,19,14,14,2,19,19,14,19,19
Space group,P 21 21 21,P 21/c,P 21/c,P -1,P 21 21 21,P 21 21 21,P 21/c,P 21 21 21,P 21 21 21
a / Å,13.37250(10),19.9313(3),4.87110(10),8.1811(6),4.88890(10),4.9350(2),4.85440(10),4.8918(2),4.9505(2)
b / Å,13.80470(10),16.2804(2),12.3681(4),13.0367(11),12.1565(3),11.8683(6),12.7314(4),12.0309(4),11.8214(4)
c / Å,16.21460(10),19.9861(3),27.2911(8),16.2278(16),29.4745(7),29.9683(12),29.8074(7),32.7574(11),32.9829(10)
α / °,90,90,90,79.591(8),90,90,90,90,90
β / °,90,96.5510(14),94.187(2),81.297(7),90,90,93.395(2),90,90
γ / °,90,90,90,89.600(7),90,90,90,90,90
V / Å³,2993.27(4),6442.94(16),1639.80(8),1682.3(3),1751.73(7),1755.25(13),1838.96(8),1927.86(12),1930.22(12)
Dcalc / g cm-3,1.262,1.230,1.265,1.233,1.238,1.235,1.230,1.221,1.220


In [50]:
# Scratch check: round() without ndigits turns the float 4.0 into the integer 4.
round(4.0)

4