# Processing Hot Structure Files

This notebook processes the hot structures of a fragmentation channel for the QC experiment. It will need to be adapted before it is reused for future experiments.

In [1]:
import numpy as np
import gued_theory as gt
import matplotlib.pyplot as plt
import glob
import pandas as pd

%matplotlib widget

## Load Hot structures
Takes approx 1 min

In [2]:
# Directory, base names and extension handed to the hot-structure loader.
file_path = "C:\\Users\\laure\\OneDrive - University of Nebraska-Lincoln\\Documents\\Centurion Lab\\QC data and code\\hot_structures\\"
file_names = ["ethanol", "CP", "NBD", "QC"]
file_type = ".xyz"

# Give every name an entry with empty placeholders first, so the dictionary
# always contains one record per name before any file is read.
structure_dict = {}
for file_name in file_names:
    structure_dict[file_name] = {"coordinates": [], "atom_sums": []}

# Replace the placeholders with whatever the loader returns for each name.
for file_name in file_names:
    coordinates, atom_sums = gt.load_hot_xyz(file_path, file_name, file_type)
    structure_dict[file_name].update(coordinates=coordinates, atom_sums=atom_sums)

## Get I for all hot structures and find the mean
Takes ~ 15 minutes

In [3]:
# Per-name results: the mean total intensity and its standard deviation,
# both taken over all structures loaded for that name.
I_totals = {name: [] for name in file_names}
I_stds = {name: [] for name in file_names}

for key in file_names:
    print(f"Calculating I total for all {key} structures")
    atom_sums = structure_dict[key]['atom_sums']
    intensities = []
    for n_done, structure in enumerate(structure_dict[key]["coordinates"], start=1):
        # s_temp is intentionally left in the notebook namespace: the export
        # cell at the end of the notebook writes it out as the "s_values" column.
        I_tot, I_atomic, I_molecular, s_temp = gt.get_I_xray(structure, atom_sums)
        intensities.append(np.array(I_tot))
        # Progress report every 1000 structures.
        if n_done % 1000 == 0:
            print(f"Processed {n_done}")
    # Reduce over the structure axis (axis 0); np.std uses its default ddof=0.
    I_totals[key] = np.mean(intensities, axis=0)
    I_stds[key] = np.std(intensities, axis=0)



Calculating I total for all ethanol structures
Processed 1000
Processed 2000
Processed 3000
Processed 4000
Processed 5000
Processed 6000
Processed 7000
Processed 8000
Processed 9000
Processed 10000
Calculating I total for all CP structures
Processed 1000
Processed 2000
Processed 3000
Processed 4000
Processed 5000
Processed 6000
Processed 7000
Processed 8000
Processed 9000
Processed 10000
Calculating I total for all NBD structures
Processed 1000
Processed 2000
Processed 3000
Processed 4000
Processed 5000
Processed 6000
Processed 7000
Processed 8000
Processed 9000
Processed 10000
Calculating I total for all QC structures
Processed 1000
Processed 2000
Processed 3000
Processed 4000
Processed 5000
Processed 6000
Processed 7000
Processed 8000
Processed 9000
Processed 10000


## Get I0 from Trajectories
Takes ~ 11 minutes

In [5]:
# Accumulator for the per-trajectory I0 arrays collected further down in this cell.
I0_all = []

# Molecule ids and folder names associated with the trajectory data.
# NOTE(review): neither list is read by the code visible in this notebook;
# confirm whether they are still needed.
ids = ["NB", "QC"]
folders = [
    "Singlet_2",
    "Singlet_3",
    "Singlet_4",
]


def process_trajectory(folder, mol_name, file_type):
    """Return the total scattering intensity of a trajectory's first frame.

    Loads the time-evolving structure from ``folder`` with
    ``gt.load_time_evolving_xyz`` and evaluates ``gt.get_I_xray`` on the
    first entry of the returned structures only.

    Parameters
    ----------
    folder : str
        Path of the trajectory folder. Characters ``[-6:-2]`` of this string
        are printed as the trajectory label, so the label depends on the exact
        path format (including any trailing separator).
    mol_name : str
        Base name of the trajectory file, forwarded to the loader.
    file_type : str
        File extension, forwarded to the loader.

    Returns
    -------
    The first value returned by ``gt.get_I_xray`` for the first frame.
    """
    traj_label = folder[-6:-2]
    print(f"getting trajectory for {traj_label}")

    frames, at_sum, _counts = gt.load_time_evolving_xyz(folder, mol_name, file_type)
    print(at_sum)

    # Only the first frame is needed for the reference intensity; the other
    # three values returned by get_I_xray are not used here.
    first_frame_I, _, _, _ = gt.get_I_xray(frames[0], at_sum)
    return first_frame_I


# Glob pattern for the trajectory folders: three directory levels below
# "Theory Structures". The trailing slash restricts matches to directories.
path_traj = "C:/Users/laure/OneDrive - University of Nebraska-Lincoln/Documents/Centurion Lab/QC data and code/Theory Structures/*/*/*/"

# glob makes no ordering guarantee (it depends on the file system), so sort
# the matches by path to process the trajectories in a reproducible order.
traj_folder = sorted(glob.glob(path_traj))

# Fail loudly instead of silently producing a NaN mean when nothing matched
# (e.g. wrong path or the drive is not mounted).
if not traj_folder:
    raise FileNotFoundError(f"No trajectory folders matched {path_traj!r}")

print(f"Processing {len(traj_folder)} trajectory files.")
mol_name = "output"
file_type = ".xyz"

# Collect into a fresh local list so re-running this part never appends to
# results left over from a previous execution.
I0_per_trajectory = []
for folder in traj_folder:
    I0_per_trajectory.append(np.array(process_trajectory(folder, mol_name, file_type)))

# Stack to (n_trajectories, ...) and average over the trajectory axis.
I0_all = np.array(I0_per_trajectory)
print(I0_all.shape)
I0_mean = np.mean(I0_all, axis=0)

Processing 239 trajectory files.
getting trajectory for 0008
15
getting trajectory for 0020
15
getting trajectory for 0037
15
getting trajectory for 0068
15
getting trajectory for 0070
15
getting trajectory for 0079
15
getting trajectory for 0090
15
getting trajectory for 0107
15
getting trajectory for 0115
15
getting trajectory for 0128
15
getting trajectory for 0142
15
getting trajectory for 0149
15
getting trajectory for 0236
15
getting trajectory for 0237
15
getting trajectory for 0238
15
getting trajectory for 0267
15
getting trajectory for 0275
15
getting trajectory for 0278
15
getting trajectory for 0285
15
getting trajectory for 0308
15
getting trajectory for 0354
15
getting trajectory for 0372
15
getting trajectory for 0398
15
getting trajectory for 0409
15
getting trajectory for 0489
15
getting trajectory for 0499
15
getting trajectory for 0512
15
getting trajectory for 0516
15
getting trajectory for 0524
15
getting trajectory for 0530
15
getting trajectory for 0541
15
gettin

In [8]:
# Fractional difference signal (I_hot - I0_mean) / I0_mean for each channel.
# Entries are written in the same order as key_words.
key_words = ["NB", "QC", "frags"]
dI_I_hot = {
    "NB": (I_totals["NBD"] - I0_mean) / I0_mean,
    "QC": (I_totals["QC"] - I0_mean) / I0_mean,
    # The fragment channel is the sum of the two fragment intensities.
    # NOTE(review): "ethanol" is the file-name label used when loading;
    # confirm it names the intended second fragment.
    "frags": ((I_totals["CP"] + I_totals["ethanol"]) - I0_mean) / I0_mean,
}

In [9]:
# Write one tab-separated text file per channel ("<key>_xray.txt") into the
# current working directory, with columns s_values and dI_I.
# s_temp is the s grid left over from the intensity calculation cell.
for key in key_words:
    out_name = f"{key}_xray.txt"
    out_table = pd.DataFrame({"s_values": s_temp, "dI_I": dI_I_hot[key]})
    out_table.to_csv(out_name, sep='\t', index=False)