When using the cluster, the simulations can't be outputed in subdirectories according to slope, and thus loading the data into json files requires a different process.

In [1]:
import numpy as np
import json
import ROOT as r
import os
from ReadKTOutputFile import *
from root_numpy import tree2array
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

Welcome to JupyROOT 6.18/00
        _                __     
  _____(_)________ _____/ /___ _
 / ___/ / ___/ __ `/ __  / __ `/
/ /__/ / /__/ /_/ / /_/ / /_/ / 
\___/_/\___/\__,_/\__,_/\__,_/  



2020-01-21T21:45:04[32m[INFO    ] CicadaPy(50) -> [35mAll set![0m


In [2]:
def LoadFilelist(mypath, search_str = '.'):
    '''
    Get list of all files fullfilling a few naming conditions
    '''
    filelist = []
    print('Searching files in {} with "{}" in filename'.format(mypath, search_str))
    for (dirpath, dirnames, filenames) in os.walk(mypath):
        for name in filenames:
            if '.root' in name and search_str in name:
                filelist.append(name)
            elif '.json' in name and search_str in name:
                filelist.append(name)
    return filelist

def GetEventIdFromFilename(filename):
    splitted_name = filename.replace('.', '_')
    splitted_name = splitted_name.split('_')
    for s in splitted_name:
        #print(s)
        if s.isdigit():
            break
    return s

# get list of files

def get_path_list(paths, slopes):
    list = []
    for path in paths:
        for slope in slopes:
            list.append(path + str(slope))
    return list

def read_root(path_to_sim, branch_name):
    f = r.TFile.Open(path_to_sim, 'read')
    
    tree=f.Get("Event_0")
    
    start_freq_true = tree2array(tree, branches=[branch_name])[0][0]
    
    return start_freq_true

# get the list of slopes from the name of the root files
# assuming a file name with the format: '[type]_event_[slope]_[simnumber].root'
# if a different format is used then this function will need to be changed

def get_slope(event_file):
    pre_index = (event_file.find('_'))+1
    first_index = event_file.find('_', pre_index)+1
    last_index = event_file.find('_', first_index)
    slope = float(event_file[first_index:last_index])
    return slope

In [3]:
#CHANGE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
list_of_directories = ['../Testing_1directory_slopescan19/', '../Testing_1directory_slopescan20/']
# list_of_slopes = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
#                   0.7, 0.8, 0.9, 1.0, 1.1]
path_list = list_of_directories
#path_list = get_path_list(list_of_directories, list_of_slopes)
TrackAndEventFileList = []
SimulatedEventFileList = []
SNRFileList = []
k_index = []
simulated_snr = []
simulated_snr_event = []
simulated_slope = []
simulated_slope_event = []
start_frequencies = []
true_start_frequencies = []
true_track_times = []
reconstructed_track_times = []
for path in path_list:
    #Track and event file and simulated event file
    TrackAndEventFileList.append(sorted(LoadFilelist(path, 'reconstructed_event')))
    SimulatedEventFileList.append(sorted(LoadFilelist(path, 'simulated_event')))
    SNRFileList.append(sorted(LoadFilelist(path, 'snr_and_power_and_slope')))
    #print(SNRFileList)

Searching files in ../Testing_1directory_slopescan19/ with "reconstructed_event" in filename
Searching files in ../Testing_1directory_slopescan19/ with "simulated_event" in filename
Searching files in ../Testing_1directory_slopescan19/ with "snr_and_power_and_slope" in filename
Searching files in ../Testing_1directory_slopescan20/ with "reconstructed_event" in filename
Searching files in ../Testing_1directory_slopescan20/ with "simulated_event" in filename
Searching files in ../Testing_1directory_slopescan20/ with "snr_and_power_and_slope" in filename


In [4]:
print(SNRFileList)

[['snr_and_power_and_slope_0.1.json', 'snr_and_power_and_slope_0.4.json', 'snr_and_power_and_slope_0.7.json', 'snr_and_power_and_slope_1.0.json'], ['snr_and_power_and_slope_0.1.json', 'snr_and_power_and_slope_0.4.json', 'snr_and_power_and_slope_0.7.json', 'snr_and_power_and_slope_1.0.json']]


In [5]:
# check events are in files
    
for each_slope, each_path in zip(TrackAndEventFileList, path_list):
    k_id = []
    for i in range(len(each_slope)):
        try:
            start_times = ReadKTOutputFile(os.path.join(each_path, each_slope[i]), var='StartTimeInRunC')
            #print(start_times)
        except Exception as e:
            #print(e) # you ll probably get an error when no events are present in the file. if you 're sure that is the error, you can comment this line.
            continue

        file_id = i
        if len(start_times) > 0:
            k_id.append(file_id)
    k_index.append(k_id)

    #load reconstructed events' start frequencies
    for each in each_slope:
        try:
            start_frequency = ReadKTOutputFile(os.path.join(each_path, each), var='StartFrequency')
            #print(start_frequency)
        except Exception as e:
            #print(e) #comment out after making sure the code works
            continue
        if len(start_frequency) != 0:
            start_frequencies.append(start_frequency)
            
    #load json file with simulated snrs
    snr_run_list = []
    for each_snr in SNRFileList[0]:
        snr_and_power_file = each_path + each_snr
        with open(snr_and_power_file) as infile:
            a = json.load(infile)
        snr_run_list.append(a['snr'])
    simulated_snr.append(np.asarray(snr_run_list).flatten())
    
        #print(simulated_snr)
        
#load the simulated slopes
for each_sim in SimulatedEventFileList:
    slope_run_list = []
    for sim in each_sim:
        slope_run_list.append(get_slope(sim))
    simulated_slope.append(slope_run_list)

#select the simulated slopes and snrs associated with the events successfully reconstructed
for each_path, each_sim, k_id, slope, snr in zip(path_list, SimulatedEventFileList, k_index, simulated_slope, simulated_snr):
    slope_run_list_event = np.asarray(slope)[k_id]
    simulated_slope_event.append(slope_run_list_event.tolist())
    snr_run_list_event = np.asarray(snr)[k_id]
    simulated_snr_event.append(snr_run_list_event.tolist())
        
# print(simulated_slope)
# print(simulated_slope_event)
# print(simulated_snr)
# print(simulated_snr_event)

Error: no tree multiTrackEvents in file
Error: no tree multiTrackEvents in file
Error: no tree multiTrackEvents in file
Error: no tree multiTrackEvents in file
Error: no tree multiTrackEvents in file
Error: no tree multiTrackEvents in file


In [7]:
print(start_frequencies)
print(simulated_slope_event)

[[56409004.40705187], [56318359.37499995], [56030273.43749994], [56416829.42708318]]
[[0.1, 0.4], [0.4, 0.4]]


In [8]:
#converting all of the lists to arrays
simulated_snr = np.asarray(simulated_snr)
simulated_snr_event = np.asarray(simulated_snr_event).flatten()
simulated_slope = np.asarray(simulated_slope)
simulated_slope_event = np.asarray(simulated_slope_event).flatten()
reconstructed_freq = np.asarray(start_frequencies)

In [9]:
#load true start frequencies

for each_sim, each_path, each_rec in zip(SimulatedEventFileList, path_list, TrackAndEventFileList):
    for i in range(len(each_sim)):
        try:
            true_start_frequencies.append(read_root(os.path.join(each_path, each_sim[i]), 'StartFrequencies'))
        except Exception as e:
            print(e)
            print(each_path+each_sim[i])
            true_start_frequencies.append(np.nan)
            continue

In [13]:
print(true_start_frequencies[0])

[2.59064051e+10]


In [14]:
#load true start times

for each_sim, each_path, each_rec in zip(SimulatedEventFileList, path_list, TrackAndEventFileList):
    for i in range(len(each_sim)):
        try:
             true_track_times.append(read_root(os.path.join(each_path, each_sim[i]), 'StartTimes'))
        except Exception as e:
            print(e)
            print(each_path+each_sim[i])
            true_track_times.append(np.nan)
            continue

In [18]:
print(len(true_track_times))

16


In [16]:
#load reconstructed start times

for each_sim, each_path, each_rec in zip(SimulatedEventFileList, path_list, TrackAndEventFileList):
    for i in range(len(each_rec)):
        try:
            reconstructed_track_times.append(ReadKTOutputFile(os.path.join(each_path, each_rec[i]), 'StartTimeInRunC', objectType='TProcessedTrackData', name='procTracks:Track'))
        except Exception as e:
            print(e)
            print(each_path+each_rec[i])
            reconstructed_track_times.append(np.nan)
            continue

In [20]:
print(reconstructed_track_times)

[[0.0057548799999999995], [0.005632, 0.0166912], [0.00595968], [0.0060416], [0.00534528, 0.00546816], [0.00595968], [0.00526336]]


In [21]:
rec_freq = np.ndarray.tolist((reconstructed_freq+24.5e9+1.4e9-50e6)/1e3)

In [22]:
print(len(rec_freq))
print(len(true_start_frequencies))
print(len(true_track_times))
print(len(reconstructed_track_times))

4
16
16
7


In [24]:
#getting the true start frequencies and selecting the datapoints that correspond to reconstructed events
sub_event_ids = k_index
real_freq = []
for event in true_start_frequencies:
    real_freq.append(np.min(event))

true_freq_simulated = np.asarray(real_freq)/1e3

true_freq = []
true_start_times = []
reconstructed_start_times = []
for each_run in sub_event_ids:
    for i in each_run:
        true_freq.append(true_freq_simulated[i])
        true_start_times.append(list(true_track_times[i]))
        reconstructed_start_times.append(list(reconstructed_track_times[i]))
#true_freq = np.asarray(true_freq)

In [25]:
print(len(true_freq_simulated.flatten()))
print(len(true_freq))

print(len(true_start_times))
print(len(reconstructed_start_times))

16
4
4
4


In [32]:
simulated_slope_event = simulated_slope_event.tolist()
simulated_snr_event = simulated_snr_event.tolist()

In [33]:
#storing data in json files
with open('../Testing_1directory_slopescan20/true_frequencies.json', 'w') as true_freq_file:
    json.dump(true_freq, true_freq_file)
with open('../Testing_1directory_slopescan20/reconstructed_frequencies.json', 'w') as rec_freq_file:
    json.dump(rec_freq, rec_freq_file)

with open('../Testing_1directory_slopescan20/simulated_slope.json', 'w') as sim_slope_file:
    json.dump(np.ndarray.tolist(simulated_slope), sim_slope_file)
with open('../Testing_1directory_slopescan20/simulated_slope_event.json', 'w') as sim_slope_event_file:
    json.dump(simulated_slope_event, sim_slope_event_file)

with open('../Testing_1directory_slopescan20/simulated_snr.json', 'w') as sim_snr_file:
    json.dump(np.ndarray.tolist(simulated_snr), sim_snr_file)
with open('../Testing_1directory_slopescan20/simulated_snr_event.json', 'w') as sim_snr_event_file:
    json.dump(simulated_snr_event, sim_snr_event_file)

with open('../Testing_1directory_slopescan20/simulated_start_times.json', 'w') as sim_start_times_file:
    json.dump(true_start_times, sim_start_times_file)
with open('../Testing_1directory_slopescan20/reconstructed_start_times.json', 'w') as rec_start_times_file:
    json.dump(reconstructed_start_times, rec_start_times_file)

In [14]:
#load reconstructed snr and slope files that are known to have reconstructed events
reconstructed_snr = []
reconstructed_slope = []
for each_path, each_rec in zip(path_list, TrackAndEventFileList):
    for each in each_rec:
        reconstructed_slope.append(ReadKTOutput(os.path.join(each_path, each), var='firstTrackSlope'))
        resonstructed_snr.append(ReadKTOutput(os.path.join(each_path, each), var=))
        
#do I need to load in the actual simulated frequencies and compare track to track, or just compare the first track slopes and snrs?

SyntaxError: invalid syntax (<ipython-input-14-3f70b7c3bd06>, line 6)