In [2]:
import matplotlib.pyplot as plt
import numpy as np
import astropy.units as u
from datetime import datetime, timedelta
import pickle, json, sys, os, glob
import pandas as pd
pd.set_option("display.max_columns", None)

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# location of the scripts
sys.path.insert(0, os.path.join("/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/scripts"))
import auxiliar as aux
import geometry as geom

# Paths and definitions

In [3]:
dcheck_root = "/fefs/aswg/workspace/abelardo.moralejo/data/datachecks/night_wise/DL1_datacheck_"

ws_database = "/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/analysis_first_corrections/objects/WS2003-22_short.h5"

dir_objects = "/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/analysis_weather/objects"

results_path = "/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/bash_weather_data/RESULTS.txt"

create_data_dict = True

# Datachecks information

## `cosmics_intensity_spectrum`

'yyyymmdd', 'ra_tel', 'dec_tel', 'cos_zenith', 'az_tel', 'runnumber',
       'subrun', 'time', 'elapsed_time', 'corrected_elapsed_time',
       'cosmics_rate', 'cosmics_cleaned_rate', 'intensity_at_half_peak_rate',
       'ZD_corrected_intensity_at_half_peak_rate', 'cosmics_peak_rate',
       'ZD_corrected_cosmics_peak_rate', 'cosmics_rate_at_422_pe',
       'ZD_corrected_cosmics_rate_at_422_pe', 'cosmics_spectral_index',
       'ZD_corrected_cosmics_spectral_index', 'intensity_spectrum_fit_p_value',
       'intensity_at_reference_rate', 'diffuse_nsb_std',
       'num_star_affected_pixels', 'anomalous_low_intensity_peak'

## `runsummary`

'runnumber', 'time', 'elapsed_time', 'min_altitude', 'mean_altitude',
       'max_altitude', 'min_azimuth', 'max_azimuth', 'mean_azimuth', 'mean_ra',
       'mean_dec', 'num_cosmics', 'num_pedestals', 'num_flatfield',
       'num_unknown_ucts_trigger_tags', 'num_wrong_ucts_tags_in_cosmics',
       'num_wrong_ucts_tags_in_pedestals', 'num_wrong_ucts_tags_in_flatfield',
       'num_ucts_jumps', 'num_unknown_tib_trigger_tags',
       'num_wrong_tib_tags_in_cosmics', 'num_wrong_tib_tags_in_pedestals',
       'num_wrong_tib_tags_in_flatfield', 'num_pedestals_after_cleaning',
       'num_contained_mu_rings', 'ff_charge_mean', 'ff_charge_mean_err',
       'ff_charge_stddev', 'ff_time_mean', 'ff_time_mean_err',
       'ff_time_stddev', 'ff_rel_time_stddev', 'ped_charge_mean',
       'ped_charge_mean_err', 'ped_charge_stddev',
       'ped_fraction_pulses_above10', 'ped_fraction_pulses_above30',
       'cosmics_fraction_pulses_above10', 'cosmics_fraction_pulses_above30',
       'mu_effi_mean', 'mu_effi_stddev', 'mu_width_mean', 'mu_width_stddev',
       'mu_hg_peak_sample_mean', 'mu_hg_peak_sample_stddev',
       'mu_intensity_mean', 'mean_number_of_pixels_nearby_stars'
       
## Weather Station data

'sun_alt', 'sun_az', 'fBits', 'mjd', 'temperature', 'pressure',
       'windDirection', 'humidity', 'windSpeedCurrent', 'windGust',
       'windSpeedAverage', 'windDirectionAverage', 'tempSensor', 'tngDust',
       'tngSeeing', 'rain', 'state', 'Any', 'Mes', 'DP', 'diff1', 'is_dup',
       'temperatureR'

# Extracting dates and parameters of all runs/subruns

In [9]:
if create_data_dict:
    dchecks = glob.glob(dcheck_root + "*.h5")
    
    
    run, srun, time  = [], [], []
    telapsed, az, zd = [], [], []
    zd_i_a_h_p_r, zd_c_r_a_4, zd_d_c_r_a_4, zd_c_s_i = [], [], [], []
    i_a_h_p_r, c_r_a_4, d_c_r_a_4, c_s_i, l_y        = [], [], [], [], []
    for i, dcheck in enumerate(dchecks):
        if i % 10 == 0:
            print(f"{i}/{len(dchecks)}")

        ds = pd.read_hdf(dcheck, key="runsummary")
        di = pd.read_hdf(dcheck, key="cosmics_intensity_spectrum")
        
        for j in range(len(ds)):
            runref = ds["runnumber"].iloc[j]
            
            di_run = di.query(f"runnumber == {runref}")
            
            for k in range(len(di_run)):
                
                run.append(runref)
                srun.append(di["subrun"].iloc[k])
                az.append(ds["mean_azimuth"].iloc[j])
                zd.append(np.arccos(di["cos_zenith"].iloc[k]))
                time.append(datetime.fromtimestamp(di["time"].iloc[k]))
                telapsed.append(di["corrected_elapsed_time"].iloc[k])
                zd_i_a_h_p_r.append(di["ZD_corrected_intensity_at_half_peak_rate"].iloc[k])
                zd_c_r_a_4.append(di["ZD_corrected_cosmics_rate_at_422_pe"].iloc[k])
                zd_d_c_r_a_4.append(di["ZD_corrected_delta_cosmics_rate_at_422_pe"].iloc[k])
                zd_c_s_i.append(di["ZD_corrected_cosmics_spectral_index"].iloc[k])
                i_a_h_p_r.append(di["intensity_at_half_peak_rate"].iloc[k])
                c_r_a_4.append(di["cosmics_rate_at_422_pe"].iloc[k])
                d_c_r_a_4.append(di["delta_cosmics_rate_at_422_pe"].iloc[k])
                c_s_i.append(di["cosmics_spectral_index"].iloc[k])
                l_y.append(di["light_yield"].iloc[k])
                
        
    dict_dcheck = {
        "run" : np.array(run),
        "srun" : np.array(srun),
        "time" : np.array(time),
        "telapsed" : np.array(telapsed),
        "az" : np.rad2deg(az),
        "zd" : np.rad2deg(zd),
        "ZD_corrected_intensity_at_half_peak_rate" : np.array(zd_i_a_h_p_r),
        "ZD_corrected_cosmics_rate_at_422_pe" : np.array(zd_c_r_a_4),
        "ZD_corrected_delta_cosmics_rate_at_422_pe" : np.array(zd_d_c_r_a_4),
        "ZD_corrected_cosmics_spectral_index" : np.array(zd_c_s_i),
        "intensity_at_half_peak_rate" : np.array(i_a_h_p_r),
        "cosmics_rate_at_422_pe" : np.array(c_r_a_4),
        "delta_cosmics_rate_at_422_pe" : np.array(d_c_r_a_4),
        "cosmics_spectral_index" : np.array(c_s_i),
        "light_yield" : np.array(l_y)
    }
            
            
    # Saving the objects
    with open(dir_objects + "/data_dict.pkl", 'wb') as f:
        pickle.dump(dict_dcheck, f, pickle.HIGHEST_PROTOCOL)
else:
    with open(dir_objects + "/data_dict.pkl", 'rb') as f:
        dict_dcheck = pickle.load(f)    

0/494
10/494
20/494
30/494
40/494
50/494
60/494
70/494
80/494
90/494
100/494
110/494
120/494
130/494
140/494
150/494
160/494
170/494
180/494
190/494
200/494
210/494
220/494
230/494
240/494
250/494
260/494
270/494
280/494
290/494
300/494
310/494
320/494
330/494
340/494
350/494
360/494
370/494
380/494
390/494
400/494
410/494
420/494
430/494
440/494
450/494
460/494
470/494
480/494
490/494


In [10]:
df_ws = pd.read_hdf(ws_database)

dates = dict_dcheck["time"][:-3]

maxdate = np.max(dates)
mindate = np.min(dates)

# Assuming df_ws.index is already a NumPy array
dates_ws = np.array([datetime.fromisoformat(str(d).split(".")[0]) for d in df_ws.index])

maxdate_ws = np.max(dates_ws)

# Combine date filtering in NumPy
mask = (dates_ws > mindate) & (dates_ws < maxdate)
dates_ws = dates_ws[mask]
df_ws = df_ws[mask]

_index_ws   = [int(s.split(",")[0]) for s in np.loadtxt(results_path, dtype=str)]
_index_data = [int(s.split(",")[1]) for s in np.loadtxt(results_path, dtype=str)]

_index_ws, _index_data = aux.sortbased(_index_ws, _index_data)

In [11]:
if create_data_dict:
#     df_ws = pd.read_hdf(ws_database)

#     dates = dict_dcheck["time"]

#     maxdate = np.max(dates)
#     mindate = np.min(dates)

#     # Assuming df_ws.index is already a NumPy array
#     dates_ws = np.array([datetime.fromisoformat(str(d).split(".")[0]) for d in df_ws.index])

#     maxdate_ws = np.max(dates_ws)
        
#     # Combine date filtering in NumPy
#     mask = (dates_ws > mindate) & (dates_ws < maxdate)
#     dates_ws = dates_ws[mask]
#     df_ws = df_ws[mask]

#     _index_ws   = [int(s.split(",")[0]) for s in np.loadtxt(results_path, dtype=str)]
#     _index_data = [int(s.split(",")[1]) for s in np.loadtxt(results_path, dtype=str)]

#     _index_ws, _index_data = aux.sortbased(_index_ws, _index_data)


    index_ws, index_data = [], []
    for i in range(len((dates))):
        
        if i < 690675:
            date = dates[i]

            if date <= maxdate_ws:
                index_ws.append(i)
            else:
                index_ws.append(None)
                
            index_data.append(_index_data[i])    
            
        else:
            index_ws.append(None)

            
    
    dict_dcheck["index_ws"] = np.array(index_ws)


    temperature, pressure, humidity = [], [], []
    tngDust, tngSeeing, rain        = [], [], []
    for i in range(len(dates)):
        if i % 50000 == 0:
            print(f"{i}/{len(dates)}")
        if index_ws[i] != None:
            temperature.append(df_ws.iloc[index_ws[i]]["temperature"])
            pressure.append(df_ws.iloc[index_ws[i]]["pressure"])
            humidity.append(df_ws.iloc[index_ws[i]]["humidity"])
            tngDust.append(df_ws.iloc[index_ws[i]]["tngDust"])
            tngSeeing.append(df_ws.iloc[index_ws[i]]["tngSeeing"])
            rain.append(df_ws.iloc[index_ws[i]]["rain"])
        else:
            temperature.append(None)
            pressure.append(None)
            humidity.append(None)
            tngDust.append(None)
            tngSeeing.append(None)
            rain.append(None)

    dict_dcheck["temperature"] = np.array(temperature)
    dict_dcheck["pressure"]    = np.array(pressure)
    dict_dcheck["humidity"]    = np.array(humidity)
    dict_dcheck["tngDust"]     = np.array(tngDust)
    dict_dcheck["tngSeeing"]   = np.array(tngSeeing)
    dict_dcheck["rain"]        = np.array(rain)
    
    with open(dir_objects + "/data_dict.pkl", 'wb') as f:
        pickle.dump(dict_dcheck, f, pickle.HIGHEST_PROTOCOL)

0/696142
50000/696142
100000/696142
150000/696142
200000/696142
250000/696142
300000/696142
350000/696142
400000/696142
450000/696142
500000/696142
550000/696142
600000/696142
650000/696142
