In [1]:
import matplotlib.pyplot as plt
import numpy as np
import astropy.units as u
from datetime import datetime, timedelta
import pickle, json, sys, os, glob
import pandas as pd
pd.set_option("display.max_columns", None)

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# location of the scripts
sys.path.insert(0, os.path.join("/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/scripts"))
import auxiliar as aux
import geometry as geom

# Paths and definitions

In [60]:
# Path prefix of the night-wise DL1 datacheck files; "*.h5" is appended when globbing
dcheck_root = "/fefs/aswg/workspace/abelardo.moralejo/data/datachecks/night_wise/DL1_datacheck_"

# HDF5 file with the weather-station table
ws_database = "/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/analysis_first_corrections/objects/WS2003-22_short.h5"

# Directory where the pickled intermediate objects are written / read back
dir_objects = "/fefs/aswg/workspace/juan.jimenez/cosmic_ray_data_correction/analysis_weather/objects"

# True  -> rebuild the per-subrun dictionary from the datacheck files and pickle it
# False -> reload the previously pickled dictionary from dir_objects
create_data_dict = True

# Same switch for the run -> night dictionary. It has to be defined here:
# the cell that builds that dictionary reads this flag and would otherwise
# raise a NameError on a fresh kernel.
create_run_night_dict = True

# Extracting dates and parameters of all runs/subruns

In [None]:
# Build (or reload) `dict_dcheck`: one entry per subrun, combining the run-level
# pointing from `runsummary` with the subrun-level quantities from
# `cosmics_intensity_spectrum` of every night-wise datacheck file.
data_dict_path = dir_objects + "/data_dict.pkl"

if create_data_dict:
    # Sorted so the order of the stored arrays does not depend on the
    # (arbitrary) order in which the filesystem lists the files.
    dchecks = sorted(glob.glob(dcheck_root + "*.h5"))

    run, srun, time  = [], [], []
    telapsed, az, zd = [], [], []
    i_a_h_p_r, c_r_a_4, d_c_r_a_4, c_s_i, l_y = [], [], [], [], []
    for i, dcheck in enumerate(dchecks):
        if i % 10 == 0:
            print(f"{i}/{len(dchecks)}")

        ds = pd.read_hdf(dcheck, key="runsummary")
        di = pd.read_hdf(dcheck, key="cosmics_intensity_spectrum")

        for j in range(len(ds)):
            runref = ds["runnumber"].iloc[j]

            # Subrun rows belonging to this run only. Every per-subrun value
            # below must be read from `di_run`: indexing the full table `di`
            # with positions counted inside `di_run` would pick the first rows
            # of the night instead of the rows of this run.
            di_run = di[di["runnumber"] == runref]
            n_subruns = len(di_run)

            # Run-level quantities, repeated once per subrun of the run
            run.extend([runref] * n_subruns)
            az.extend([ds["mean_azimuth"].iloc[j]] * n_subruns)
            # NOTE(review): this stores the mean *altitude* under the name "zd";
            # if a zenith distance is wanted it should be 90 deg - altitude.
            # Left unchanged so existing consumers of the dictionary keep working.
            zd.extend([ds["mean_altitude"].iloc[j]] * n_subruns)

            # Subrun-level quantities
            srun.extend(di_run["subrun"].to_numpy())
            # datetime.fromtimestamp yields naive datetimes in the machine's local
            # time zone. NOTE(review): confirm this matches the convention of the
            # weather-station timestamps these dates are later compared with.
            time.extend(datetime.fromtimestamp(t) for t in di_run["time"].to_numpy())
            telapsed.extend(di_run["elapsed_time"].to_numpy())
            i_a_h_p_r.extend(di_run["ZD_corrected_intensity_at_half_peak_rate"].to_numpy())
            c_r_a_4.extend(di_run["ZD_corrected_cosmics_rate_at_422_pe"].to_numpy())
            d_c_r_a_4.extend(di_run["ZD_corrected_delta_cosmics_rate_at_422_pe"].to_numpy())
            c_s_i.extend(di_run["ZD_corrected_cosmics_spectral_index"].to_numpy())
            # NOTE(review): "light_yield" is not in the column list documented
            # further down for `cosmics_intensity_spectrum`; confirm every
            # datacheck file provides it.
            l_y.extend(di_run["light_yield"].to_numpy())

    dict_dcheck = {
        "run" : np.array(run),
        "srun" : np.array(srun),
        "time" : np.array(time),
        "telapsed" : np.array(telapsed),
        # Pointing is converted with rad2deg, i.e. the stored values are treated as radians
        "az" : np.rad2deg(az),
        "zd" : np.rad2deg(zd),
        "ZD_corrected_intensity_at_half_peak_rate" : np.array(i_a_h_p_r),
        "ZD_corrected_cosmics_rate_at_422_pe" : np.array(c_r_a_4),
        "ZD_corrected_delta_cosmics_rate_at_422_pe" : np.array(d_c_r_a_4),
        "ZD_corrected_cosmics_spectral_index" : np.array(c_s_i),
        "light_yield" : np.array(l_y)
    }

    # Saving the objects
    with open(data_dict_path, 'wb') as f:
        pickle.dump(dict_dcheck, f, pickle.HIGHEST_PROTOCOL)
else:
    # Reload the dictionary written by a previous run of this cell
    with open(data_dict_path, 'rb') as f:
        dict_dcheck = pickle.load(f)

0/491
10/491
20/491
30/491
40/491
50/491
60/491
70/491
80/491
90/491
100/491
110/491
120/491
130/491
140/491
150/491
160/491
170/491
180/491
190/491
200/491
210/491


In [None]:
# Timestamps of the datacheck entries (the "time" array of dict_dcheck)
dates = dict_dcheck["time"]

# Weather-station table read from the HDF5 file
df_ws = pd.read_hdf(ws_database)

In [None]:
# Time window spanned by the datacheck entries
maxdate = np.max(dates)
mindate = np.min(dates)

# Weather-station timestamps truncated to whole seconds.
# They are taken from `df_ws` (the table loaded in the previous cell); the
# name `df` is only bound further down in the notebook, so using it here
# fails on a fresh kernel or silently picks up whatever `df` was left behind.
ws_index = pd.DatetimeIndex(df_ws.index).floor("s")

# Keep only the weather entries strictly inside the datacheck time window
mask = np.asarray((ws_index > mindate) & (ws_index < maxdate))

dates_ws = ws_index[mask].to_pydatetime()
df_ws    = df_ws[mask]

if len(dates_ws) == 0:
    raise ValueError("No weather-station entries fall inside the time range of the datacheck entries")

# For every datacheck entry, position (within the filtered weather table) of
# the weather entry closest in time. Times are compared as integer
# microseconds so each search is one vectorised numpy operation; np.argmin
# returns the first minimum, so ties resolve to the earliest row.
ws_us  = ws_index[mask].to_numpy().astype("datetime64[us]").astype("int64")
run_us = np.asarray(dates).astype("datetime64[us]").astype("int64")

indexes = [np.argmin(np.abs(ws_us - t)) for t in run_us]

In [None]:
# Build (or reload) `dict_night_run`, mapping each run number to the night tag
# taken from the name of the datacheck file that contains it.
night_run_path = dir_objects + "/runsubrun_entry_ws.pkl"

if not create_run_night_dict:
    # Reload the mapping pickled by a previous run of this cell
    with open(night_run_path, 'rb') as f:
        dict_night_run = pickle.load(f)
else:
    dchecks = glob.glob(dcheck_root + "*.h5")
    n_files = len(dchecks)

    dict_night_run = {}
    for i, dcheck in enumerate(dchecks):
        # Progress report every 23 files
        if i % 23 == 0:
            print(f"{i}/{n_files}")

        # Night tag: text after the last "_" of the path, up to the first "."
        night = dcheck.rsplit("_", 1)[-1].partition(".")[0]

        # Every distinct run number of this file is assigned to that night
        runs = np.unique(pd.read_hdf(dcheck, key="runsummary")["runnumber"])
        dict_night_run.update(dict.fromkeys(runs, night))

    # Saving the objects
    with open(night_run_path, 'wb') as f:
        pickle.dump(dict_night_run, f, pickle.HIGHEST_PROTOCOL)

# Datachecks information

## `cosmics_intensity_spectrum`

'yyyymmdd', 'ra_tel', 'dec_tel', 'cos_zenith', 'az_tel', 'runnumber',
       'subrun', 'time', 'elapsed_time', 'corrected_elapsed_time',
       'cosmics_rate', 'cosmics_cleaned_rate', 'intensity_at_half_peak_rate',
       'ZD_corrected_intensity_at_half_peak_rate', 'cosmics_peak_rate',
       'ZD_corrected_cosmics_peak_rate', 'cosmics_rate_at_422_pe',
       'ZD_corrected_cosmics_rate_at_422_pe', 'cosmics_spectral_index',
       'ZD_corrected_cosmics_spectral_index', 'intensity_spectrum_fit_p_value',
       'intensity_at_reference_rate', 'diffuse_nsb_std',
       'num_star_affected_pixels', 'anomalous_low_intensity_peak'

## `runsummary`

'runnumber', 'time', 'elapsed_time', 'min_altitude', 'mean_altitude',
       'max_altitude', 'min_azimuth', 'max_azimuth', 'mean_azimuth', 'mean_ra',
       'mean_dec', 'num_cosmics', 'num_pedestals', 'num_flatfield',
       'num_unknown_ucts_trigger_tags', 'num_wrong_ucts_tags_in_cosmics',
       'num_wrong_ucts_tags_in_pedestals', 'num_wrong_ucts_tags_in_flatfield',
       'num_ucts_jumps', 'num_unknown_tib_trigger_tags',
       'num_wrong_tib_tags_in_cosmics', 'num_wrong_tib_tags_in_pedestals',
       'num_wrong_tib_tags_in_flatfield', 'num_pedestals_after_cleaning',
       'num_contained_mu_rings', 'ff_charge_mean', 'ff_charge_mean_err',
       'ff_charge_stddev', 'ff_time_mean', 'ff_time_mean_err',
       'ff_time_stddev', 'ff_rel_time_stddev', 'ped_charge_mean',
       'ped_charge_mean_err', 'ped_charge_stddev',
       'ped_fraction_pulses_above10', 'ped_fraction_pulses_above30',
       'cosmics_fraction_pulses_above10', 'cosmics_fraction_pulses_above30',
       'mu_effi_mean', 'mu_effi_stddev', 'mu_width_mean', 'mu_width_stddev',
       'mu_hg_peak_sample_mean', 'mu_hg_peak_sample_stddev',
       'mu_intensity_mean', 'mean_number_of_pixels_nearby_stars'
       
## Weather Station data

'sun_alt', 'sun_az', 'fBits', 'mjd', 'temperature', 'pressure',
       'windDirection', 'humidity', 'windSpeedCurrent', 'windGust',
       'windSpeedAverage', 'windDirectionAverage', 'tempSensor', 'tngDust',
       'tngSeeing', 'rain', 'state', 'Any', 'Mes', 'DP', 'diff1', 'is_dup',
       'temperatureR'

In [5]:
# Single night-wise datacheck file (tag 20201215) used below to inspect the table layout
d = "/fefs/aswg/workspace/abelardo.moralejo/data/datachecks/night_wise/DL1_datacheck_20201215.h5"

In [6]:
# `runsummary` table of the example datacheck file
df = pd.read_hdf(d, key="runsummary")

In [7]:
# `cosmics_intensity_spectrum` table of the same example datacheck file
dd = pd.read_hdf(d, key="cosmics_intensity_spectrum")

In [8]:
# First three rows of the table currently bound to `df`
df.head(3)

Unnamed: 0,runnumber,time,elapsed_time,min_altitude,mean_altitude,max_altitude,min_azimuth,max_azimuth,mean_azimuth,mean_ra,mean_dec,num_cosmics,num_pedestals,num_flatfield,num_unknown_ucts_trigger_tags,num_wrong_ucts_tags_in_cosmics,num_wrong_ucts_tags_in_pedestals,num_wrong_ucts_tags_in_flatfield,num_ucts_jumps,num_unknown_tib_trigger_tags,num_wrong_tib_tags_in_cosmics,num_wrong_tib_tags_in_pedestals,num_wrong_tib_tags_in_flatfield,num_pedestals_after_cleaning,num_contained_mu_rings,ff_charge_mean,ff_charge_mean_err,ff_charge_stddev,ff_time_mean,ff_time_mean_err,ff_time_stddev,ff_rel_time_stddev,ped_charge_mean,ped_charge_mean_err,ped_charge_stddev,ped_fraction_pulses_above10,ped_fraction_pulses_above30,cosmics_fraction_pulses_above10,cosmics_fraction_pulses_above30,mu_effi_mean,mu_effi_stddev,mu_width_mean,mu_width_stddev,mu_hg_peak_sample_mean,mu_hg_peak_sample_stddev,mu_intensity_mean,mean_number_of_pixels_nearby_stars
0,3264,1608067000.0,1193.087857,0.612275,0.649045,0.687155,1.427711,1.461961,1.444479,83.959048,22.24897,5233250,109512,110256,30462,0.0,0.0,109627.0,0,28434,0.0,43.0,109632.0,500,2606,73.406534,0.067816,10.001201,19.192501,0.017123,1.045364,0.399294,2.30236,0.004139,1.847598,0.001345,1.882772e-07,0.007057,0.000786,0.171555,0.013648,0.069318,0.019417,17.097467,1.349948,2115.737793,210.84466
1,3265,1608068000.0,899.054521,0.697147,0.725299,0.754061,1.466593,1.4934,1.479797,83.961934,22.248169,4815995,86047,86041,201,0.0,0.0,86033.0,0,0,0.0,0.0,86041.0,209,2256,74.187825,0.068549,10.04551,19.954806,0.017108,1.045793,0.397761,2.243048,0.004239,1.798347,0.001125,1.90186e-07,0.004874,0.000758,0.171263,0.014112,0.068883,0.020677,17.825798,1.479878,2119.459717,210.126316
2,3266,1608069000.0,882.607903,0.768983,0.796811,0.824714,1.512324,1.540423,1.526249,83.259897,21.789253,4486943,84669,84764,186,0.0,0.0,84763.0,0,0,0.0,0.0,84764.0,129,2376,74.34607,0.052186,10.068956,19.395544,0.011304,1.013113,0.39839,2.216129,0.004066,1.779053,0.001069,3.897177e-07,0.005239,0.000862,0.171088,0.014855,0.067991,0.01979,17.198232,1.318633,2115.946777,127.568182


In [12]:
# NOTE: rebinds `df` — from here on it is the weather-station table read from
# `ws_database`, no longer the `runsummary` table loaded a few cells above.
df = pd.read_hdf(ws_database)

In [13]:
# Display the weather-station table (the rendered output is a truncated view of the frame)
df

Unnamed: 0,sun_alt,sun_az,fBits,mjd,temperature,pressure,windDirection,humidity,windSpeedCurrent,windGust,windSpeedAverage,windDirectionAverage,tempSensor,tngDust,tngSeeing,rain,state,Any,Mes,DP,diff1,is_dup,temperatureR
2003-01-30 15:01:20,38.049360,209.643614,33554432,-2330.374074,1.00,786.823231,127.00,99.80,13.60,13.90,-999.00,-999.00,-999.0,-999.00,-999.00,-1,255,2003,1,0.070628,0 days 00:00:00,,1.000000
2003-01-30 15:03:20,37.831142,210.180579,33554432,-2330.372685,1.00,786.823231,106.00,99.80,22.10,22.20,-999.00,-999.00,-999.0,-999.00,-999.00,-1,255,2003,1,0.070628,0 days 00:02:00,,1.000000
2003-01-30 15:05:20,37.609386,210.713618,33554432,-2330.371296,1.00,786.723088,59.00,99.80,19.80,20.40,-999.00,-999.00,-999.0,-999.00,-999.00,-1,255,2003,1,0.070628,0 days 00:02:00,,1.033333
2003-01-30 15:07:20,37.384135,211.242719,33554432,-2330.369907,1.10,786.723088,103.00,98.40,21.90,23.10,-999.00,-999.00,-999.0,-999.00,-999.00,-1,255,2003,1,0.063730,0 days 00:02:00,,1.066667
2003-01-30 15:09:20,37.155435,211.767874,33554432,-2330.368519,1.10,786.723088,137.00,97.50,29.80,30.20,-999.00,-999.00,-999.0,-999.00,-999.00,-1,255,2003,1,0.054542,0 days 00:02:00,,1.133333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-30 23:50:00,-70.263478,281.758703,33554432,4943.993056,5.74,789.630000,106.41,91.41,18.42,29.68,22.62,111.70,0.0,13.15,1.42,0,24,2022,12,0.318846,0 days 00:01:59,,5.696667
2022-12-30 23:52:01,-70.695571,282.260801,33554432,4943.994456,5.56,789.630000,107.51,92.18,21.56,26.69,22.29,110.55,0.0,13.15,1.42,0,24,2022,12,0.314717,0 days 00:02:01,,5.616667
2022-12-30 23:54:00,-71.119707,282.771629,33554432,4943.995833,5.55,789.640000,117.36,92.43,23.76,33.21,26.16,116.67,0.0,13.15,1.42,0,24,2022,12,0.316729,0 days 00:01:59,,5.566667
2022-12-30 23:56:00,-71.546523,283.305017,33554432,4943.997222,5.59,789.570000,84.88,91.92,28.99,36.19,26.52,108.29,0.0,13.15,1.42,0,24,2022,12,0.313980,0 days 00:02:00,,5.576667


In [14]:
# Column names of the weather-station table
df.columns

Index(['sun_alt', 'sun_az', 'fBits', 'mjd', 'temperature', 'pressure',
       'windDirection', 'humidity', 'windSpeedCurrent', 'windGust',
       'windSpeedAverage', 'windDirectionAverage', 'tempSensor', 'tngDust',
       'tngSeeing', 'rain', 'state', 'Any', 'Mes', 'DP', 'diff1', 'is_dup',
       'temperatureR'],
      dtype='object')