### Import packages

In [1]:
import os
import pickle
import sys
import warnings
from datetime import datetime

import astropy.units as u
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display, HTML
from scipy.optimize import curve_fit
from scipy.optimize import OptimizeWarning
from scipy.stats import chi2

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=OptimizeWarning)

display(HTML("<style>.container { width:100% !important; }</style>"))

# location of the scripts
sys.path.insert(0, os.getcwd() + "/../scripts/")
import geometry as geom

### Paths and directories

In [7]:
# Working directory the notebook is executed from, with a trailing slash
root = f"{os.getcwd()}/"
# Folder holding the objects written by this notebook
root_objects = f"{root}objects/"

# ---------- File names ----------
# Input: subrun-wise dictionary stored under ../weather_analysis/objects/
fname_srunwise_dict = f"{root}../weather_analysis/objects/total_dict.pkl"
# Output: run-wise table.
# NOTE(review): despite the ".pkl" extension this path is written with
# DataFrame.to_csv further down, so the file content is CSV text.
fname_runwise_dict = f"{root_objects}runwise_total_dict.pkl"


# ---------- Reference power-law parameters ----------
# ref_p0 is passed as `p0_ref` to geom.calc_light_yield below;
# ref_p1 is not used in the cells visible here.
ref_p0 = 1.74
ref_p1 = -2.23

# Make sure every output directory is present before anything is written
for path in (root_objects,):
    if os.path.exists(path):
        continue
    os.makedirs(path, exist_ok=True)

### Reading the subrun-wise dictionary

In [3]:
# Load the pickled subrun-wise dictionary from disk.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open(fname_srunwise_dict, mode="rb") as pkl_file:
    dict_srunwise = pickle.load(pkl_file)

# The top-level keys of the dictionary are the runs
print(f"Number of runs: {len(dict_srunwise)}")

Number of runs: 7528


### Averaging and creating a run-wise dictionary instead

In [11]:
# Column layout of the run-wise table (one row per run).
# NOTE: "telspsed" (sic) is kept unchanged because it ends up as a header in the stored file.
columns = [
    "n_subruns", "telspsed", "timestamp", "az", "zd",
    "i_half_peak", "drdi", "u_drdi", "pindex", "u_pindex",
    "light_yield", "u_light_yield",
    "pressure", "temperature", "humidity", "wind_speed",
    "tng_dust", "tng_seeing",
    "fit_slope","fit_u_slope", "fit_intercept", "fit_u_intercept",
    "fit_chi2", "fit_pvalue", "run"
]

# Output column -> key of the per-subrun "weather" dictionary it is averaged from
weather_columns = {
    "pressure":    "pressure",
    "temperature": "temperature",
    "humidity":    "humidity",
    "wind_speed":  "wind_speed_average",
    "tng_dust":    "tng_dust",
    "tng_seeing":  "tng_seeing",
}

run_list = list(dict_srunwise.keys())
n_runs = len(run_list)

# Rows are collected here and turned into a DataFrame once, after the loop,
# instead of enlarging the DataFrame on every iteration.
rows = []

for run_i, run in enumerate(run_list):

    if run_i % 500 == 0:
        print(f"Adding data... {run_i:6}/{n_runs} runs")

    # Per-subrun entries of this run, in stored order
    sruns = [dict_srunwise[run][srun] for srun in dict_srunwise[run].keys()]
    first_srun = sruns[0]

    # Every column starts as NaN and is overwritten where data is available
    row_run = dict.fromkeys(columns, np.nan)

    row_run["run"] = run
    # Number of subruns
    row_run["n_subruns"] = len(sruns)
    # Time elapsed (sum over subruns)
    t_elapsed_sruns = [srun["telapsed"] for srun in sruns]
    row_run["telspsed"] = np.sum(t_elapsed_sruns)
    # Timestamp of the first subrun
    row_run["timestamp"] = first_srun["time"]
    # Azimuth and zenith distance (mean over subruns)
    row_run["az"] = np.mean([srun["az"] for srun in sruns])
    row_run["zd"] = np.mean([srun["zd"] for srun in sruns])

    # Weather quantities: averaged over subruns, left as NaN when the first
    # subrun carries no value (None) for that quantity.
    for column, weather_key in weather_columns.items():
        if first_srun["weather"][weather_key] is not None:
            row_run[column] = np.mean([srun["weather"][weather_key] for srun in sruns])

    # Intensity at half light peak
    row_run["i_half_peak"] = np.mean(
        [srun["ZD_corrected_intensity_at_half_peak_rate"] for srun in sruns]
    )

    # Per-subrun power-law parameters and their uncertainties
    p0   = np.array([srun["ZD_corrected_cosmics_rate_at_422_pe"] for srun in sruns])
    u_p0 = np.array([srun["ZD_corrected_delta_cosmics_rate_at_422_pe"] for srun in sruns])
    p1   = np.array([srun["ZD_corrected_cosmics_spectral_index"] for srun in sruns])
    u_p1 = np.array([srun["delta_cosmics_spectral_index"] for srun in sruns])

    # Per-subrun light yield and its uncertainty
    ly, u_ly = geom.calc_light_yield(
        p0_fit = np.abs(p0),
        p1_fit = p1,
        sigma_p0_fit = u_p0,
        sigma_p1_fit = u_p1,
        p0_ref = ref_p0,
    )

    # Cosmics rate @ 422 p.e. and its uncertainty
    row_run["drdi"]   = np.mean(p0)
    row_run["u_drdi"] = np.mean(u_p0)
    # Power index and its uncertainty
    row_run["pindex"]   = np.mean(p1)
    row_run["u_pindex"] = np.mean(u_p1)
    # Light yield and its uncertainty (infinite entries excluded from the mean)
    row_run["light_yield"]   = np.mean(ly[ly != np.inf])
    row_run["u_light_yield"] = np.mean(u_ly[u_ly != np.inf])

    # Straight-line fit of the light yield versus cumulative elapsed time
    x_fit = np.cumsum(t_elapsed_sruns)
    y_fit = ly
    yerr_fit = u_ly

    # Keep only points where both coordinates are finite (drops NaN and +/-inf,
    # which curve_fit would otherwise reject with a ValueError).
    mask = np.isfinite(x_fit) & np.isfinite(y_fit)

    x_fit = x_fit[mask]
    y_fit = y_fit[mask]
    yerr_fit = yerr_fit[mask]

    # Reset the fit results on every iteration. Without this, a run with fewer
    # than two valid points would either raise a NameError (first run) or
    # silently inherit the fit results of the previously processed run.
    intercept, delta_intercept = np.nan, np.nan
    slope, delta_slope = np.nan, np.nan
    _chi2, pvalue = np.nan, np.nan

    if len(x_fit) > 1:

        # Performing the fit
        try:
            params, pcov, info, _, _ = curve_fit(
                f     = geom.straight_line,
                xdata = x_fit,
                ydata = y_fit,
                sigma = yerr_fit,
                p0    = [1, 0],
                full_output = True,
            )
        except RuntimeError:
            # The least-squares minimisation failed: keep the NaN defaults
            pass
        else:
            # presumably geom.straight_line takes (x, intercept, slope) — verify
            intercept       = params[0]
            slope           = params[1]
            delta_intercept = np.sqrt(pcov[0, 0])
            delta_slope     = np.sqrt(pcov[1, 1])
            # Sum of squared residuals returned by the fit, used as chi2
            _chi2           = np.sum(info['fvec'] ** 2)
            # Two fitted parameters -> len(x_fit) - 2 degrees of freedom
            pvalue          = 1 - chi2.cdf(_chi2, len(x_fit)-2)

    # Fit slope and its uncertainty
    row_run["fit_slope"]   = slope
    row_run["fit_u_slope"] = delta_slope
    # Fit intercept and its uncertainty
    row_run["fit_intercept"]   = intercept
    row_run["fit_u_intercept"] = delta_intercept
    # Fit chi2 and p-value
    row_run["fit_chi2"]   = _chi2
    row_run["fit_pvalue"] = pvalue

    rows.append(row_run)

# One row per run, indexed by run
df_runwise = pd.DataFrame(rows, index=run_list, columns=columns)

Adding data...      0/7528 runs
Adding data...    500/7528 runs
Adding data...   1000/7528 runs
Adding data...   1500/7528 runs
Adding data...   2000/7528 runs
Adding data...   2500/7528 runs
Adding data...   3000/7528 runs
Adding data...   3500/7528 runs
Adding data...   4000/7528 runs
Adding data...   4500/7528 runs
Adding data...   5000/7528 runs
Adding data...   5500/7528 runs
Adding data...   6000/7528 runs
Adding data...   6500/7528 runs
Adding data...   7000/7528 runs
Adding data...   7500/7528 runs


In [22]:
# Preview the first six rows of the run-wise table
display(df_runwise.head(6))

Unnamed: 0,n_subruns,telspsed,timestamp,az,zd,i_half_peak,drdi,u_drdi,pindex,u_pindex,light_yield,u_light_yield,pressure,temperature,humidity,wind_speed,tng_dust,tng_seeing,fit_slope,fit_u_slope,fit_intercept,fit_u_intercept,fit_chi2,fit_pvalue,run
1615,61,1479.185384,2019-11-23 23:40:47.731307,265.754848,31.35929,124.759916,2.045992,0.020157,-2.126104,0.059882,1.155216,0.013513,787.011475,7.588689,45.536721,14.341311,0.38,1.71,-1.6e-05,6e-06,1.158636,0.005587,259.529051,0.0,1615
1616,62,1492.465697,2019-11-24 00:11:26.987927,95.332874,30.722616,124.538413,2.085128,0.021588,-2.114527,0.062913,1.176689,0.015404,786.889839,7.951452,46.629677,14.384194,0.277419,2.077581,9e-06,4e-06,1.169722,0.003368,69.341068,0.191536,1616
1617,35,877.16172,2019-11-24 00:45:42.507987,273.215706,44.511343,118.41847,2.04973,0.019581,-2.127638,0.058021,1.157435,0.013404,786.62,7.710571,51.568,15.432286,0.26,2.14,4e-06,7e-06,1.150611,0.003916,71.571138,0.000114,1617
1618,81,1822.499924,2019-11-24 01:07:29.246502,108.322078,18.014448,125.807134,2.164965,0.038297,-2.051108,0.094747,1.160424,0.066008,786.379877,7.585802,60.808272,13.87358,0.08679,2.463827,1e-06,4e-06,1.18254,0.003499,252.113457,0.0,1618
1619,30,922.360651,2019-11-24 01:45:25.986127,279.314177,57.57176,112.330784,2.045831,0.02145,-2.085522,0.063585,1.161393,0.015284,786.082,7.658333,58.738667,15.738,0.03,2.57,-3e-05,9e-06,1.174208,0.005152,42.331114,0.040341,1619
1620,78,1748.229183,2019-11-24 02:03:06.030581,148.833255,8.163757,125.24893,2.099011,0.021409,-2.123014,0.061951,1.182556,0.015357,785.949103,7.733974,57.757564,16.961667,0.03,2.375385,-2.4e-05,3e-06,1.199698,0.003803,124.860485,0.000352,1620


### Store the dataframe information

In [16]:
# Write the run-wise table as CSV text; the index is not written,
# the run is still available through the "run" column.
df_runwise.to_csv(path_or_buf=fname_runwise_dict, index=False)

# To load the table back and restore the run index:
# df_runwise = pd.read_csv(fname_runwise_dict)
# df_runwise.set_index("run", inplace=True)