In [None]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict

from iminuit import Minuit
from iminuit.cost import LeastSquares
from termcolor import colored
from time_templates.signalmodel import signal_model
from time_templates.utilities import atmosphere
from time_templates.utilities.fitting import plot_fit_curve
from time_templates.templates.start_time_deMauro import start_time_plane_front_DXmax, start_time_DXmax, catenary
from time_templates.utilities.plot import plot_hist, plot_profile_1d
from time_templates.utilities import geometry

In [None]:
from time_templates.datareader.get_data import get_MC_data_from_tree

# Load the simulated (MC) data set for one energy bin / primary / detector
# configuration. The result is used as a pandas DataFrame by the cells below.
df = get_MC_data_from_tree(
    dense=False,
    energy='18.5_19',
    primary='proton',
    det='new',
    no_traces=False,
)


In [None]:
# Bin cos^2(theta) into 4 equal-width intervals on [0.25, 1] and DXstation into
# 25-wide intervals starting at 0, drop every row that contains a NaN (this
# includes rows falling outside the bin ranges, whose bin label is NaN), then
# index the frame by (cos^2 theta bin, DX bin, rounded r, rounded cos psi).
df = (
    df.assign(
        MCCosTheta_sq_bin=pd.cut(df['MCCosTheta']**2, np.linspace(0.25, 1, 5)),
        MCDXstation_bin=pd.cut(df['MCDXstation'], np.arange(0, 1000, 25)),
    )
    .dropna()
    .set_index(['MCCosTheta_sq_bin', 'MCDXstation_bin', 'MCr_round', 'MCcospsi_round'])
    # a sorted index makes the .loc lookups used later fast
    .sort_index()
)


In [None]:
# Absolute disagreement between the MC and the reconstructed plane time
# residual, and its distance from the sample mean (used as an outlier cut).
diff = np.abs(df['MCPlaneTimeRes'] - df['PlaneTimeRes'])
mean = np.mean(diff)
std = np.std(diff)
df['ptr_diff'] = np.abs(diff - mean)  # / std
print(df.shape)

# Keep cos(theta) > 0.5 and rows whose disagreement is within 100 of the mean.
# .copy() makes df_ an independent frame: new columns are assigned below, and
# assigning into the result of query() otherwise raises SettingWithCopyWarning.
df_ = df.query('MCCosTheta > 0.5 & ptr_diff < 100').copy()
# t0_mean = df_.groupby(df.index.names).apply(lambda x: x['t0_deMauro'].mean())
# df_['t0_deMauro_'] = df_['t0_deMauro']
# df_['t0_deMauro'] = t0_mean
# ptr_mean = df_.groupby(df.index.names).apply(lambda x: x['PlaneTimeRes'].mean())
# df_['PlaneTimeRes_'] = df_['PlaneTimeRes']
# df_['PlaneTimeRes'] = ptr_mean

# Preserve the uncorrected MC residual before shifting it.
df_['MCPlaneTimeRes_'] = df_['MCPlaneTimeRes']
# Mean (MC - reconstructed) residual per index bin, broadcast back onto every
# row of that bin. transform() returns a result aligned with df_ row by row,
# so no index-based re-alignment on the non-unique MultiIndex is needed.
df_['MCPlaneTimeRes_shift'] = (
    (df_['MCPlaneTimeRes'] - df_['PlaneTimeRes'])
    .groupby(level=list(df_.index.names), observed=True)
    .transform('mean')
)
# Remove the per-bin offset from the MC residual.
df_['MCPlaneTimeRes'] = df_['MCPlaneTimeRes'] - df_['MCPlaneTimeRes_shift']
print(df_.shape)

In [None]:
# Plot `diff` (|MCPlaneTimeRes - PlaneTimeRes|) against MCSecTheta with bins=10;
# presumably a binned profile (see plot_profile_1d). The trailing ';' hides the
# return value's repr in the notebook output.
plot_profile_1d(df['MCSecTheta'], diff, bins=10);

In [None]:
# Overlay normalised distributions before (df) and after (df_) the selection.
# DataFrame.hist creates its own figure, so the size is passed to it directly;
# a preceding plt.figure(...) would only leave an empty extra figure behind.
axes = df[['MCCosTheta', 'MClgE', 'MCr', 'MCcospsi']].hist(
    histtype='step', density=True, figsize=(10, 10))
# Draw the selected sample into the same grid of axes.
axes = df_[['MCCosTheta', 'MClgE', 'MCr', 'MCcospsi']].hist(
    ax=axes, histtype='step', density=True)

In [None]:
from time_templates.preprocessing.phase_traces_df import phase_traces_df
# Select one bin of the 4-level index (cos^2 theta, DX, r, cos psi). The first
# two levels are interval bins, so 0.8 and 200 pick the intervals containing
# those values. Work on a copy so df_ itself is not modified.
df_copy = df_.loc[0.8, 200, 600, 1].copy()
# Phase the traces of that bin; `phased` is plotted in the next cell.
phased = phase_traces_df(df_copy, univ_comp=True, use_t0_model=True, useMC=True)
# TODO: this uses the MC plane time residual but the t0 model as start time. Is that OK?

In [None]:
# Stack the individual traces once into a 2-D array (one row per trace); it is
# reused for the overlay, the mean and the median.
traces = np.vstack(phased['wcd_em_trace'].values)
# Time axis built from integer bin numbers times the 25/3 bin width
# (presumably ns). np.arange with a fractional step can gain or lose the last
# element through rounding, and the length now follows the data instead of a
# hard-coded 600.
t = np.arange(traces.shape[1]) * 25 / 3

plt.figure(figsize=(15, 8))
# All individual traces, faint, in a single call (one line per column of traces.T).
plt.plot(t, traces.T, marker='', ls='-', color='b', alpha=0.1)
# Bin-wise mean (solid) and median (dashed) over all traces.
plt.plot(t, traces.mean(axis=0), 'k-', lw=3)
plt.plot(t, np.median(traces, axis=0), 'k--', lw=3)
plt.xlim([0, 1000])

In [None]:
from fit_lognormal_traces import get_trace_fit, fit_lognormal

In [None]:
import glob

# Read every per-sample "mean" DataFrame. glob yields paths in arbitrary,
# filesystem-dependent order; sorting makes dfs[0], dfs[1], ... (addressed
# positionally further down) refer to the same files on every run.
mean_df_files = sorted(glob.glob('../../data/mean_df/mean*.pl'))
if not mean_df_files:
    raise FileNotFoundError("no files match '../../data/mean_df/mean*.pl'")

dfs = []
for fl in mean_df_files:
    # NOTE(review): unpickling can execute arbitrary code - only load trusted files.
    mean_df = pd.read_pickle(fl)
    # Suffix the index level names with "_idx"; these are the names under which
    # the index values are stored in `dd` below.
    mean_df.index = mean_df.index.rename([name + "_idx" for name in mean_df.index.names])
    dfs.append(mean_df)

# Column name -> list of values, one entry per merged bin.
dd = defaultdict(list)

# Grid of index values to visit: levels 0 and 1 (interval bins) come from the
# first input, level 2 from the input with the most distinct values, level 3
# from the last input. Combinations missing from an input are skipped below.
ct2_bins = dfs[0].index.get_level_values(level=0).unique()
DX_bins = dfs[0].index.get_level_values(level=1).unique()
rs = [0]
for mean_df in dfs:
    rs_ = mean_df.index.get_level_values(level=2).unique()
    if len(rs_) > len(rs):
        rs = rs_
cps = dfs[-1].index.get_level_values(level=3).unique()

for ct2_bin in ct2_bins:
    # Interval bins are looked up through their midpoint.
    ct2 = ct2_bin.mid
    print("at", ct2)
    for DX_bin in DX_bins:
        DX = DX_bin.mid
        for r in rs:
            for cp in cps:
                # Average of this bin's rows over all inputs, weighted by
                # each input's 'nstations'.
                weighted_sum = 0
                n_total = 0
                for mean_df in dfs:
                    try:
                        row = mean_df.loc[ct2, DX, r, cp]
                    except KeyError:
                        # this input has no entry for the bin
                        continue
                    n = row['nstations']
                    if n <= 0:
                        continue
                    weighted_sum = weighted_sum + row * n
                    n_total += n

                if n_total <= 0:
                    # no input contributed to this bin
                    continue

                new_row = weighted_sum / n_total

                for key, val in new_row.items():
                    # 'nstations' is stored as the total below, not as an average
                    if key == "nstations":
                        continue
                    dd[key].append(val)

                dd["nstations"].append(n_total)
                dd["MCCosTheta_sq_bin_idx"].append(ct2_bin)
                dd["MCDXstation_bin_idx"].append(DX_bin)
                dd["MCr_round_idx"].append(r)
                dd["MCcospsi_round_idx"].append(cp)
                
                
# for idx, row in tqdm.tqdm(dfs[0].iterrows()):
#     n0 = row["nstations"]
#     n_total = n0

#     new_row = row.copy() * n0

#     for df in dfs[1:]:
#         row = df.loc[idx]
#         n = row["nstations"]
#         new_row += row * n
#         n_total += n

#     if n_total > 0:
#         new_row = new_row / n_total

#     for key, val in new_row.items():
#         if key == "nstations":
#             continue
#         dd[key].append(val)

#     dd["nstations"].append(n_total)
#     dd["MCCosTheta_sq_bin_idx"].append(idx[0])
#     dd["MCDXstation_bin_idx"].append(idx[0])
#     dd["MCr_round_idx"].append(idx[0])
#     dd["MCcospsi_round_idx"].append(idx[0])


In [None]:
# Turn the merged per-bin lists into a DataFrame indexed by the same four
# "*_idx" level names as the input frames.
df_test = pd.DataFrame(dd).set_index(dfs[0].index.names)

In [None]:
# Display the merged entry for one (cos^2 theta, DX, r, cos psi) bin.
df_test.loc[0.9, 200, 1000, 1]

In [None]:

# Bin used for the cross-check of the merged frame against its inputs.
ct2 = 0.8
DX = 200
r = 1400
cp = -1

# Merged mean trace (thick black) on top of the first three input frames.
plt.plot(df_test.loc[ct2, DX, r, cp]['wcd_em_trace_mean'], 'k-', lw=2)
for k in range(3):
    plt.plot(dfs[k].loc[ct2, DX, r, cp]['wcd_em_trace_mean'])
# Mean MClgE of each of those inputs.
for k in range(3):
    print(dfs[k]['MClgE'].mean())
plt.xlim([0, 300])
plt.ylim([0, 0.002])

In [None]:
# Load a single mean DataFrame and suffix its index level names with "_idx".
df = pd.read_pickle('../../data/mean_df/mean_df_EPOS_LHC_proton_19_19.5.pl')
# df = df.droplevel(0)
suffixed_names = [name + "_idx" for name in df.index.names]
df.index = df.index.rename(suffixed_names)

In [None]:
f, ax = plt.subplots(1, figsize=(10, 5))

# Fit the trace of one (cos^2 theta, DX, r, cos psi) bin and draw it on `ax`.
m = get_trace_fit(
    df.loc[0.5, 400, 500, 1],
    ax=ax,
    fit_t0=False,
    cdf_min=0.0,
    cdf_max=0.95,
)
ax.set_xscale('log')
# ax.set_yscale('log')
# ax.set_ylim([1e-5, 1e-2])
# Show the fit result as the cell output.
m

In [None]:
# Distinct values of the index levels (level 3 in descending order).
cos_theta_sq_bins = sorted(df.index.get_level_values(level=0).unique())
DX_bins = sorted(df.index.get_level_values(level=1).unique())
cos_psis = sorted(df.index.get_level_values(level=3).unique())[::-1]
# Fixed selection of r values (one per grid column) and the cos^2 theta lookup value.
rs = [500, 800, 1200, 1600]
ct2 = 0.7

f, axes = plt.subplots(4, 4, figsize=(15, 15), sharey=True, sharex=True)

# One grid row per DX bin (the first bin is skipped), one column per r.
# zip() stops after the last grid row, so DX bins beyond the 4x4 grid are not
# fitted at all instead of being fitted against a stale axes object.
for row_axes, DX_edges in zip(axes, DX_bins[1:]):
    DX = DX_edges.mid

    for panel, r in zip(row_axes, rs):
        panel.set_title(f'DX = {DX} r = {r}')

        for cp in cos_psis:
            # Only the cp == 1 fit is drawn; the other fits run without axes.
            ax = panel if cp == 1 else None
            try:
                m = get_trace_fit(df.loc[ct2, DX, r, cp], ax=ax)
            except Exception:
                # Bin not present or fit failed: leave it out. Unlike a bare
                # `except:`, KeyboardInterrupt still stops the loop.
                continue

for ax in axes.flatten():
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_ylim([1e-5, 1e-2])
    ax.set_xlim([10, 2e4])
plt.tight_layout()

In [None]:
# DX offset between the two start times that are compared. The axis label is
# built from this value so that label and computation cannot drift apart.
# NOTE(review): the label used to read "DX+25" while the code added 50; the
# computation is unchanged here - confirm which offset was intended.
delta_DX = 50

DX = np.linspace(0, 600)
f, ax = plt.subplots(1)
theta = np.deg2rad(30)
for r, c in zip([500, 1000, 1500], ['r', 'g', 'b']):
    t_here = start_time_plane_front_DXmax(r, DX, theta, 19)
    t_shifted = start_time_plane_front_DXmax(r, DX + delta_DX, theta, 19)
    # difference expressed in units of 25 (see y label)
    ax.plot(DX, (t_here - t_shifted) / 25, color=c)

ax.set_ylabel(f't(DX) - t(DX+{delta_DX}) [25 ns]')
ax.set_xlabel('DX')

In [None]:
from fit_lognormal_traces import lognormal_pdf
# Toy study: average N lognormal pulses whose time axis is shifted by a random
# offset (normal, sigma 50) and compare with the unshifted pulse shape.
n = 200      # samples per trace
N = 100      # number of smeared traces
m = 5.5      # lognormal parameters passed to lognormal_pdf
s = 0.6
t = np.arange(0, n*25/3, 25/3)
plt.figure(figsize=(15, 7))

out = np.zeros((N, n))
for i in range(N):
    # one random shift per trace, applied to the whole time axis
    out[i] = lognormal_pdf(t+np.random.normal(0, 50), m, s)

# Raw average of the smeared traces ...
plt.plot(out.mean(axis=0), 'k-', lw=2)
# median = np.median(out, axis=0)
# median /= median.sum()*25/3
# ... and the same average normalised so that sum * (25/3) equals 1.
mean = out.mean(axis=0)
mean = mean / (mean.sum()*25/3)
# plt.plot(median, 'k--', lw=2)
plt.plot(mean, 'k-', lw=2)
# Unsmeared reference shape with the same normalisation (red).
ln = lognormal_pdf(t, m, s)
ln = ln / (ln.sum()*25/3)
plt.plot(ln, 'r-', lw=2)


In [None]:
# Display the 't0_deMauro_var' column of the currently loaded mean DataFrame.
df['t0_deMauro_var']