In [None]:
import numpy as np
import pandas as pd
from math import isnan
import re
import matplotlib.pyplot as plt
from ape_producer.geom_reader import create_geom_df
from ape_producer.parse_report import make_cov_objects
from ape_producer.sign_conventions import signConventions, square_coordinates
from scipy.optimize import curve_fit
import os
import glob
#import mplhep as hep
%load_ext autoreload
%autoreload 2
pd.set_option('display.max_rows', 500)

In [None]:
import glob
from importlib import import_module
from ape_producer.parse_report import make_cov_objects, square_array
import numpy as np
import pandas as pd
import re

In [None]:
####
## Helper functions
####

In [None]:
def sel_wheel_station(df, wheel, station):
    """Return the rows of ``df`` that belong to one wheel/station pair.

    Parameters
    ----------
    df : DataFrame exposing ``wheel`` and ``station`` columns.
    wheel, station : values compared for equality against those columns.

    Returns
    -------
    The boolean-mask selection of ``df``; the original index labels are kept.
    """
    wheel_mask = df.wheel == wheel
    station_mask = df.station == station
    return df[wheel_mask & station_mask]

In [None]:
def select_sectors(df, wheel, station):
    """Select every row of ``df`` recorded for the given wheel and station.

    ``df`` must expose ``wheel`` and ``station`` columns; rows where both
    equal the requested values are returned with their index unchanged.
    """
    keep = (df.station == station) & (df.wheel == wheel)
    return df[keep]

In [None]:
# Every [wheel, station] pair: wheels -2..2 (outer loop) by stations 1..4
# (inner loop), giving 20 two-element lists ordered wheel-first.
wheel_station = [[wheel, station]
                 for wheel in range(-2, 3)
                 for station in range(1, 5)]
# Parameter names; make_hesse_df_dt zips these, in this order, against the
# diagonal of the covariance matrix.
alignables = ['x', 'y', 'z', 'phix', 'phiy', 'phiz']


In [None]:
####
## Create the lists of XML files and report files that we want
###

In [None]:
# Patterns selecting the XML files and the report modules under data/.
# They are matched against the *whole* path (see match_re) and the dot before
# the extension is escaped, so look-alikes such as "*.pyc" or "*_xml" are
# rejected.
re_xmls = [
    r"data/SM_10_6_.+\.xml",
    r"data/design_10_6_.+\.xml",
]
re_report = [
    r"data/SM_10_6_.+\.py",
    r"data/design_10_6_.+\.py",
]


def match_re(re_list, string):
    """Tell whether ``string`` fully matches at least one pattern of ``re_list``.

    Parameters
    ----------
    re_list : iterable of regular-expression strings.
    string : the text (here: a file path) to test.

    Returns
    -------
    bool -- ``True``/``False``, which still compare equal to the ``1``/``0``
    this helper used to return.  An empty ``re_list`` gives ``False``.
    """
    # fullmatch anchors both ends; re.match only anchored the start, so any
    # trailing junk after the extension used to be accepted.
    return any(re.fullmatch(pattern, string) is not None for pattern in re_list)


# glob returns entries in arbitrary order; sorting keeps the processing order
# (and therefore the row order of the resulting tables) reproducible.
files = sorted(glob.glob('data/*'))
xml_files = [file for file in files if match_re(re_xmls, file)]
report_files = [file for file in files if match_re(re_report, file)]

In [None]:
####
## Now, we create the report.py dataframe
####

In [None]:
# Format the report file paths as dotted module names for import_module.
# Only the trailing extension is dropped (str.replace('.py', '') would also
# remove a ".py" occurring earlier in the path); "/" then becomes ".".
modules = [os.path.splitext(path)[0].replace('/', '.') for path in report_files]

In [None]:
def get_report(module_name):
    """Import ``module_name`` and hand back its module-level ``reports`` object.

    Raises whatever ``import_module`` raises when the module cannot be
    imported, and ``AttributeError`` when it defines no ``reports``.
    """
    report_module = import_module(module_name)
    return report_module.reports

In [None]:
def make_hesse_df_dt(df):
    """Summarise ``df`` into one row per [wheel, station] pair.

    For every pair listed in ``wheel_station`` the matching rows are selected,
    their ``covMatrix`` entries are averaged, and the square roots of the
    averaged matrix's diagonal are stored as ``hesse_unc_<dim>`` (one per
    entry of ``alignables``).  The mean and standard deviation of ``nMuons``
    over the same rows are stored as ``nMuons_mean`` / ``nMuons_std``.

    Parameters
    ----------
    df : DataFrame with ``wheel``, ``station``, ``covMatrix`` and ``nMuons``
        columns.  (Assumes each ``covMatrix`` cell holds a square array whose
        diagonal follows the ``alignables`` order -- TODO confirm against
        make_cov_objects.)

    Returns
    -------
    DataFrame with columns ``wheel``, ``station``, ``nMuons_mean``,
    ``nMuons_std`` and the ``hesse_unc_*`` columns.
    """
    def summarise(wheel, station):
        chambers = sel_wheel_station(df, wheel, station)
        mean_cov = chambers.covMatrix.mean()
        sigmas = np.diagonal(mean_cov)**.5
        row = {
            "wheel": wheel,
            "station": station,
            "nMuons_mean": chambers.nMuons.mean(),
            "nMuons_std": chambers.nMuons.std(),
        }
        # zip stops at the shorter sequence, exactly like the original pairing
        for sigma, dim in zip(sigmas, alignables):
            row['hesse_unc_' + dim] = sigma
        return row

    return pd.DataFrame([summarise(wheel, station) for wheel, station in wheel_station])

In [None]:
def make_summary_report_df(module_name, njobs):
    """Build the per-[wheel, station] summary table for one report module.

    Parameters
    ----------
    module_name : dotted module path; its second dotted component (with
        ``_report_`` collapsed to ``_``) becomes the ``name`` column.
    njobs : value written unchanged into the ``njobs`` column.

    Returns
    -------
    The frame produced by ``make_hesse_df_dt`` with ``njobs`` and ``name``
    columns added.
    """
    # make_cov_objects yields two objects; only the first one (named "dt"
    # here) is summarised, the second is ignored.
    dt_cov_df, _csc_cov_df = make_cov_objects(get_report(module_name), debug=False)
    summary = make_hesse_df_dt(dt_cov_df)
    summary['njobs'] = njobs
    second_component = module_name.split('.')[1]
    summary['name'] = second_component.replace('_report_', '_')
    return summary

In [None]:
def number_from_name(name):
    """Return the integer that follows the first ``report_`` in ``name``.

    Parameters
    ----------
    name : string such as a module name containing ``report_<digits>``.

    Returns
    -------
    int -- the digits of the first ``report_<digits>`` occurrence.

    Raises
    ------
    IndexError -- when ``name`` contains no ``report_<digits>``; the type is
    the one the previous ``findall(...)[0]`` raised, now with a message.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    found = re.search(r'report_(\d+)', name)
    if found is None:
        raise IndexError("no 'report_<number>' found in {!r}".format(name))
    return int(found.group(1))

In [None]:
# Build one summary frame per report module and concatenate them once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copied the accumulated frame on every pass.
summary_frames = []
for module in modules:
    print(module)
    summary_frames.append(make_summary_report_df(module, number_from_name(module)))
# pd.concat raises on an empty list, so fall back to the empty frame the
# original loop produced when there were no modules.
stats_df = pd.concat(summary_frames) if summary_frames else pd.DataFrame()

In [None]:
####
## Make xml df
####

In [None]:
# Calculate sigma and its uncertainty (unc)
from scipy.optimize import curve_fit
from scipy.stats import rv_continuous
from scipy.stats import norm
import uncertainties 
def calc_likelihood(x, width):
    """Likelihood of the sample ``x`` under a zero-centred normal of scale ``width``.

    Parameters
    ----------
    x : array-like of observations.
    width : scale (standard deviation) of the normal distribution.

    Returns
    -------
    The product of ``norm.pdf(x, 0, width)`` over all elements of ``x``.
    """
    # np.prod is the supported spelling; the np.product alias was deprecated
    # and removed in NumPy 2.0.
    return np.prod(norm.pdf(x, 0, width))
def norm_fit(x, c, scale):
    """Fit model: a zero-centred normal density of scale ``scale``, multiplied by ``c``.

    ``x`` may be a scalar or an array; the result has the same shape.
    """
    density = norm.pdf(x, loc=0, scale=scale)
    return c*density
def calc_uncertainty(mean, width, values):
    """Estimate the uncertainty on ``width`` from the shape of its likelihood.

    The likelihood of ``values`` (zero-centred normal, see ``calc_likelihood``)
    is evaluated for 100 trial widths ``width + offset``, normalised to unit
    sum, and fitted with ``norm_fit`` as a function of the offset.  The fitted
    scale parameter is returned.

    Parameters
    ----------
    mean : unused; kept so existing callers keep working.
    width : central width around which the likelihood is scanned.
    values : sample passed on to ``calc_likelihood``.

    Returns
    -------
    The second fitted parameter of ``norm_fit`` (its ``scale``).

    Raises
    ------
    Re-raises the exception from ``curve_fit`` after plotting and printing
    the scan.  The previous bare ``except`` swallowed it and then failed
    with an unrelated ``UnboundLocalError`` on ``popt``.
    """
    # The 1e-10 shift keeps the smallest trial width just above zero.
    offsets = np.linspace(-width+1e-10, width, 100)
    likelihoods = np.array([calc_likelihood(values, width + offset) for offset in offsets])
    likelihoods = likelihoods/np.sum(likelihoods)
    try:
        popt, _pcov = curve_fit(norm_fit, offsets, likelihoods, maxfev=int(1e4))
    except Exception:
        # Show the curve that could not be fitted, then propagate the real error.
        plt.plot(offsets, likelihoods)
        print(offsets, likelihoods, values)
        raise
    return popt[1]

# Estimator switch: True -> zero-centred normal fit with a likelihood-scan
# uncertainty; False -> root-mean-square about zero with an analytic
# uncertainty.
fit = False
if fit:
    def sigma_and_unc(key, *args):
        """Width of column ``key`` from a zero-located normal fit, as a ufloat.

        ``args`` are forwarded to ``select_sectors`` (frame, wheel, station).
        The uncertainty comes from ``calc_uncertainty``.
        """
        column = select_sectors(*args)[key]
        mean, width = norm.fit(column, floc=0)
        width_unc = calc_uncertainty(mean, width, column)
        return uncertainties.ufloat(width, width_unc)
else:
    def sigma_and_unc(key, *args):
        """Root-mean-square about zero of column ``key``, as a ufloat.

        ``args`` are forwarded to ``select_sectors`` (frame, wheel, station).
        The attached uncertainty is ``rms / sqrt(2*n - 2)`` for ``n`` values.
        """
        column = select_sectors(*args)[key]
        rms = np.mean(column**2)**.5
        n_values = len(column)
        rms_unc = rms/(2*n_values-2)**.5
        return uncertainties.ufloat(rms, rms_unc)

In [None]:
# One row per (xml file, wheel, station): for each of the six parameters the
# value and uncertainty returned by sigma_and_unc are stored in the
# "<dim> std" and "<dim> std unc" columns.
xml_list = []
for xml_file in xml_files:
    # file name without its directory and ".xml" extension
    xml_name = xml_file.split('/')[1].replace('.xml','')
    dt_df, csc_df = create_geom_df(xml_file)
    for wheel in range(-2, 3):
        for station in range(1, 5):
            row = {'wheel': wheel, 'station': station, 'name': xml_name}
            for dim in ['x', 'y', 'z', 'phix', 'phiy', 'phiz']:
                spread = sigma_and_unc(dim, dt_df, wheel, station)
                row["{} std".format(dim)] = spread.nominal_value
                row["{} std unc".format(dim)] = spread.std_dev
            xml_list.append(row)
xml_df = pd.DataFrame(xml_list)


In [None]:
# Inner join (pandas' default) of the XML-derived table with the report
# summaries.  No key is given, so pandas joins on the columns the two frames
# share -- as built in this notebook: wheel, station and name.
Result = xml_df.merge(stats_df)

In [None]:
# Persist the merged table in the working directory.  With to_csv's defaults
# the frame index is written as the first, unnamed column.
Result.to_csv('Result.csv')

In [None]:
# Display the merged table (display.max_rows is raised to 500 in the first cell).
Result