In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import norm, multivariate_normal
from scipy.optimize import minimize_scalar
from scipy import integrate

In [2]:
def LoadData(energy, data_dir='/home/llr/ilc/ritzmann/work'):
    """Read the per-event table for one energy point into a DataFrame.

    Parameters
    ----------
    energy : int or str
        Energy label interpolated into the file name as ``{energy}GeV``.
    data_dir : str, optional
        Directory that contains the text files. The default is the location
        that used to be hard-coded here, so ``LoadData(energy)`` still opens
        exactly the same path as before.

    Returns
    -------
    pandas.DataFrame
        The file parsed by ``pd.read_csv`` with its first line used as the
        column header.
    """
    # Alternative file location kept from the original notebook for reference:
    #filename = f'/home/llr/ilc/ritzmann/work/ana_files_ECAL_QGSP_BERT_conf6_e-__GeV_5kevt_build_masked/data_nhits_wenergy_{energy}GeV.txt'
    filename = f'{data_dir}/ECAL_QGSP_BERT_conf6_e-_GeV_5kevt_-42_-42_build_masked_data_nhits_wenergy_{energy}GeV.txt'
    return pd.read_csv(filename, header=0)

In [7]:
# Use three well-separated energy points to check whether the method really works.
Energies = [8, 80, 150]

# Per-energy summary statistics, filled in the same order as Energies.
means1, means2 = [], []
sigmas1, sigmas2 = [], []
rhos = []

for energy in Energies:
    df = LoadData(energy)
    samples = df.to_numpy()

    # Column-wise mean and covariance (rowvar=0: each row is one observation).
    mean = np.mean(samples, axis=0)
    cov = np.cov(samples, rowvar=0)

    # Standard deviations of the first and second column.
    sigma1 = np.sqrt(cov[0][0])
    sigma2 = np.sqrt(cov[1][1])
    # Correlation coefficient: covariance normalised by both standard deviations.
    rho = cov[1][0] / (sigma1 * sigma2)

    for collected, value in (
        (means1, mean[0]),
        (means2, mean[1]),
        (sigmas1, sigma1),
        (sigmas2, sigma2),
        (rhos, rho),
    ):
        collected.append(value)

# One row per energy point; the column names are read again by later cells.
summary_columns = {
    'energy': Energies,
    'mean_nhit': means1,
    'mean_sumE': means2,
    'sigma_nhit': sigmas1,
    'sigma_sumE': sigmas2,
    'rhoNew': rhos,
}
df1 = pd.DataFrame(summary_columns)
df1


Unnamed: 0,energy,mean_nhit,mean_sumE,sigma_nhit,sigma_sumE,rhoNew
0,8,139.753707,118756.9,11.769384,12484.034269,0.379308
1,80,668.146092,1389142.0,37.877292,67938.78608,-0.288462
2,150,971.05992,2705063.0,56.346445,114040.230991,-0.411339


In [19]:
# Build one frozen bivariate normal per energy point from the summary table df1.
normdist = []
for i, energy in enumerate(Energies):
    # Centre of the distribution: (mean_nhit, mean_sumE) of this energy point.
    mean = [df1['mean_nhit'][i], df1['mean_sumE'][i]]

    sigma_nhit = df1['sigma_nhit'][i]
    sigma_sumE = df1['sigma_sumE'][i]
    # Off-diagonal covariance term: rho * sigma_nhit * sigma_sumE.
    off_diagonal = df1['rhoNew'][i] * sigma_nhit * sigma_sumE

    # Symmetric 2x2 covariance matrix with the variances on the diagonal.
    cov = np.array([
        [sigma_nhit**2.0, off_diagonal],
        [off_diagonal, sigma_sumE**2.0],
    ])

    # Frozen multivariate normal; its .pdf is evaluated later.
    var = multivariate_normal(mean=mean, cov=cov)
    normdist.append(var)

In [25]:
# Columns at positions 1 and 2 of df1 ('mean_nhit', 'mean_sumE'), one row per energy point.
mean_points = df1.iloc[:, 1:3].to_numpy()

In [26]:
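# Disabled scratch check: squared Euclidean distance from a test point to each
# row of mean_points. The array printed under this cell appears to be leftover
# output from when these two lines were still active.
#point = np.array([[5, 60000]])
#print(np.sum((mean_points - point)**2, axis=1))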

[3.45239375e+09 1.76661853e+12 6.99636087e+12]


In [29]:
def _current_normdist():
    """Return the notebook-level ``normdist`` list as it is bound at call time."""
    return normdist


def probability(x, y, normdist=None):
    """Evaluate the density of every candidate distribution at the point(s) (x, y).

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the point(s). The two inputs are broadcast against
        each other.
    normdist : sequence, optional
        Objects exposing ``.pdf(points)`` (e.g. frozen
        ``scipy.stats.multivariate_normal`` instances). When omitted, the
        notebook-level ``normdist`` is looked up at call time, so rebuilding
        that list in another cell takes effect without redefining this
        function.

    Returns
    -------
    numpy.ndarray
        Densities with shape ``broadcast(x, y).shape + (len(normdist),)``.
        For scalar ``x`` and ``y`` this is a 1-D array with one entry per
        distribution, so ``np.argmax(probability(x, y))`` selects the most
        likely distribution.

    Notes
    -----
    This replaces a ``np.vectorize`` wrapper around a function returning a
    list. That wrapper only worked for scalar inputs and invoked the wrapped
    function an extra time to infer the output type.
    """
    if normdist is None:
        normdist = _current_normdist()

    xs, ys = np.broadcast_arrays(np.asarray(x, dtype=float),
                                 np.asarray(y, dtype=float))
    if len(normdist) == 0:
        # Nothing to evaluate: keep the documented shape with an empty last axis.
        return np.empty(xs.shape + (0,))

    # The last axis holds the (x, y) pair, as expected by multivariate pdfs.
    points = np.stack([xs, ys], axis=-1)
    # scipy squeezes length-1 axes from pdf output; reshape restores the input shape.
    densities = [np.reshape(dist.pdf(points), xs.shape) for dist in normdist]
    return np.stack(densities, axis=-1)
        
    
# Stack the per-energy tables into a single frame, keeping each table's own
# row index. Building the frame from scratch avoids two problems of the
# previous append loop: it started from whatever `df` an earlier cell had left
# behind (so the result depended on kernel state and grew on every re-run),
# and it used DataFrame.append, which newer pandas versions no longer provide.
df = pd.concat([LoadData(energy) for energy in Energies])
df

Unnamed: 0,nhits,wenergy
0,120,105981.0
1,118,92179.0
2,139,120140.0
3,109,109110.0
4,128,121942.0
...,...,...
4985,1015,2624110.0
4986,1015,2779800.0
4987,1014,2764220.0
4988,991,2678360.0


In [33]:
# Classify randomly drawn events by maximum likelihood over the candidate
# distributions and summarise how well the true energy is recovered.
Ntrials = 100
RANDOM_SEED = 12345  # fixed seed so the drawn events are the same on every run
rng = np.random.default_rng(RANDOM_SEED)

res = []
e = []
std_dev = []
Energies = [8, 80, 150]
#Energies = [1, 2, 5, 8, 10, 20]
for energy in Energies:
    print(f"Energy(GeV):{energy}")
    df = LoadData(energy)

    # Draw Ntrials row positions uniformly from [0, len(df)), with replacement.
    rows = rng.integers(0, len(df), size=Ntrials)

    trials = []
    for row in rows:
        point = df.iloc[row].to_numpy()
        # TODO: the original note here was left unfinished
        # ("maybe change probability, such that only ...").
        prob = probability(point[0], point[1])
        # The argmax index is mapped back through Energies, which is only valid
        # when there is exactly one candidate distribution per listed energy.
        if np.size(prob) != len(Energies):
            raise ValueError(
                f"probability() returned {np.size(prob)} values for {len(Energies)} energies; "
                "rebuild normdist for the current Energies list"
            )
        trials.append(Energies[np.argmax(prob)])
    trials = np.array(trials)

    est_mean = trials.mean()
    est_std = trials.std()
    resolution = est_std / est_mean
    print(f"After {Ntrials} trials,estimated energy:{est_mean.round(3)}, with std: {est_std.round(3)}, and resolution:{np.round(resolution, 3)}")

    res.append(resolution)
    e.append(est_mean)
    std_dev.append(est_std)
print(f"Lower Bound on Resolution:{(np.max(Energies)-np.min(Energies))/len(Energies)} due to spacing")

# Note: the 'energy' column holds the mean *estimated* energy of each sample.
df3 = pd.DataFrame({
    'energy': e,
    'resolution': res,
})
df3.head()

Energy(GeV):8
After 100 trials,estimated energy:8.0, with std: 0.0, and resolution:0.0
Energy(GeV):80
After 100 trials,estimated energy:80.0, with std: 0.0, and resolution:0.0
Energy(GeV):150
After 100 trials,estimated energy:150.0, with std: 0.0, and resolution:0.0
Lower Bound on Resolution:47.333333333333336 due to spacing


Unnamed: 0,energy,resolution
0,8.0,0.0
1,80.0,0.0
2,150.0,0.0
