# Import libraries

In [None]:
import os

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import norm, truncnorm, multivariate_normal
from sklearn.linear_model import LinearRegression
import corner
import pylab

# Functions

In [None]:
def positive_gauss(mean, std):
    """Draw one strictly positive sample from a Gaussian by rejection.

    Samples ``random.gauss(mean, std)`` repeatedly until the value is > 0.

    Parameters
    ----------
    mean : float
        Mean of the Gaussian.
    std : float
        Standard deviation of the Gaussian.

    Returns
    -------
    float
        The first sample that is greater than zero.

    Raises
    ------
    ValueError
        If ``std`` is zero and ``mean`` is not positive; every draw would
        equal ``mean`` and the rejection loop could never terminate.
    """
    # Local import: this function needs the stdlib `random` module, which the
    # import cell of this notebook does not provide.
    import random

    if std == 0 and mean <= 0:
        raise ValueError(
            "cannot draw a positive sample: std is 0 and mean is not positive"
        )

    rndnum = random.gauss(mean, std)
    while rndnum <= 0:
        rndnum = random.gauss(mean, std)
    return rndnum

In [None]:
def dist_trunc(mean,std,n):
    """Sample ``n`` values from a normal distribution truncated at zero.

    The distribution is ``Normal(mean, std)`` restricted to ``[0, +inf)``.

    Parameters
    ----------
    mean, std : float or array-like
        Location and scale of the untruncated normal.
    n : int
        Number of samples requested (passed to ``rvs`` as ``size``).

    Returns
    -------
    numpy.ndarray
        The samples, cast to ``float``.
    """
    # scipy expresses the truncation limits in standard-deviation units
    # relative to `loc`, so the lower limit 0 becomes -mean/std.
    lower_limit = -mean/std
    frozen = truncnorm(lower_limit, np.inf, loc=mean, scale=std)
    samples = frozen.rvs(size=n)
    return samples.astype(float)

In [None]:
def dist_trunc_dep(meana,stda,meanb,stdb,covar,n):
    """Sample correlated (a, b) pairs from a bivariate normal, keeping b > 0.

    Pairs are drawn from a bivariate normal with means ``(meana, meanb)`` and
    covariance ``[[stda**2, covar], [covar, stdb**2]]``; rows whose second
    component is not strictly positive are discarded. Only ``b`` is
    constrained; ``a`` may be negative.

    Parameters
    ----------
    meana, stda : float
        Mean and standard deviation of the first component.
    meanb, stdb : float
        Mean and standard deviation of the second component.
    covar : float
        Covariance between the two components.
    n : int
        Minimum number of accepted rows required.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(m, 2)`` with ``m >= n`` (for ``n >= 1``). Column 0
        is ``a``, column 1 is ``b``. When the first batch of ``3*n`` draws
        already yields at least ``n`` accepted rows, all of them are returned
        unchanged; otherwise further batches are appended until ``n`` is
        reached.

    Raises
    ------
    RuntimeError
        If ``n`` accepted rows cannot be collected within the batch limit
        (the region ``b > 0`` is then essentially unreachable).
    """
    ref_mean = (meana, meanb)
    ref_matrix = np.array([[stda**2, covar], [covar, stdb**2]])
    batch_size = 3*n
    max_extra_batches = 10000

    ref_sample = multivariate_normal.rvs(mean=ref_mean, cov=ref_matrix,
                                         size=batch_size)
    ref_sample = ref_sample[ref_sample[:, 1] > 0]

    # A single 3*n batch is not guaranteed to leave n rows after rejection;
    # callers index rows 0..n-1, so keep drawing until enough rows survive.
    extra_batches = 0
    while len(ref_sample) < n:
        if extra_batches >= max_extra_batches:
            raise RuntimeError(
                "dist_trunc_dep: could not collect n samples with b > 0"
            )
        extra = multivariate_normal.rvs(mean=ref_mean, cov=ref_matrix,
                                        size=batch_size)
        ref_sample = np.concatenate([ref_sample, extra[extra[:, 1] > 0]])
        extra_batches += 1

    return ref_sample

In [None]:
def var_q(df2):
    """Attach reference-radiation values and derived q, k, error columns.

    ``df2`` is expected to be indexed by ``(article, energy)``; the row with
    ``energy == 0`` of each article is treated as that article's reference.
    The frame is modified in place:

    * reference rows receive ``a_fit_ref``, ``a_fit_ref_err``, ``b_fit_ref``,
      ``b_fit_ref_err`` and ``covar_ref``;
    * all other rows receive ``q``, ``k`` and ``error``;
    * every row receives ``ab_ref``, copied from the reference row's
      ``alpha(fit)/beta(fit)``.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame with index levels ``article`` and ``energy`` and the columns
        read below (``alpha_fit``, ``alpha_fit_err``, ``beta_fit``,
        ``beta_fit_err``, ``covar_fit``, ``alpha(fit)/beta(fit)``,
        ``a(fit)/a_ref(fit)``, ``LET``).

    Returns
    -------
    pandas.DataFrame
        A copy of the updated frame in which ``inf``, ``-inf`` and ``NaN``
        are replaced by 0. ``df2`` itself keeps its NaN entries, exactly as
        before; callers that ignore the return value are unaffected.
    """
    for name, group in df2.groupby('article'):
        for energy, data in group.groupby('energy'):
            if energy == 0:
                # Reference values. These locals are reused by the non-zero
                # energies of the same article in later iterations, which
                # relies on the energy-0 group being visited first.
                # NOTE(review): an article without an energy-0 row would
                # silently reuse the previous article's reference - confirm
                # that the input always contains one.
                ab_ref = data["alpha(fit)/beta(fit)"].unique()
                a_ref = data["alpha_fit"].unique()
                a_ref_err = data["alpha_fit_err"].unique()
                b_ref = data["beta_fit"].unique()
                b_ref_err = data["beta_fit_err"].unique()
                covar_ref = data["covar_fit"].unique()

                df2.loc[(name, energy), 'a_fit_ref'] = a_ref
                df2.loc[(name, energy), 'a_fit_ref_err'] = a_ref_err
                df2.loc[(name, energy), 'b_fit_ref'] = b_ref
                df2.loc[(name, energy), 'b_fit_ref_err'] = b_ref_err
                df2.loc[(name, energy), 'covar_ref'] = covar_ref

            else:
                a = data["alpha_fit"].unique()
                a_err = data["alpha_fit_err"].unique()

                aa_ref = data['a(fit)/a_ref(fit)'].unique()
                L = (data["LET"]).unique()

                # From alpha/alpha_ref = 1 + q*L/(alpha/beta)_ref.
                q = (aa_ref - 1.0) * ab_ref / L
                k = (aa_ref - 1.0) / L

                # Quadrature sum of three uncertainty terms; expression kept
                # exactly as in the original analysis.
                # NOTE(review): confirm the partial derivatives against the
                # intended error-propagation formula.
                error = np.sqrt(np.power((1 - a_ref) / b_ref * a_err, 2) +
                                np.power((a - 1) / b_ref * a_ref_err, 2) +
                                np.power((a_ref - a) / np.power(b_ref, 2) * b_ref_err, 2))

                df2.loc[(name, energy), 'q'] = q
                df2.loc[(name, energy), 'k'] = k
                df2.loc[(name, energy), "error"] = error

    # Broadcast each article's reference alpha/beta onto all of its rows.
    for name, group in df2.groupby('article'):
        for energy, data in group.groupby('energy'):
            df2.loc[(name, energy), "ab_ref"] = df2.loc[(name, 0), "alpha(fit)/beta(fit)"]

    # `np.nan` instead of the `np.NaN` alias, which NumPy 2.0 removed.
    # `replace` returns a new frame, so hand it back instead of dropping it.
    return df2.replace([np.inf, -np.inf, np.nan], 0)

In [None]:
def dict_q(df2):
    """Build Monte-Carlo samples of the fit parameters per article and energy.

    For every ``(article, energy)`` group of ``df2`` this draws ``num``
    samples (``num`` is read from module scope) and stores them in a nested
    dictionary ``d[article][energy]``.

    * ``energy == 0`` (reference): ``ref_alpha`` / ``ref_beta`` are drawn
      independently with ``dist_trunc``; ``ref_alpha_dep`` / ``ref_beta_dep``
      are drawn jointly with ``dist_trunc_dep``.
    * any other energy: ``alpha`` / ``beta`` (independent), ``alpha_dep`` /
      ``beta_dep`` (joint) and the scalar ``LET``.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame indexed by ``(article, energy)`` holding the fit columns and
        the ``*_ref`` columns read below.

    Returns
    -------
    dict
        ``{article: {energy: {name: samples_or_value}}}``.
    """
    d = {}
    for name, group in df2.groupby('article'):
        d[name] = {}
        for energy, data in group.groupby('energy'):

            if energy == 0:
                d[name][0] = {}
                d[name][0]['ref_alpha'] = dist_trunc(data.a_fit_ref.values,
                                                     data.a_fit_ref_err.values, num)
                d[name][0]['ref_beta'] = dist_trunc(data.b_fit_ref.values,
                                                    data.b_fit_ref_err.values, num)
                # Every argument is reduced to a scalar: mixing scalars with
                # a 1-element array / Series would make the 2x2 covariance
                # matrix built inside dist_trunc_dep ragged.
                ref_sample = np.array(dist_trunc_dep(data.a_fit_ref.values.max(),
                                                     data.a_fit_ref_err.values.max(),
                                                     data.b_fit_ref.values.max(),
                                                     data.b_fit_ref_err.values.max(),
                                                     data.covar_fit.values.max(),
                                                     num))
                d[name][0]['ref_alpha_dep'] = ref_sample[:, 0]
                d[name][0]['ref_beta_dep'] = ref_sample[:, 1]

            else:
                d[name][energy] = {}
                d[name][energy]['alpha'] = dist_trunc(data.alpha_fit.values,
                                                      data.alpha_fit_err.values, num)
                d[name][energy]['beta'] = dist_trunc(data.beta_fit.values,
                                                     data.beta_fit_err.values,
                                                     num)
                # Scalars only, for the same reason as in the reference branch.
                ref_sample = np.array(dist_trunc_dep(data.alpha_fit.values.max(),
                                                     data.alpha_fit_err.values.max(),
                                                     data.beta_fit.values.max(),
                                                     data.beta_fit_err.values.max(),
                                                     data.covar_fit.values.max(),
                                                     num))
                d[name][energy]['alpha_dep'] = ref_sample[:, 0]
                d[name][energy]['beta_dep'] = ref_sample[:, 1]
                d[name][energy]['LET'] = data.LET.values.max()
    return d

\begin{align}
\frac{\alpha}{\alpha_{phot}} = 1+ \frac{q L}{(\frac{\alpha}{\beta})_{phot}}
\end{align}

In [None]:
def find_q_sk(d,i):
    """Fit q for Monte-Carlo realisation ``i`` by a regression through the origin.

    For every article and every energy key greater than zero, one point is
    built from the ``i``-th samples stored in ``d``:

    * ``y = alpha / ref_alpha_dep - 1``
    * ``x = LET * ref_beta_dep / ref_alpha_dep``

    A ``LinearRegression`` without intercept is fitted to these points.

    Parameters
    ----------
    d : dict
        Nested dictionary ``d[article][energy][name]`` of samples.
    i : int
        Index of the realisation to use.

    Returns
    -------
    tuple
        ``(q, R)``: the fitted slope and the value of ``reg.score`` on the
        fitted points.
    """
    predictors = []
    responses = []

    for article, per_energy in d.items():
        for energy_key in per_energy:
            if not energy_key > 0:
                continue
            alpha_i = d[article][energy_key]['alpha'][i]
            ref_alpha_i = d[article][0]['ref_alpha_dep'][i]
            responses.append(alpha_i / ref_alpha_i - 1)
            let_times_beta = d[article][energy_key]['LET'] * d[article][0]['ref_beta_dep'][i]
            predictors.append(let_times_beta / d[article][0]['ref_alpha_dep'][i])

    design = np.asarray(predictors).reshape(-1, 1)
    model = LinearRegression(fit_intercept=False)
    model.fit(design, responses)

    q = model.coef_[0]
    if q < -1:
        # Diagnostic output: reports the last article/energy keys visited by
        # the loops above together with the realisation index.
        print(article, energy_key, i)
    R = model.score(design, responses)
    return q, R

In [None]:
# Plot styling applied to every figure: inward ticks, ticks on the top axis,
# a 10x6 default figure and enlarged fonts / tick padding.
params = {
    'legend.fontsize': '20',
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'xtick.top': True,
    'figure.figsize': (10, 6),
    'axes.labelsize': '26',
    'axes.titlesize': '26',
    'xtick.labelsize': '20',
    'ytick.labelsize': '20',
    'xtick.major.pad': '16',
    'ytick.major.pad': '16',
}

pylab.rcParams.update(params)

# Open data

In [None]:
# HDF5 files inside the 'tmp' directory: the fitted input data that is read,
# and the q distributions that are written.
open_fname, save_fname = (
    os.path.join('tmp', fname)
    for fname in ('fitted_data.h5', 'distrib_q.h5')
)

In [None]:
# Keys of the six data sets, used both for reading the input and for
# storing the resulting q distributions.
datas = ["data_{}".format(number) for number in range(1, 7)]

# Row labels of the summary table.
index = [
    'mean',
    'std',
    'median',
    'r2',
    'quantile025',
    'quantile975',
]

# Empty summary table: one column per data set, one row per statistic.
q_compare = pd.DataFrame(index=index, columns=datas)

In [None]:
# Number of Monte-Carlo realisations drawn per data set.
num = 10 ** 5

In [None]:
# For every data set: load the fitted parameters, derive the reference and
# q-related columns, sample the parameter distributions, fit q once per
# Monte-Carlo realisation, then summarise and store the q distribution.
for data_q in datas:

    df = pd.read_hdf(open_fname, key=data_q)
    df.rename(columns={"a(fit)/b(fit)": "alpha(fit)/beta(fit)"},
              inplace=True)
    # One row per (article, energy); the group-wise maximum of each column.
    df2 = df.groupby(["article", 'energy']).max()

    # Adds the reference / q / k / error columns to df2 in place.
    var_q(df2)
    # Create the dictionary of sampled parameter distributions.
    d = dict_q(df2)

    # Calculate q values and R^2, one regression per realisation.
    qtmp, Rtmp = zip(*[find_q_sk(d, i) for i in range(num)])

    R = pd.DataFrame(np.asarray(Rtmp), columns=['r'])
    qdf = pd.DataFrame(np.asarray(qtmp), columns=['q'])

    # Compare q distributions; order matches the rows of q_compare
    # (mean, std, median, r2, quantile025, quantile975).
    q_compare[data_q] = [
        qdf.q.mean(),
        qdf.q.std(),
        qdf.q.median(),
        R.r.mean(),
        qdf.q.quantile(0.025),
        qdf.q.quantile(0.975),
    ]
    # Save q distributions. `key` is passed by keyword because passing it
    # positionally is deprecated in recent pandas releases.
    qdf.to_hdf(save_fname, key=data_q, format='table')

In [None]:
# Show the summary table (one column per data set) as the cell output.
q_compare