# Import libraries

In [None]:
import os

import pandas as pd
import numpy as np
import scipy.stats

# Read data

In [None]:
# Load the DataFrame stored under the 'data' key of tmp/fitted_data.h5.
open_fname = os.path.join('tmp', 'fitted_data.h5')
df = pd.read_hdf(open_fname, key='data')

In [None]:
# Element-wise ratio of each parameter to its fitted counterpart
# (alpha / alpha_fit and beta / beta_fit), stored as new columns.
df['div_alpha'] = df['alpha'].div(df['alpha_fit'])
df['div_beta'] = df['beta'].div(df['beta_fit'])

# Conditions for the rejection of data

### A. number of experimental points <=4

In [None]:
# Criterion A: an (article, energy) experiment passes when it has more than
# 4 rows. The flag is written to column 'A' for every row of the experiment;
# failing experiments are printed.
for (name, energy), data in df.groupby(['article', 'energy']):
    has_enough_points = len(data.index) > 4
    df.loc[(name, energy), 'A'] = has_enough_points
    if not has_enough_points:
        print(name, energy)

### B fitted beta < 0

In [None]:
# Criterion B: an (article, energy) experiment passes when every beta_fit
# value in it is >= 0. The flag goes to column 'B'; failures are printed.
for (name, energy), data in df.groupby(['article', 'energy']):
    beta_non_negative = bool((data['beta_fit'] >= 0.0).all())
    df.loc[(name, energy), 'B'] = beta_non_negative
    if not beta_non_negative:
        print(name, energy)

### C differences in alpha parameters >20%

In [None]:
# Criterion C: an (article, energy) experiment passes when every div_alpha
# value lies strictly between 0.8 and 1.2. The flag goes to column 'C';
# failures are printed.
for (name, energy), data in df.groupby(['article', 'energy']):
    ratio = data['div_alpha']
    alpha_agrees = bool(((ratio > 0.8) & (ratio < 1.2)).all())
    df.loc[(name, energy), 'C'] = alpha_agrees
    if not alpha_agrees:
        print(name, energy)

### D differences in beta parameters >20%

In [None]:
# Criterion D: an (article, energy) experiment passes when every div_beta
# value satisfies 0.8 <= div_beta < 1.2. The flag goes to column 'D';
# failures are printed.
# NOTE(review): the lower bound is inclusive here but exclusive in
# criterion C -- confirm whether that difference is intended.
for (name, energy), data in df.groupby(['article', 'energy']):
    ratio = data['div_beta']
    beta_agrees = bool(((ratio >= 0.8) & (ratio < 1.2)).all())
    df.loc[(name, energy), 'D'] = beta_agrees
    if not beta_agrees:
        print(name, energy)

### E beta/beta_fit ratio outside [0, 2) (beta at least 200% of the fitted value, or a negative ratio)

In [None]:
# Criterion E: an (article, energy) experiment passes when every div_beta
# value satisfies 0 <= div_beta < 2. The flag goes to column 'E';
# failures are printed.
for (name, energy), data in df.groupby(['article', 'energy']):
    ratio = data['div_beta']
    beta_in_range = bool(((ratio >= 0.) & (ratio < 2.)).all())
    df.loc[(name, energy), 'E'] = beta_in_range
    if not beta_in_range:
        print(name, energy)

### F error SF  >50%

In [None]:
# Criterion F: evaluated only on the energy == 0.0 rows of each article.
# If any of those rows has sf_error / sf > 0.5, the whole article is
# flagged False in column 'F' (and printed); otherwise the whole article is
# flagged True. Articles without energy == 0.0 rows are left untouched here.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        if energy != 0.0:
            continue
        reference = data.loc[name]
        er = reference.sf_error / reference.sf
        error_too_large = bool((er > 0.5).any())
        df.loc[name, 'F'] = not error_too_large
        if error_too_large:
            print(name, energy)

### G alpha ref > alpha prot

In [None]:
# Criterion G: for every article, compare the fitted alpha of each
# energy > 0 experiment with the fitted alpha of the article's energy == 0
# rows. An experiment fails (column 'G' = False, and it is printed) when the
# energy == 0 alpha_fit is larger; rows with energy <= 0 always pass.
for name, group in df.groupby('article'):
    # Take the first alpha_fit of the energy == 0 rows *by position*.
    # The previous `alpha_fit[0]` was an integer key on a Series: a label
    # lookup on numeric indexes and a deprecated positional fallback on
    # others. `.iloc[0]` is explicit and matches the positional
    # `.values[0]` read used for the other experiments below.
    # NOTE(review): assumes alpha_fit is the same on every row of one
    # experiment -- confirm against the fitting step.
    alpha_ref = df.loc[name].loc[0].alpha_fit.iloc[0]
    for energy, data in group.groupby('energy'):
        passed = True
        if energy > 0:
            alpha = data.alpha_fit.values[0]
            passed = not (alpha_ref > alpha)
        df.loc[(name, energy), 'G'] = passed
        if not passed:
            print(name, energy)


# Delete proton data when the reference experiment has been removed

In [None]:
# Propagate rejection of the reference curve to the whole article:
# for each criterion A-G, if every energy == 0.0 row of an article is flagged
# False, flag all rows of that article False for that criterion.
# `.all()` on an empty selection is True, so an article with no
# energy == 0.0 rows at all is flagged False for every criterion as well.
for name, group in df.groupby('article'):
    reference_rows = group[group.index.get_level_values('energy') == 0.0]
    for criterion in ('A', 'B', 'C', 'D', 'E', 'F', 'G'):
        # `== False` (not `~`) on purpose: the flag columns may hold NaN.
        if (reference_rows[criterion] == False).all():
            df.loc[name, criterion] = False


## Check outliers

In [None]:
# Build seven samples restricted to the fit columns:
#   df0 - all rows (no criteria applied)
#   df1 - A, B
#   df2 - A, B, E, F, G
#   df3 - A, B, C, D
#   df4 - A, B, C
#   df5 - A, B, E
#   df6 - A, B, G
fit_columns = ["alpha_fit", "alpha_fit_err", "beta_fit", "beta_fit_err", "covar_fit", "LET"]

# Every filtered sample requires criteria A and B.
passes_a_and_b = (df.A == True) & (df.B == True)

df0 = df[fit_columns]
df1 = df[passes_a_and_b][fit_columns]
df2 = df[passes_a_and_b & (df.E == True) & (df.F == True) & (df.G == True)][fit_columns]
df3 = df[passes_a_and_b & (df.C == True) & (df.D == True)][fit_columns]
df4 = df[passes_a_and_b & (df.C == True)][fit_columns]
df5 = df[passes_a_and_b & (df.E == True)][fit_columns]
df6 = df[passes_a_and_b & (df.G == True)][fit_columns]

In [None]:
# For each sample, count the distinct (article, energy) groups whose energy
# is not 0 and print that count next to the sample number.
for sample_no, sample in enumerate([df0, df1, df2, df3, df4, df5, df6]):
    n_experiments = sum(
        1
        for _, per_article in sample.groupby("article")
        for ene, _ in per_article.groupby("energy")
        if ene != 0
    )
    print("SAMPLE ",sample_no,"\n no of experiments: ",n_experiments)

# Save temporary files

In [None]:
# Path of the HDF5 file the samples are written to (tmp/fitted_data.h5).
save_fname = os.path.join('tmp', 'fitted_data.h5')

In [None]:
# Write every sample to `save_fname` under its own HDF5 key
# ("data_0" ... "data_6") in 'table' format.
# The loop variable is deliberately NOT called `df`: the previous version
# rebound the main DataFrame `df` to the last sample, which broke any cell
# re-run afterwards. The key is passed by keyword because positional
# arguments after the path are deprecated in `DataFrame.to_hdf`.
samples = {
    "data_0": df0,
    "data_1": df1,
    "data_2": df2,
    "data_3": df3,
    "data_4": df4,
    "data_5": df5,
    "data_6": df6,
}
for key, sample in samples.items():
    sample.to_hdf(save_fname, key=key, format='table')