# Import libraries

In [None]:
import os

import pandas as pd
import numpy as np
import scipy.stats

# Read data

In [None]:
# Load the fitted data set stored under the key 'data' in the temporary
# HDF5 file; the resulting frame is the `df` used by every cell below.
open_fname = os.path.join('tmp', 'fitted_data.h5')
df = pd.read_hdf(open_fname, key='data')

In [None]:
# Element-wise ratio of the 'alpha' / 'beta' columns to their '*_fit'
# counterparts; conditions C, C2 and C3 below test these ratios.
df['div_alpha'] = df['alpha'].div(df['alpha_fit'])
df['div_beta'] = df['beta'].div(df['beta_fit'])

# Conditions for the rejection of data

### A. number of experimental points <=4

In [None]:
# Flag A: an (article, energy) group passes when it holds more than 4 rows.
# Every rejected group is printed.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        enough_points = len(data.index) > 4
        df.loc[(name, energy), 'A'] = enough_points
        if not enough_points:
            print(name, energy)

### B. beta_fit < 0

In [None]:
# Flag B: an (article, energy) group passes when every beta_fit value in it
# is >= 0. Every rejected group is printed.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        beta_non_negative = bool((data.beta_fit >= 0.0).all())
        df.loc[(name, energy), 'B'] = beta_non_negative
        if not beta_non_negative:
            print(name, energy)

### C differences in alpha parameters

In [None]:
# Flag C: an (article, energy) group passes when every div_alpha value lies
# strictly between 0.8 and 1.2. Every rejected group is printed.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        ratio = data.div_alpha
        within_band = bool(((ratio > 0.8) & (ratio < 1.2)).all())
        df.loc[(name, energy), 'C'] = within_band
        if not within_band:
            print(name, energy)

### C2 differences in beta parameters

In [None]:
# Flag C2: an (article, energy) group passes when every div_beta value lies
# in the half-open interval [0.8, 1.2). Every rejected group is printed.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        ratio = data.div_beta
        within_band = bool(((ratio >= 0.8) & (ratio < 1.2)).all())
        df.loc[(name, energy), 'C2'] = within_band
        if not within_band:
            print(name, energy)

### C3 differences in beta parameters >100%

In [None]:
# Flag C3: an (article, energy) group passes when every div_beta value lies
# in the half-open interval [0, 2). Every rejected group is printed.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        ratio = data.div_beta
        within_band = bool(((ratio >= 0.) & (ratio < 2.)).all())
        df.loc[(name, energy), 'C3'] = within_band
        if not within_band:
            print(name, energy)

### D uncertainties >100%

In [None]:
# Flag D: an (article, energy) group passes when its largest sf_err_up value
# is below 1.2. Groups with energy == 0.0 always pass, whatever their
# sf_err_up values. Every rejected group is printed.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        accepted = bool(data.sf_err_up.max() < 1.2 or energy == 0.0)
        df.loc[(name, energy), 'D'] = accepted
        if not accepted:
            print(name, energy)

### E CDF > 10**-5

In [None]:
# Flag E: only the energy == 0.0 group of each article is examined. For each
# of its rows the normal CDF at 0 is evaluated with mean alpha_fit and
# standard deviation alpha_fit_err. If any of these values exceeds 10**-5
# the WHOLE article is flagged False (and printed); otherwise the whole
# article is flagged True. Articles without an energy == 0.0 group are
# left untouched.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        if not (energy == 0.0):
            continue
        cdf = scipy.stats.norm.cdf(
            0,
            loc=data.alpha_fit.values,
            scale=data.alpha_fit_err.values,
        )
        rejected = bool((cdf > 10**-5).any())
        df.loc[(name), 'E'] = not rejected
        if rejected:
            print(name, energy)
                    

### F error SF  >50%

In [None]:
# Flag F: only the energy == 0.0 group of each article is examined. The
# row-wise ratio sf_error / sf is computed; if any ratio exceeds 0.5 the
# WHOLE article is flagged False (and printed), otherwise the whole article
# is flagged True. Articles without an energy == 0.0 group are untouched.
for name, group in df.groupby('article'):
    for energy, data in group.groupby('energy'):
        if not (energy == 0.0):
            continue
        reference_rows = data.loc[name]
        er = reference_rows.sf_error / reference_rows.sf
        rejected = bool((er > 0.5).any())
        df.loc[(name), 'F'] = not rejected
        if rejected:
            print(name, energy)

### G alpha ref > alpha prot

In [None]:
# Flag G: within each article, compare the fitted alpha of every group with
# energy > 0 against the fitted alpha of the article's energy == 0 rows.
# A group is rejected (False, and printed) when the reference alpha_fit is
# larger than the group's alpha_fit; groups with energy <= 0 always pass.
for name, group in df.groupby('article'):
    # Pick the reference rows with a boolean mask and take the first value
    # by position. The previous `...alpha_fit[0]` mixed label-based and
    # positional lookup and broke when `.loc[0]` returned a single row.
    # NOTE(review): assumes every article has at least one energy == 0 row
    # (an IndexError is raised otherwise) and that alpha_fit is constant
    # within a group - confirm against the fitting notebook.
    is_reference = group.index.get_level_values('energy') == 0
    alpha_ref = group.loc[is_reference, 'alpha_fit'].values[0]

    for energy, data in group.groupby('energy'):
        # Same positional access as for the reference value above.
        rejected = bool(energy > 0 and alpha_ref > data.alpha_fit.values[0])
        df.loc[(name, energy), 'G'] = not rejected
        if rejected:
            print(name, energy)


# Reject all data of an article when its reference (energy = 0) data are rejected

In [None]:
# For every article and every flag: when all rows of the article's
# energy == 0.0 group carry False for that flag, set the flag to False for
# the whole article. `.all()` on an empty selection is True, so an article
# without any energy == 0.0 row gets every flag set to False as well.
for name, group in df.groupby('article'):
    reference = group[group.index.get_level_values('energy') == 0.0]
    for flag in ('A', 'B', 'C2', 'C', 'C3', 'D', 'E', 'F', 'G'):
        if (reference[flag] == False).all():
            df.loc[(name), flag] = False


## Check outliers

In [None]:
# Build six views of the data restricted to the same set of columns:
#   df1 - all rows
#   df2 - rows passing C3, F and G
#   df3 - rows passing C and C2
#   df4 - rows passing C
#   df5 - rows passing C2
#   df6 - rows passing G
selected_columns = [
    "alpha_fit", "alpha_fit_err", "beta_fit", "beta_fit_err", "covar_fit",
    'a(fit)/a_ref(fit)', 'a/a_ref', "LET", "a(fit)/b(fit)",
]

passes_C = df.C == True
passes_C2 = df.C2 == True
passes_G = df.G == True

df1 = df[selected_columns]
df2 = df[(df.C3 == True) & (df.F == True) & passes_G][selected_columns]
df3 = df[passes_C & passes_C2][selected_columns]
df4 = df[passes_C][selected_columns]
df5 = df[passes_C2][selected_columns]
df6 = df[passes_G][selected_columns]

In [None]:
# For each subset print how many articles it contains and how many
# (article, energy) groups with energy != 0 it contains.
# The summary used to print the leaked loop variable `art`, i.e. the name of
# the last article (or a stale/undefined name for an empty subset), not a
# count; it now reports the number of articles.
for data in [df1, df2, df3, df4, df5, df6]:
    n_articles = 0
    n_experiments = 0
    for art, d in data.groupby("article"):
        n_articles += 1
        # energy == 0 groups are not counted as experiments
        n_experiments += sum(1 for ene, _ in d.groupby("energy") if ene != 0)
    print("art: ", n_articles, "\n exp: ", n_experiments)

# Save temporary files

In [None]:
# Path of the HDF5 store the filtered subsets are written to (the same
# 'tmp/fitted_data.h5' location the input was read from).
save_fname = os.path.join(
    'tmp',
    'fitted_data.h5',
)

In [None]:
# Write each subset to the HDF5 store under its own key ("data_1" ...
# "data_6") in 'table' format.
# The loop variables are deliberately NOT called `df`/`data`: the previous
# `for data, df in ...` rebound the notebook-wide `df` to the last subset,
# so re-running any earlier cell afterwards silently worked on df6.
# `key` is passed by keyword because recent pandas versions deprecate
# passing it positionally to `to_hdf`.
subsets = {
    "data_1": df1,
    "data_2": df2,
    "data_3": df3,
    "data_4": df4,
    "data_5": df5,
    "data_6": df6,
}
for key, frame in subsets.items():
    frame.to_hdf(save_fname, key=key, format='table')