# Import libraries

In [None]:
import os

import pandas as pd
import numpy as np
import scipy.stats

# Read data

In [None]:
# Load the table stored under the 'data' key of the HDF5 file tmp/fit.h5.
fname = os.path.join('tmp', 'fit.h5')
df = pd.read_hdf(fname, key='data')

# Conditions for the rejection of data

### A. number of experimental points <=4

In [None]:
# Criterion A: an (article, energy) series passes only when it contains more
# than four rows. The keys of every rejected series are printed.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        enough_points = len(series_rows.index) > 4
        df.loc[(article, energy_key), 'A'] = enough_points
        if not enough_points:
            print(article, energy_key)

### B. beta < 0

In [None]:
# Criterion B: an (article, energy) series passes only when every beta_fit
# value is >= 0.0 (a value of exactly 0.0 passes). Rejected keys are printed.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        beta_non_negative = bool((series_rows.beta_fit >= 0.0).all())
        df.loc[(article, energy_key), 'B'] = beta_non_negative
        if not beta_non_negative:
            print(article, energy_key)

### C differences in alfa parameters

In [None]:
# Criterion C: an (article, energy) series passes only when every div_alfa
# value lies strictly between 0.8 and 1.2. Rejected keys are printed.
# NOTE(review): the lower bound is exclusive here but inclusive for div_beta
# in criterion C2 - confirm that the difference is intended.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        alfa_ratio = series_rows.div_alfa
        inside_band = (alfa_ratio > 0.8) & (alfa_ratio < 1.2)
        alfa_consistent = bool(inside_band.all())
        df.loc[(article, energy_key), 'C'] = alfa_consistent
        if not alfa_consistent:
            print(article, energy_key)

### C2 differences in beta parameters

In [None]:
# Criterion C2: an (article, energy) series passes only when every div_beta
# value satisfies 0.8 <= div_beta < 1.2. Rejected keys are printed.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        beta_ratio = series_rows.div_beta
        inside_band = (beta_ratio >= 0.8) & (beta_ratio < 1.2)
        beta_consistent = bool(inside_band.all())
        df.loc[(article, energy_key), 'C2'] = beta_consistent
        if not beta_consistent:
            print(article, energy_key)

### D uncertainties >100%

In [None]:
# Criterion D: an (article, energy) series passes when its largest sf_err_up
# value is below 1.2. Series at energy 0.0 always pass, whatever their
# sf_err_up values. Rejected keys are printed.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        largest_err_up = series_rows.sf_err_up.max()
        accepted = bool(largest_err_up < 1.2 or energy_key == 0.0)
        df.loc[(article, energy_key), 'D'] = accepted
        if not accepted:
            print(article, energy_key)

### E CDF > 10**-5

In [None]:
# Criterion E: evaluated on the energy 0.0 series only, but written to every
# row of the article. For each row the normal CDF at 0 is computed with mean
# alfa_fit and standard deviation alfa_fit_err; the article is rejected when
# any of these probabilities exceeds 10**-5. Articles without an energy 0.0
# series are not assigned a value here.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        if energy_key != 0.0:
            continue
        prob_below_zero = scipy.stats.norm.cdf(
            0,
            loc=series_rows.alfa_fit.values,
            scale=series_rows.alfa_fit_err.values,
        )
        reference_ok = not (prob_below_zero > 10**-5).any()
        df.loc[article, 'E'] = reference_ok
        if not reference_ok:
            print(article, energy_key)
                    

### F error SF  >50%

In [None]:
# Criterion F: evaluated on the energy 0.0 series only, but written to every
# row of the article. The article is rejected when sf_error / sf exceeds 0.5
# for any row of that series. Articles without an energy 0.0 series are not
# assigned a value here.
for article, article_rows in df.groupby('article'):
    for energy_key, series_rows in article_rows.groupby('energy'):
        if energy_key != 0.0:
            continue
        reference_rows = series_rows.loc[article]
        relative_error = reference_rows.sf_error / reference_rows.sf
        reference_ok = not (relative_error > 0.5).any()
        df.loc[article, 'F'] = reference_ok
        if not reference_ok:
            print(article, energy_key)

### G alfa ref > alfa prot

In [None]:
# Criterion G: a series with energy > 0 is rejected when the article's
# reference alfa_fit (first row of the energy-0 series) is larger than the
# first alfa_fit of that series. Series with energy <= 0 always pass.
# Rejected keys are printed.
for name, group in df.groupby('article'):
    # Take the first reference row by position. A plain ``[0]`` on a Series
    # falls back to positional access only via a deprecated code path, and
    # the per-energy value below is taken positionally as well.
    # The .loc[0] lookup raises KeyError when the article has no energy-0 rows.
    alfa_ref = df.loc[name].loc[0].alfa_fit.iloc[0]
    for energy, data in group.groupby('energy'):
        ref_exceeds = bool(energy > 0 and alfa_ref > data.alfa_fit.iloc[0])
        df.loc[(name, energy), 'G'] = not ref_exceeds
        if ref_exceeds:
            print(name, energy)


# Reject proton data when the reference (energy = 0) data are rejected

In [None]:
# For each article and each criterion flag: when every energy 0.0 row of the
# article has the flag set to False, set that flag to False on all rows of
# the article. An article with no energy 0.0 rows satisfies this condition
# vacuously, so all of its flags are set to False.
for article, article_rows in df.groupby('article'):
    is_reference = article_rows.index.get_level_values('energy') == 0.0
    reference_rows = article_rows[is_reference]
    for flag in ('A', 'B', 'C2', 'C', 'D', 'E', 'F', 'G'):
        if (reference_rows[flag] == False).all():
            df.loc[article, flag] = False


## Check data without outliers

In [None]:
# Keep only the rows whose G flag is True.
# NOTE(review): flags A-F are not applied in this selection - confirm intended.
df2 = df.loc[df.G == True]

In [None]:
# Restrict the accepted rows to the columns that are written out below.
df2 = df2[[
    "alfa(fit)/beta(fit)",
    "alfa_fit",
    "alfa_fit_err",
    "beta_fit",
    "beta_fit_err",
    "covar_fit",
    'a(fit)/a_ref(fit)',
    'a/a_ref',
    "LET",
]]

In [None]:
# List every (article, energy) pair that remains in the accepted data.
for article, article_rows in df2.groupby('article'):
    for energy_key, _ in article_rows.groupby('energy'):
        print(article, energy_key)

## Check outliers

In [None]:
# Collect the rows whose G flag is False (the rejected counterpart of df2).
# NOTE(review): flags A-F are not applied in this selection - confirm intended.
df3 = df.loc[df.G == False]

In [None]:
# List every (article, energy) pair that was rejected.
for article, article_rows in df3.groupby('article'):
    for energy_key, _ in article_rows.groupby('energy'):
        print(article, energy_key)

# Save temporary files

In [None]:
# Store the accepted rows under the 'data_6' key of tmp/q.h5 in the
# appendable/queryable 'table' format.
hdf_fname = os.path.join('tmp', 'q.h5')
# The key is passed by keyword: positional arguments other than the path are
# deprecated in DataFrame.to_hdf.
df2.to_hdf(hdf_fname, key='data_6', format='table')