# Import libraries

In [52]:
import os

import pandas as pd
import numpy as np
import scipy.stats

# Read data

In [53]:
# Load the table stored under the 'data' key of tmp/fitted_data.h5.
fname = os.path.join('tmp', 'fitted_data.h5')
df = pd.read_hdf(fname, key='data')

In [54]:
# Ratio of each parameter to its fitted counterpart. These ratios are what
# checks C (div_alpha) and C2 (div_beta) test against a band around 1.
df['div_alpha'] = df['alpha'].div(df['alpha_fit'])
df['div_beta'] = df['beta'].div(df['beta_fit'])

# Conditions for the rejection of data

### A. number of experimental points <=4

In [55]:
# Check A: an (article, energy) series passes only when it holds more than
# four rows. Series that fail are printed.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        has_enough_points = len(rows.index) > 4
        df.loc[(article, energy), 'A'] = has_enough_points
        if not has_enough_points:
            print(article, energy)

### B fitted beta < 0

In [56]:
# Check B: an (article, energy) series passes only when every beta_fit
# value in it is non-negative. Series that fail are printed.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        beta_non_negative = bool(rows['beta_fit'].ge(0.0).all())
        df.loc[(article, energy), 'B'] = beta_non_negative
        if not beta_non_negative:
            print(article, energy)

### C differences in alpha parameters

In [57]:
# Check C: an (article, energy) series passes only when every div_alpha
# value lies strictly between 0.8 and 1.2 (both bounds exclusive).
# Series that fail are printed.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        ratio = rows['div_alpha']
        alpha_in_band = bool(((ratio > 0.8) & (ratio < 1.2)).all())
        df.loc[(article, energy), 'C'] = alpha_in_band
        if not alpha_in_band:
            print(article, energy)

12 3660
13SQ 0
13SQ 880


### C2 differences in beta parameters

In [58]:
# Check C2: an (article, energy) series passes only when every div_beta
# value lies in [0.8, 1.2) -- lower bound inclusive, upper bound exclusive.
# Series that fail are printed.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        ratio = rows['div_beta']
        beta_in_band = bool(((ratio >= 0.8) & (ratio < 1.2)).all())
        df.loc[(article, energy), 'C2'] = beta_in_band
        if not beta_in_band:
            print(article, energy)

10 3200
11 0
12 1070
12 3660
13HF 0
13HF 880
13HF 1490
13HF 5040
13M 0
13M 1350
13M 4050
13SC 0
13SC 880
13SC 1490
13SQ 0
13SQ 880
13SQ 1490
13SQ 5040
16 880
17D 5010
17H 0


### D uncertainties >100%

In [59]:
# Check D: an (article, energy) series passes when its largest sf_err_up
# value is below 1.2. Series with energy == 0.0 always pass this check,
# whatever their sf_err_up values. Series that fail are printed.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        below_limit = rows['sf_err_up'].max() < 1.2
        passes = bool(below_limit or energy == 0.0)
        df.loc[(article, energy), 'D'] = passes
        if not passes:
            print(article, energy)

13M 1350


### E CDF > 10**-5

In [60]:
# Check E: evaluated on the energy == 0.0 series only, but the resulting
# flag is written to every row of the article.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        if energy != 0.0:
            continue
        # Normal CDF at 0 with mean alpha_fit and standard deviation
        # alpha_fit_err, i.e. the probability mass at or below zero.
        prob_below_zero = scipy.stats.norm.cdf(
            0,
            loc=rows['alpha_fit'].values,
            scale=rows['alpha_fit_err'].values,
        )
        rejected = bool((prob_below_zero > 10**-5).any())
        df.loc[article, 'E'] = not rejected
        if rejected:
            print(article, energy)
                    

13SQ 0


### F relative error of SF > 50%

In [61]:
# Check F: evaluated on the energy == 0.0 series only, but the resulting
# flag is written to every row of the article. The article is rejected
# when any row of that series has sf_error / sf above 0.5.
for article, article_rows in df.groupby('article'):
    for energy, rows in article_rows.groupby('energy'):
        if energy != 0.0:
            continue
        reference = rows.loc[article]
        relative_error = reference['sf_error'] / reference['sf']
        rejected = bool((relative_error > 0.5).any())
        df.loc[article, 'F'] = not rejected
        if rejected:
            print(article, energy)

17H 0


### G alpha ref > alpha prot

In [64]:
# Check G: within each article, a series with energy > 0 is rejected when
# the alpha_fit of the article's energy == 0 series is larger than its own
# alpha_fit. Series with energy <= 0 always pass. Rejected series are
# printed.
for article, article_rows in df.groupby('article'):
    # First alpha_fit value of the energy == 0 rows, taken by position.
    # A plain `[0]` on a Series is ambiguous: it is a label lookup on an
    # integer index and a deprecated positional fallback otherwise.
    # `.iloc[0]` always returns one scalar and matches how the per-energy
    # value is read below.
    alpha_ref = df.loc[article].loc[0]['alpha_fit'].iloc[0]
    for energy, rows in article_rows.groupby('energy'):
        if energy > 0:
            alpha = rows['alpha_fit'].iloc[0]
            passes = not (alpha_ref > alpha)
        else:
            passes = True
        df.loc[(article, energy), 'G'] = passes
        if not passes:
            print(article, energy)


13HF 880
13HF 1490
13HF 5040
13M 1350
13M 4050
13SC 5040
16 5040
17H 5010


# Reject all proton data of an article when its reference (energy = 0) data is rejected

In [65]:
# Propagate a rejected reference to the whole article: for every check
# flag, if all energy == 0.0 rows of an article carry False, the flag is
# set to False on every row of that article.
# Note: `.all()` over an empty selection is True, so an article without
# any energy == 0.0 rows gets every flag set to False.
for article, article_rows in df.groupby('article'):
    # The reference rows are the same for every flag, so select them once.
    is_reference = article_rows.index.get_level_values('energy') == 0.0
    reference_rows = article_rows[is_reference]
    for flag in ('A', 'B', 'C2', 'C', 'D', 'E', 'F', 'G'):
        if (reference_rows[flag] == False).all():
            df.loc[article, flag] = False


## Check data without outliers

In [66]:
# Keep the rows whose G flag equals True. The explicit comparison also
# drops rows where the flag is missing.
passed_g = df['G'] == True
df2 = df.loc[passed_g]

In [70]:
# Narrow the filtered frame to the fit results and derived quantities.
df2 = df2[[
    'alpha_fit', 'alpha_fit_err',
    'beta_fit', 'beta_fit_err',
    'covar_fit',
    'a(fit)/a_ref(fit)', 'a/a_ref',
    'LET',
]]

In [71]:
# List every (article, energy) series still present in df2.
for article, article_rows in df2.groupby('article'):
    for energy, _rows in article_rows.groupby('energy'):
        print(article, energy)

10 0
10 1410
10 3200
10 5010
11 0
11 870
11 1460
11 3180
12 0
12 1070
12 1830
12 3660
13HF 0
13M 0
13SC 0
13SC 880
13SC 1490
13SQ 0
13SQ 880
13SQ 1490
13SQ 5040
16 0
16 880
17D 0
17D 5010
17H 0


## Check outliers

In [72]:
# Keep the rows whose G flag equals False, i.e. the series rejected by
# check G.
failed_g = df['G'] == False
df3 = df.loc[failed_g]

In [73]:
# List every (article, energy) series present in df3.
for article, article_rows in df3.groupby('article'):
    for energy, _rows in article_rows.groupby('energy'):
        print(article, energy)

13HF 880
13HF 1490
13HF 5040
13M 1350
13M 4050
13SC 5040
16 5040
17H 5010


# Save temporary files

In [None]:
#hdf_fname = os.path.join('tmp','q.h5')
#df2.to_hdf(hdf_fname, 'data_6', format='table')