In [6]:
from __future__ import division, print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
from matplotlib.backends.backend_pdf import PdfPages
from functools import partial, reduce
from itertools import combinations
from scipy.stats import percentileofscore
%matplotlib inline 

In [7]:
class Rfilter(object):
    """Evaluate the collinearity ratios R0, R1 and R2 at a kinematic point.

    An instance stores a target mass, the mass of the selected hadron and a
    set of tunable partonic quantities (``ki``, ``kf``, ``kT``, ``deltakT``).
    The ``set_*`` methods update each quantity together with its cached
    square; the ``get_*`` methods evaluate kinematic functions from them.

    NOTE(review): masses and momenta look like they are expressed in GeV and
    ``M`` looks like the proton mass — confirm against the reference paper.
    """

    # Hadron label -> mass. 'h+'/'h-' are given the same value as the pions.
    _HADRON_MASSES = {
        'pi+': 0.135,
        'pi-': 0.135,
        'pi0': 0.135,
        'k+': 0.493,
        'k-': 0.493,
        'k0': 0.493,
        'h+': 0.135,
        'h-': 0.135,
    }

    def __init__(self, hadron='pi+', fudge=(0, 0)):
        """Set up masses and default (zero) partonic momenta.

        hadron: label understood by ``set_Mh`` (e.g. 'pi+', 'k-', 'h+').
        fudge:  two-element sequence; element 0 enters the initial-state
                transverse mass, element 1 the final-state transverse mass.
                The default is an immutable tuple so it can never be shared
                and modified between instances.
        """
        self.M = 0.938
        self.M2 = self.M**2
        self.set_Mh(hadron)

        self.fudge1 = fudge[0]
        self.fudge2 = fudge[1]

        self.MiT2 = 0.5 + 0.3 * fudge[0]
        self.MfT2 = 0.5 + 0.3 * fudge[1]
        self.kT = 0

        self.MX = 1.3
        self.Ma = 1.5
        self.Mb = 0.3
        self.deltaM = 0.3
        self.MJ = 0.3

        # Starting values; get_MiT / get_MfT overwrite these caches.
        self.MiT = self.MiT2**0.5
        self.MfT = self.MfT2**0.5
        self.kT2 = self.kT**2

        # from paper, we need kf2, deltakT2, ki2
        # we need to have a way to vary them!
        self.kf       = 0
        self.ki       = 0
        self.kf2      = self.kf**2
        self.ki2      = self.ki**2
        self.deltakT  = 0
        self.deltakT2 = self.deltakT**2

    def set_ki(self, ki):
        """Store ``ki`` and its square."""
        self.ki = ki
        self.ki2 = ki**2

    def set_kf(self, kf):
        """Store ``kf`` and its square."""
        self.kf = kf
        self.kf2 = kf**2

    def set_kT(self, kT):
        """Store ``kT`` and its square."""
        self.kT = kT
        self.kT2 = kT**2

    def set_deltakT(self, deltakT):
        """Store ``deltakT`` and its square."""
        self.deltakT = deltakT
        self.deltakT2 = self.deltakT**2

    def set_Mh(self, hadron):
        """Select the hadron mass (``Mh``) and cache its square (``Mh2``).

        Raises ValueError for a label that is not in ``_HADRON_MASSES``.
        Previously an unknown label silently left the mass of the last
        hadron in place, so every later ratio was computed with the wrong
        mass without any warning.
        """
        try:
            mass = self._HADRON_MASSES[hadron]
        except (KeyError, TypeError):
            raise ValueError(
                'unknown hadron {!r}; expected one of {}'.format(
                    hadron, sorted(self._HADRON_MASSES)))
        self.Mh = mass
        self.Mh2 = self.Mh**2

    def get_ki2(self):
        """Return the cached square of ``ki``."""
        return self.ki2

    def get_kf(self):
        """Return ``kf``."""
        return self.kf

    def get_W2(self, x, Q2):
        """Return W2 = Q2 * (1 - x) / x + M**2."""
        W2 = Q2 * (1. - x) / x + self.M2
        return W2

    def get_MBT(self, PhT):
        """Return sqrt(Mh**2 + PhT**2) for the currently selected hadron."""
        MBT = np.sqrt( self.Mh2 + PhT**2 )
        return MBT

    def get_MiT(self, x, Q2):
        """Compute the initial-state transverse mass, cache it in ``self.MiT``
        and return it.

        Uses the current ``kT``, the constants ``Ma``, ``Mb``, ``deltaM`` and
        ``fudge1``. ``get_yi`` reads the cached value, so call this first.
        """
        xn = self.get_xn(x, Q2)
        # Earlier form, kept for reference:
        # np.sqrt( (xn*self.kT**2 + xn*self.MX**2 - (1-xn)*xn*self.M2)/(1-xn) )
        self.MiT = np.sqrt((xn * self.kT**2 + xn * (self.Ma + self.Mb / np.sqrt(xn))
                            ** 2 - (1 - xn) * xn * self.M2 + self.deltaM**2 * self.fudge1) / (1 - xn))
        return self.MiT

    def get_MfT(self):
        """Compute the final-state transverse mass, cache it in ``self.MfT``
        and return it.

        Uses the current ``kT``, ``MJ``, ``deltaM`` and ``fudge2``.
        ``get_yf`` reads the cached value, so call this first.
        """
        self.MfT = np.sqrt(self.kT**2 + self.MJ**2 +
                           self.deltaM**2 * self.fudge2)
        return self.MfT

    def get_xn(self, x, Q2):
        """Return 2x / (1 + sqrt(1 + 4 x**2 M**2 / Q2)).

        NOTE(review): presumably the Nachtmann scaling variable — confirm.
        """
        return 2 * x / (1 + np.sqrt(1 + 4 * x**2 * self.M2 / Q2))

    def get_yh(self, x, z, Q2, PhT, hadron, sign=-1):
        """Return the logarithm of the hadron expression built from
        (x, z, Q2, PhT); ``sign`` picks the root of the square-root term.

        Side effect: selects ``hadron`` via ``set_Mh``.
        """
        xn = self.get_xn(x, Q2)
        self.set_Mh(hadron)
        expy = Q2**0.5 * z * (Q2 - xn**2 * self.M2)\
            / (2 * self.M2 * xn**2 * (self.Mh2 + PhT**2)**0.5)\
            + sign * Q2**0.5 / (xn * self.M) * (z**2 * (Q2 - xn**2 * self.M2)**2
                                        / (4 * self.M2 * xn**2 * (self.Mh2 + PhT**2)) - 1)**0.5
        return np.log(expy)

    # zn
    def get_zn(self, x, z, Q2, PhT, hadron):
        """Return zn computed from (x, z, Q2, PhT) and the hadron mass.

        Side effect: selects ``hadron`` via ``set_Mh``.
        """
        xn = self.get_xn(x,Q2)
        self.set_Mh(hadron)
        MBT = self.get_MBT(PhT)
        zn =  xn * z/ (2 * x) * ( 1. + np.sqrt( 1.- 4. * self.M2 * MBT**2 * x**2 /  ( Q2**2 * z**2) ) )
        return zn

    # rapidity of the target
    def get_yp(self, x, Q2):
        """Return log(sqrt(Q2) / (xn * M))."""
        xn = self.get_xn(x, Q2)
        return np.log(np.sqrt(Q2) / (xn * self.M))

    def get_yi(self, Q2):
        """Return 0.5 * log(Q2 / MiT**2) using the cached ``self.MiT``."""
        return 0.5 * np.log(Q2 / self.MiT**2)

    def get_yf(self, Q2):
        """Return -0.5 * log(Q2 / MfT**2) using the cached ``self.MfT``."""
        return -0.5 * np.log(Q2 / self.MfT**2)

    def get_MhT(self, PhT):
        """Return sqrt(Mh**2 + PhT**2); same quantity as ``get_MBT``."""
        return self.get_MBT(PhT)

    def get_R(self, x, z, Q2, PhT, hadron):
        """Return |Ph.kf / Ph.ki| for the given kinematic point.

        The transverse masses are refreshed first because ``get_yi`` and
        ``get_yf`` read the cached ``MiT`` / ``MfT`` values.
        """
        self.set_Mh(hadron)
        MfT = self.get_MfT()
        MiT = self.get_MiT(x, Q2)
        yi = self.get_yi(Q2)
        yf = self.get_yf(Q2)
        MhT = self.get_MhT(PhT)
        yh = self.get_yh(x, z, Q2, PhT, hadron)
        zn = self.get_zn(x, z, Q2, PhT, hadron)
        znhat = zn/z
        # from paper..
        Ph_kf = 0.5 * MhT * MfT * (np.exp(yf - yh) + np.exp(yh - yf)) - znhat/zn * PhT**2 - PhT * self.kT
        Ph_ki = 0.5 * MhT * MiT * (np.exp(yi - yh) - np.exp(yh - yi)) - PhT * self.kT
        return np.abs(Ph_kf / Ph_ki)

    # R0, Eq. (4.14)...
    def get_R0(self, Q2):
        """
        Collinearity ratio R0: the largest of ki2/Q2, kf2/Q2 and kT2/Q2.

        NOTE(review): the comment above cites Eq. (4.14) while the previous
        docstring cited Eq. (4.15) — confirm the equation number.
        """
        return  np.maximum(np.maximum(self.ki2/Q2,self.kf2/Q2),self.kT2/Q2)

    # We call R1 in the new paper what was R in the previous...
    def get_R1(self, x, z, Q2, PhT, hadron):
        """
        Collinearity ratio defined in the paper Eq. (4.15); alias of ``get_R``.
        """
        return  self.get_R( x, z, Q2, PhT, hadron)

    # R2 from Eq. (4.17)
    def get_R2(self, x, z, Q2, PhT, hadron):
        """Return the ratio R2 built from zn/z, qT = -PhT/zn, ``kf2``,
        ``deltakT`` and ``deltakT2`` (absolute value)."""
        self.set_Mh(hadron)
        zn = self.get_zn(x, z, Q2, PhT, hadron)
        znhat = zn/z
        qT = -PhT/zn
        # from paper..
        return np.abs( -(1.-znhat) - znhat*qT**2/Q2 - (1.-znhat)*self.kf2/(znhat*Q2) - \
                      self.deltakT2/(znhat*Q2) + 2.*qT*self.deltakT/Q2 )

# Proximity is defined for a sphere of a selected radius(p) by the following function
def Proximity(files,path,domain,p,fudge):
    """
    Fraction of sampled partonic configurations inside a sphere of radius p.

    For every row of every spreadsheet, R0, R1 and R2 are evaluated on the
    grid domain x domain x domain of (ki, kf, kT) values, with deltakT set
    equal to kT. The recorded number is the fraction of grid points for
    which R0**2 + R1**2 + R2**2 < p**2.

    files  = iterable of file names; each is appended to `path` and read
             with pd.read_excel. Every sheet must provide the columns
             'hadron', 'x', 'z', 'Q2' and 'pT' and be indexed 0..n-1.
    path   = string prefix put in front of every file name
    domain = array of values scanned for ki, kf and kT
    p      = radius of the sphere marking the proximity region
    fudge  = two-element sequence forwarded unchanged to Rfilter

    Returns {file name: {row index: fraction}}. A file with no rows maps to
    an empty dict; an empty `domain` gives NaN fractions (nothing sampled).
    """
    radius2 = p**2
    proximity = {}
    for fname in files:
        data = pd.read_excel(path + fname)
        # Create the per-file entry up front so files without rows still
        # appear in the result.
        per_row = proximity[fname] = {}
        for i in range(0,len(data)):
            # Read the row once instead of on every grid point.
            hadron = data.loc[i,'hadron']
            x = data.loc[i,'x']
            z = data.loc[i,'z']
            Q2 = data.loc[i,'Q2']
            pT = data.loc[i,'pT']

            Rval = Rfilter(hadron = hadron,fudge=fudge)
            inside = 0.
            outside = 0.

            for ki in domain:
                Rval.set_ki(ki)
                for kf in domain:
                    Rval.set_kf(kf)
                    for kT in domain:
                        Rval.set_kT(kT)
                        Rval.set_deltakT(kT)

                        R0 = Rval.get_R0(Q2)
                        R1 = Rval.get_R1(x, z, Q2, pT, hadron = hadron)
                        R2 = Rval.get_R2(x, z, Q2, pT, hadron = hadron)
                        dist2 = R0**2 + R1**2 + R2**2

                        # NaN distances compare False and count as outside.
                        if dist2 < radius2:
                            inside += 1.
                        else:
                            outside += 1.

            total = inside + outside
            per_row[i] = inside/total if total else float('nan')

    return proximity

In [18]:
# Scan settings: three sample values per partonic momentum, unit sphere, no fudge
domain = np.linspace(.05,.8,3)
p = 1
fudge = [0,0]

# Spreadsheets to process and the directory that holds them
path = '~/GIT/collinearity/database/sidis/expdata/'
files = [
    '1000.xlsx','1001.xlsx','1002.xlsx','1003.xlsx','1004.xlsx',
    '1005.xlsx','1006.xlsx','1007.xlsx','1008.xlsx','1009.xlsx',
]

proximity = Proximity(files=files, path=path, domain=domain, p=p, fudge=fudge)

KeyboardInterrupt: 

In [None]:
# Show the nested {file name: {row index: fraction}} result returned by Proximity
proximity

In [None]:
# Top-level keys of the proximity dictionary (one entry per processed file)
proximity.keys()

In [None]:
# Read the data files into a dictionary of dataframes and add on a proximity column
files = ['1000.xlsx','1001.xlsx','1002.xlsx','1003.xlsx','1004.xlsx','1005.xlsx',
         '1006.xlsx','1007.xlsx','1008.xlsx','1009.xlsx']
path = '~/GIT/collinearity/database/sidis/expdata/'
data = {}
for file in files:
    dat = pd.read_excel(path + file)
    # proximity[file] maps row index -> fraction. Going through a Series
    # aligns each value with its row by label instead of relying on the
    # iteration order of dict.values(); rows without an entry become NaN.
    dat['proximity'] = pd.Series(proximity[file], dtype=float)
    data[file] = dat

In [None]:
# Preview the first rows of every loaded dataframe, labelled by file name
for key, frame in data.items():
    print(key,'\n',frame.head(),'\n\n')

In [14]:
# Proximity column of one particular dataframe
data['1008.xlsx']['proximity']

0       1.0
1       1.0
2       1.0
3       1.0
4       1.0
       ... 
2327    0.0
2328    0.0
2329    0.0
2330    0.0
2331    0.0
Name: proximity, Length: 2332, dtype: float64

In [15]:
# Load the 1008 affinity spreadsheet; the relative path is resolved against the working directory
tab = pd.read_excel('1008_Affinity.xlsx')

In [16]:
# Display the loaded affinity table
tab

Unnamed: 0.1,Unnamed: 0,Ebeam,x,Q2,y,z,pT,pT2,obs,value,stat_u,sys_u,target,hadron,col,F2,Aff
0,0,160,0.1570,20.0,0.439,0.2,0.300000,0.09,M_Compass,6.2719,0.3126,0.1051,deuteron,h+,compass,0.64392,0.518
1,1,160,0.1570,20.0,0.439,0.2,0.331662,0.11,M_Compass,6.2175,0.3135,0.1047,deuteron,h+,compass,0.64392,0.475
2,2,160,0.1570,20.0,0.439,0.2,0.360555,0.13,M_Compass,5.0537,0.2711,0.1070,deuteron,h+,compass,0.64392,0.476
3,3,160,0.1570,20.0,0.439,0.2,0.400000,0.16,M_Compass,4.8854,0.2244,0.1067,deuteron,h+,compass,0.64392,0.439
4,4,160,0.1570,20.0,0.439,0.2,0.424264,0.18,M_Compass,4.1757,0.2275,0.1101,deuteron,h+,compass,0.64392,0.430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2327,2327,160,0.0389,1.4,0.122,0.6,1.330413,1.77,M_Compass,0.0057,0.0009,0.1207,deuteron,h+,compass,0.00000,0.000
2328,2328,160,0.0389,1.4,0.122,0.6,1.396424,1.95,M_Compass,0.0033,0.0006,0.1367,deuteron,h+,compass,0.00000,0.000
2329,2329,160,0.0389,1.4,0.122,0.6,1.483240,2.20,M_Compass,0.0014,0.0003,0.1765,deuteron,h+,compass,0.00000,0.000
2330,2330,160,0.0389,1.4,0.122,0.6,1.581139,2.50,M_Compass,0.0016,0.0004,0.1795,deuteron,h+,compass,0.00000,0.000
