In [None]:
%matplotlib inline
from ROOT import TFile, TTree
import numpy as np
import matplotlib.pylab as plt
import pandas as pd
from root_numpy import root2array
from collections import OrderedDict
from likelihood_fitter import computeHypothesis

In [None]:
# Directory that holds the selection-output ROOT files (one file per sample).
filebase = ('/Users/davidkaleko/larlite/UserDev/LowEnergyExcess/output/'
            '70KV/perfect_reco/collabmtg_feb2016_nopid/')

# Sample key -> ROOT file name. An OrderedDict is used so that iterating over
# it always visits the samples in the order they are listed here.
filenames = OrderedDict()
filenames['nue'] = 'singleE_nue_selection_mc.root'
filenames['numu'] = 'singleE_numu_selection_mc.root'
filenames['nc'] = 'singleE_nc_selection_mc.root'
filenames['cosmic'] = 'singleE_cosmic_selection_mc.root'
filenames['lee'] = 'singleE_LEE_selection_mc.root'
# Sample key -> name of the TTree to read from that sample's ROOT file.
treenames = dict(
    nue='beamNuE',
    numu='beamNuMu',
    nc='beamNC',
    cosmic='cosmicShowers',
    lee='LEETree',
)

# Sample key -> legend text. The 'nuefrom*' keys are the sub-samples the nue
# sample is split into when the nues are not grouped.
# (A 'nuefrompion' : 'Nues from Pions' entry is currently disabled.)
labels = dict(
    nue='Beam Intrinsic Nue',
    numu='Beam Intrinsic Numu',
    nc='Beam Intrinsic NC',
    cosmic='CRY Cosmic (Scaled to BGW Exposure Time)',  # was 'CRY Cosmic, in-time'
    lee='Scaled Low Energy Excess',
    nuefrommu='Nues from Muons',
    nuefromK0='Nues from K0s',
    nuefromKCharged='Nues from Charged Kaons',
)

# Sample key -> bar colour (hex). Trailing notes give the ROOT colour each
# value was picked to resemble.
# (A 'nuefrompion' : '#C6FFDB' entry is currently disabled.)
colors = dict(
    nue='#269729',     # kGreen-2
    numu='#4B4EAC',    # kBlue-5
    nc='#6B70F5',      # kBlue-9
    cosmic='#D12C2C',  # kRed-3
    lee='#E65C00',     # orangish
    nuefrommu='#3F7252',
    nuefromK0='#58A173',
    nuefromKCharged='#76DF9D',
)

# Default histogram bin edges: 15 evenly spaced edges from 0.1 to 3.
# Other binnings that have been tried:
#   np.linspace(0.05,3,15), np.linspace(0,1,50), np.linspace(-1,1,39)
binning = np.linspace(0.1, 3, 15)

# Sample key -> overall factor applied to every event weight of that sample.
# All beam samples share one factor
# (author note: should be 99600, used 96000 for collab meeting).
# A 'nuefrompion' entry with the same beam factor is currently disabled.
scaling_weights = dict.fromkeys(
    ['nue', 'nuefrommu', 'nuefromK0', 'nuefromKCharged', 'numu', 'nc'],
    6.6e20/(2.706e15*99600))
scaling_weights.update({
    'cosmic': 2.52,  # (211,000 ms total exposure)/(6.4ms * 13100 evts generated)
    'lee': 1,
})
# Column that is histogrammed by default (alternative that was used: 'ptoverp').
plot_variable = '_e_nuReco'

# --- Selection strings, written in pandas DataFrame.query syntax ---

# Fiducial volume, 5cm from all sides, in y and z only. The matching x bounds
# would be '_x_vtx > 5 and _x_vtx < 251.35', but they are left out on purpose:
# WARNING WHEN YOU USE XSHIFT YOU CANNOT USE _x_vtx AS FIDUCIAL VOLUME CUT!
# The 10cm-from-all-sides alternative would be:
#   _x_vtx > 10 and _x_vtx < 246.35 and _y_vtx > -106.5 and _y_vtx < 106.5
#   and _z_vtx > 10. and _z_vtx < 1026.8
fidvolcut = ' and '.join(['_y_vtx > -111.5',
                          '_y_vtx < 111.5',
                          '_z_vtx > 5',
                          '_z_vtx < 1031.8'])

defaultcut = '_e_Edep > 50.'
tracklencut = '_longestTrackLen < 100.'
BGWcut = '_flash_time > 0. and _flash_time < 0.2'
hypothcut = 'hypothesis > 0.69'  # optimized


In [None]:
# When True the nue sample is kept as a single 'nue' entry. When False it is
# additionally split, using the `_ptype` column, into 'nuefrommu',
# 'nuefromK0' and 'nuefromKCharged' (the grouped 'nue' entry is kept as well,
# it is used for the likelihood).
group_nues = True

# Sample key -> DataFrame. Insertion order matters: it is the order in which
# the samples are iterated (and therefore stacked) downstream.
dfs = OrderedDict()
for key, filename in filenames.items():
    # Read every tree exactly once; the nue sub-samples are selections of the
    # same frame, so there is no need to go back to the ROOT file for each one.
    sample_df = pd.DataFrame( root2array( filebase + filename, treenames[key] ) )

    if key == 'nue' and not group_nues:
        # .copy() makes each sub-sample an independent frame, as it was when
        # every sub-sample was built from its own freshly read DataFrame.
        # NOTE(review): the disabled 'nuefrompion' selection used _ptype==2,
        # which is currently folded into 'nuefrommu' -- confirm this is intended.
        dfs['nuefrommu'] = sample_df.query('_ptype==1 or _ptype==2').copy()
        dfs['nuefromK0'] = sample_df.query('_ptype==3').copy()
        dfs['nuefromKCharged'] = sample_df.query('_ptype==4').copy()

    # The full sample is always stored under its own key; for 'nue' this comes
    # after the sub-samples, which keeps the original ordering of `dfs`.
    dfs[key] = sample_df

In [None]:
# Run the likelihood fit over all loaded samples and keep the fitted parameters.
# Per the original note this step computes the likelihood column -- presumably
# the `hypothesis` column queried by later cuts; verify in likelihood_fitter.
fitparams = computeHypothesis(dfs)

# Reference values from an earlier run, for future cross-checks:
#   [-0.40574592 -2.4498874  -1.77060787]
print(fitparams)

In [None]:
def gen_histos(myquery='',plotvar = '_e_nuReco',scalefactor = 1.):
    """Build one weighted histogram of `plotvar` for every loaded sample.

    Reads the module-level `dfs` (sample key -> DataFrame), `scaling_weights`
    (sample key -> overall factor) and `binning` (bin edges) at call time.

    Parameters
    ----------
    myquery : str
        pandas query string applied to every sample before histogramming.
        An empty string means no selection.
    plotvar : str
        Name of the column to histogram.
    scalefactor : float
        The column values are divided by this number before binning.

    Returns
    -------
    OrderedDict
        Sample key -> (counts, bin_edges) as returned by np.histogram, in the
        same order as `dfs`.
    """
    nphistos = OrderedDict()

    for key, df in dfs.items():
        mydf = df.query(myquery) if myquery else df

        if key == 'cosmic':
            # The cosmic sample gets unit per-event weights. Size them from the
            # selected frame itself rather than from a named column, so this
            # does not depend on the unrelated global `plot_variable`.
            myweights = np.ones(len(mydf))
        else:
            myweights = np.asarray(mydf['_weight'])

        # Out-of-place multiply: leaves the DataFrame column untouched and also
        # works if the weight column has an integer dtype.
        myweights = myweights * scaling_weights[key]

        nphistos[key] = np.histogram(mydf[plotvar]/scalefactor,
                                     bins=binning,
                                     weights=myweights)
    return nphistos

In [None]:
def plot_fullstack(myhistos,plotvar = '_e_nuReco'):
    """Draw the given histograms as one stacked bar chart on a new figure.

    Reads the module-level `colors`, `labels` and `group_nues`; `binning` is
    only used for the axis range when nothing is drawn.

    Parameters
    ----------
    myhistos : ordered mapping
        Sample key -> (counts, bin_edges), e.g. the output of gen_histos.
        Samples are stacked bottom-to-top in iteration order.
    plotvar : str
        Name of the plotted variable; only used to choose the x-axis label.

    Returns
    -------
    None
    """
    plt.figure(figsize=(10,6))
    plt.grid(True)
    lasthist = 0
    # Bin edges used for the axis range: those of the histograms actually
    # drawn, falling back to the global binning if nothing is drawn.
    edges = binning
    for key, (hist, bins) in myhistos.items():
        # With split nues the grouped 'nue' entry would double count them.
        if not group_nues and key == 'nue': continue
        edges = bins
        # bins[:-1] are the LEFT bin edges, so the bars must be edge-aligned.
        # This is pinned explicitly because matplotlib's default alignment is
        # not the same in all versions. np.diff gives per-bin widths, which is
        # also correct for non-uniform binning.
        plt.bar(bins[:-1],hist,
                width=np.diff(bins),
                align='edge',
                color=colors[key],
                bottom = lasthist,
                edgecolor = 'k',
                label='%s: %d Events'%(labels[key],sum(hist)))
        # Out-of-place so the caller's count arrays are never modified.
        lasthist = lasthist + hist

    #plt.ylim([0,180])
    plt.title('CCSingleE Stacked Backgrounds',fontsize=25)
    plt.ylabel('Events',fontsize=20)
    if plotvar == '_e_nuReco' or plotvar == '_e_nuReco_better':
        xstring = 'Reconstructed Neutrino Energy [GeV]'
    elif plotvar == '_e_CCQE':
        xstring = 'CCQE Energy [GeV]'
    else:
        xstring = plotvar
    plt.xlabel(xstring,fontsize=20)
    # Add a tick at the lower edge of the first bin, then clip to the binned range.
    plt.xticks(list(plt.xticks()[0]) + [edges[0]])
    plt.xlim([edges[0],edges[-1]])
    plt.legend()

In [None]:
def computeSigma(histos,POT = 6.6,ignoreCosmics=False):
    
    POTscalefactor = POT/6.6
    #it is just, Δχ2 = (number of events signal in Energy bins in a 1D matrix) 
    #(2D Martix - (statistical uncertainty)^2 in a the diagonal of the matrix)^-1 
    #(number of events signal in Energy bins in a 1D matrix)^Transpose
    signal = 'lee'
    
    backgr = ['nue','numu', 'nc']
    if not group_nues:
        backgr = [ 'nuefrommu', 'nuefromK0', 'nuefromKCharged', 'numu', 'nc' ]
        
    if not ignoreCosmics: backgr.append('cosmic')
    print "backgrs = ",backgr
    nbins = len(histos['lee'][1])-1

    bkvec  = np.zeros((1,nbins))
    emtx   = np.zeros((nbins,nbins))

    sigvec = np.array(np.array(histos[signal][0])).reshape((1,nbins))
    sigvec = sigvec * POTscalefactor
    
    for x in backgr:
        bkvec = bkvec + np.array(histos[x][0])
    bkvec.reshape((1,nbins))
    bkvec = bkvec * POTscalefactor

    for x in xrange(nbins):
        emtx[x][x] = bkvec[0][x]

    emtxinv = np.linalg.inv(emtx)

    chisq = float(sigvec.dot(emtxinv).dot(sigvec.T))
    #print "Sqrt of that (==sigma?) is ",np.sqrt(chisq)
    return np.sqrt(chisq)

In [None]:
def plotFlashtimes():
    """Overlay the `_flash_time` distributions of the cosmic and nue samples.

    Opens a new figure with a logarithmic y axis and histograms the
    `_flash_time` column of two entries of the module-level `dfs`: 'cosmic'
    and either 'nue' (when `group_nues` is set) or 'nuefrommu'. The counts are
    raw, unweighted entries. Returns None.
    """
    # With split nues, the 'nuefrommu' sub-sample stands in for the nue sample.
    nue_key = 'nue' if group_nues else 'nuefrommu'
    legend_text = {'cosmic':'Cosmics', 'nuefrommu':'Nues from Muons', 'nue':'Nues'}
    time_edges = np.linspace(-1200,3500,100)

    plt.figure(figsize=(10,6))
    plt.grid(True)
    plt.yscale('log')
    for sample in ('cosmic', nue_key):
        plt.hist(dfs[sample]['_flash_time'],
                 bins=time_edges,
                 color=colors[sample],
                 label=legend_text[sample])
    plt.legend()
    plt.title('Reconstructed Nue Matched Flash Time w.r.t. Trigger',fontsize=16)
    plt.xlabel('Time of Nue Matched Flash w.r.t. Trigger [us]',fontsize=14)
    plt.ylabel('Counts (Not normalized in any way!)',fontsize=14)

In [None]:
# Draw the flash-time comparison of the cosmic and nue samples.
plotFlashtimes()

In [None]:
# Stacked backgrounds only (the 'lee' signal is removed) after the default and
# fiducial-volume cuts.
# NOTE: this rebinds the module-level `binning` that gen_histos reads.
binning = np.linspace(0.1, 3, 25)
query = ' and '.join([defaultcut, fidvolcut])
print(query)
hists = gen_histos(myquery=query, plotvar='_e_nuReco_better', scalefactor=1000.)
# Drop the signal so that only the backgrounds are drawn.
if 'lee' in hists:
    del hists['lee']
plot_fullstack(hists, plotvar='_e_nuReco_better')

In [None]:
# Full stack (backgrounds plus 'lee' signal) in reconstructed neutrino energy,
# annotated with the statistics-only significance.
# NOTE: this rebinds the module-level `binning` that gen_histos reads.
binning = np.linspace(0.1, 3, 25)

# Hand-written selection; it has the same y/z bounds as defaultcut+' and '+fidvolcut.
query = defaultcut + ' and _y_vtx < 111.5 and _y_vtx > -111.5 and _z_vtx > 5. and _z_vtx < 1031.8'
# Optional extra selections, currently disabled:
#query += ' and _has_muon_child == 0'
#query += ' and _n_nues_in_evt == 1'
#query += ' and _maybe_pi0_MID == 0'

hists = gen_histos(myquery=query, plotvar='_e_nuReco_better', scalefactor=1000.)
plot_fullstack(hists, plotvar='_e_nuReco_better')
sigma = computeSigma(hists, POT=6.6, ignoreCosmics=False)
# Place the annotation at 55% of the current upper y limit; the result is
# assigned only so the Text object is not echoed as cell output.
blah = plt.text(1.25, plt.ylim()[1]*0.55, 'Sigma (Stat only): %0.2f'%sigma, fontsize=24)

In [None]:
# Full stack in reconstructed neutrino energy with an additional cut on the
# `hypothesis` column, annotated with the statistics-only significance.
# NOTE: this rebinds the module-level `binning` that gen_histos reads.
binning = np.linspace(0.1,3,25)
print(fidvolcut)
# Build the selection from `fidvolcut` itself, so that the cut printed above is
# guaranteed to be the cut that is applied (the bounds are the same as the
# hand-typed copy that used to be here).
query = defaultcut+' and '+fidvolcut
# NOTE(review): this threshold (0.05) differs from `hypothcut`
# ('hypothesis > 0.69'); confirm which one is intended here.
query += ' and hypothesis > 0.05'
hists = gen_histos(myquery=query,plotvar='_e_nuReco_better',scalefactor=1000.)
plot_fullstack(hists,plotvar='_e_nuReco_better')
sigma = computeSigma(hists,POT = 6.6,ignoreCosmics=False)
# Assigned only so the Text object is not echoed as cell output.
blah = plt.text(1.5,plt.gca().get_ylim()[1]*0.42,'Sigma (Stat only): %0.2f'%sigma,fontsize=24)

In [None]:
# Same stack in reconstructed neutrino energy, but with the cosmic sample
# removed from both the plot and the significance.
# NOTE: this rebinds the module-level `binning` that gen_histos reads.
binning = np.linspace(0.1, 3, 25)
query = defaultcut + ' and _y_vtx < 111.5 and _y_vtx > -111.5 and _z_vtx > 5. and _z_vtx < 1031.8'
hists = gen_histos(myquery=query, plotvar='_e_nuReco_better', scalefactor=1000.)
if 'cosmic' in hists:
    del hists['cosmic']
plot_fullstack(hists, plotvar='_e_nuReco_better')
# ignoreCosmics=True matches the removal of the 'cosmic' histogram above.
sigma = computeSigma(hists, POT=6.6, ignoreCosmics=True)
# Assigned only so the Text object is not echoed as cell output.
blah = plt.text(1.25, plt.ylim()[1]*0.55, 'Sigma (Stat only): %0.2f'%sigma, fontsize=24)

In [None]:
# Full stack in the `_e_CCQE` variable, annotated with the statistics-only
# significance.
# NOTE: this rebinds the module-level `binning` that gen_histos reads.
binning = np.linspace(0.1, 3, 25)
query = defaultcut + ' and _y_vtx < 111.5 and _y_vtx > -111.5 and _z_vtx > 5. and _z_vtx < 1031.8'
hists = gen_histos(myquery=query, plotvar='_e_CCQE', scalefactor=1000.)
plot_fullstack(hists, plotvar='_e_CCQE')
sigma = computeSigma(hists, POT=6.6, ignoreCosmics=False)
# Assigned only so the Text object is not echoed as cell output.
blah = plt.text(1.25, plt.ylim()[1]*0.55, 'Sigma (Stat only): %0.2f'%sigma, fontsize=24)

In [None]:
# Full stack in the `_e_CCQE` variable with an additional cut on the
# `hypothesis` column, annotated with the statistics-only significance.
# NOTE: this rebinds the module-level `binning` that gen_histos reads.
binning = np.linspace(0.1, 3, 25)
query = defaultcut + ' and _y_vtx < 111.5 and _y_vtx > -111.5 and _z_vtx > 5. and _z_vtx < 1031.8'
query = query + ' and hypothesis > 0.05'
hists = gen_histos(myquery=query, plotvar='_e_CCQE', scalefactor=1000.)
plot_fullstack(hists, plotvar='_e_CCQE')
sigma = computeSigma(hists, POT=6.6, ignoreCosmics=False)
# Assigned only so the Text object is not echoed as cell output.
blah = plt.text(1.25, plt.ylim()[1]*0.45, 'Sigma (Stat only): %0.2f'%sigma, fontsize=24)

In [None]:
POTvals = np.linspace(0.1,6.6,10)
binning = np.linspace(0.1,3,25)
query = defaultcut+' and _y_vtx < 111.5 and _y_vtx > -111.5 and _z_vtx > 5. and _z_vtx < 1031.8'
query += ' and hypothesis > 0.05'
myhists = gen_histos(myquery=query,plotvar='_e_nuReco_better',scalefactor=1000.)
sigvals = [ computeSigma(myhists,POT=myPOT) for myPOT in POTvals ]
print "Significance at 6.6 is ",sigvals[-1]
plt.figure(figsize=(10,6))
plt.plot(POTvals,sigvals,'ro--')
plt.grid(True)
plt.xlabel("POT Delivered [e20]",fontsize=16)
plt.ylabel("Sqrt(deltaChiSquare) [sigma]",fontsize=16)
#plt.ylim((0,5))
blah= plt.title("Sigma vs. POT: LEE Analysis",fontsize=20)

In [None]:
from scipy import stats

# Cross-check of the significance using scipy's chi-square test: compare the
# expected background-only spectrum with background + signal, without any
# selection applied.
myhists = gen_histos(myquery='', plotvar='_e_nuReco',scalefactor=1000.)

signal = 'lee'
# NOTE(review): only 'numu' and 'nue' are summed here ('nc' is disabled and
# 'cosmic' is absent), unlike the background list in computeSigma -- confirm.
backgr = [ 'numu', 'nue' ]#, 'nc' ]
nbins = len(myhists[signal][1])-1
POTscalefactor = 1.
sigvec = np.array(myhists[signal][0]) * POTscalefactor

bkvec  = np.zeros(nbins)
for x in backgr:
    bkvec = bkvec + np.array(myhists[x][0])
# From here on `sigvec` holds the "observed" spectrum: signal on top of background.
sigvec = sigvec + bkvec

# stats.chisquare returns a (statistic, p-value) pair. Only the statistic is a
# chi-square, so take the square root of that element alone instead of of the
# whole pair (which would also print a meaningless sqrt of the p-value).
print(np.sqrt(stats.chisquare(sigvec,f_exp=bkvec)[0]))