In [None]:
%matplotlib inline
from ROOT import TFile, TTree

import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import re

from root_numpy import root2array


In [None]:
filedir = '/Users/davidkaleko/larlite/UserDev/KalekoAna/TrackStudy/mac/polished_output/'
!ls $filedir

In [None]:
# ROOT file name for each of the four samples (relative to `filedir`).
mcnu    = 'TrackDataMCComp_XiaoEvts_MCC7BNBCosmics_173400evts_out.root'
mcbkg   = 'TrackDataMCComp_XiaoEvts_INTIMECOSMICS_20602evts_out.root'
datanu  = 'TrackDataMCComp_XiaoEvts_BNBFILTER_545906evts_out.root'
databkg = 'TrackDataMCComp_XiaoEvts_BNBEXT_379637evts_out.root'

# Sample name -> full path of its ROOT file.
files = dict(mcnu=filedir + mcnu,
             mcbkg=filedir + mcbkg,
             datanu=filedir + datanu,
             databkg=filedir + databkg)

# Sample name -> number of events analyzed; each value matches the
# "<N>evts" token embedded in the corresponding file name above.
n_evts_analyzed = dict(mcbkg=20602,
                       databkg=379637,
                       datanu=545906,
                       mcnu=173400)

In [None]:
# Sample name -> DataFrame built from the 'trk_tree' tree of that sample's ROOT file.
df_dict = {}

# `.items()` instead of the Python-2-only `.iteritems()`: same iteration here,
# and the cell keeps working if the notebook is run under Python 3.
for sample, filepath in files.items():
    df_dict[sample] = pd.DataFrame( root2array ( filepath, 'trk_tree' ) )

In [None]:
# Sample name -> POT corresponding to the events actually analyzed:
#   (events analyzed / reference event count) * reference POT
# NOTE(review): the reference counts and POT values are presumably the totals
# of each full sample -- confirm against the production bookkeeping.
pot_per_sample = {
    sample: (float(n_evts_analyzed[sample]) / ref_evts) * ref_pot
    for sample, ref_evts, ref_pot in (
        ('datanu',  545906, 4.88e+19),
        ('databkg', 379637, 5.922589105698338e+19),
        ('mcbkg',   20602,  9.56562114062697e+18),
        ('mcnu',    173400, 2.099007e+20),
    )
}

In [None]:
# Sanity check of the normalization inputs: event counts, then POT per sample.
print(n_evts_analyzed)
print(pot_per_sample)

In [None]:
# Names of the samples that were loaded into df_dict.
print(list(df_dict.keys()))

In [None]:
# Column names available in the 'databkg' DataFrame.
print(df_dict['databkg'].columns.values)

In [None]:
def plotVariableComparisonBkg(myvar, mybins, myquery, mytitle, myshapeonly = False, myylims = None,
                           myxlabel = 'test', myylabel = 'test'):
    """Overlay one variable for the 'mcbkg' and 'databkg' samples.

    Reads the module-level ``df_dict`` (sample name -> DataFrame) and
    ``pot_per_sample`` (sample name -> POT). Every entry of a sample is
    weighted by ``5e19 / pot_per_sample[sample]``. The 'mcbkg' sample is drawn
    as a filled histogram; the 'databkg' sample is drawn as points with error
    bars at the bin centers.

    Parameters
    ----------
    myvar : str
        DataFrame column to histogram.
    mybins : sequence or int
        Binning, forwarded to ``np.histogram`` / ``plt.hist``.
    myquery : str
        ``DataFrame.query`` expression applied to both samples; an empty
        string means no selection.
    mytitle : str
        Figure title.
    myshapeonly : bool
        Forwarded as ``normed=`` so both distributions are unit-normalized.
    myylims : tuple or None
        Explicit y-axis limits; when None the axis runs from 0 to the
        automatic upper limit.
    myxlabel, myylabel : str
        Axis labels.

    Returns
    -------
    None. A new matplotlib figure is created and drawn on.
    """
    plt.figure(figsize=(10,6))
    plt.grid(True)
    plt.title(mytitle,fontsize=16)

    # ---- 'mcbkg' sample: filled histogram --------------------------------
    intime_df = df_dict['mcbkg']
    if myquery:
        intime_df = intime_df.query(myquery)
    intime_weight = (5.e19) / pot_per_sample['mcbkg']
    intime_vals = intime_df[myvar].values
    intime_weights = np.full(len(intime_vals), intime_weight)

    # Histogram computed separately only to obtain the integral for the legend.
    intime_counts, _ = np.histogram(intime_vals, bins=mybins,
                                    weights=intime_weights,
                                    normed=myshapeonly)
    intime_integral = np.sum(intime_counts)

    plt.hist(intime_vals, bins=mybins,
             label='MC: Cosmic Bkg. from In Time Cosmic Entries = %0.2f' % intime_integral,
             alpha=0.5,
             weights=intime_weights,
             # Fix: this call previously ignored myshapeonly, so in shape-only
             # mode the drawn MC was POT-weighted while the legend integral and
             # the data points were unit-normalized.
             normed=myshapeonly,
             color='cyan',
             stacked=False,
             rwidth=1.)

    # ---- 'databkg' sample: points with error bars ------------------------
    ext_df = df_dict['databkg']
    if myquery:
        ext_df = ext_df.query(myquery)
    ext_vals = ext_df[myvar].values
    ext_weight = (5.e19) / pot_per_sample['databkg']

    if len(ext_vals):
        # Fully transparent histogram: used only to get the bin contents and
        # edges (it still takes part in the automatic axis scaling).
        ext_counts, ext_edges, _ = plt.hist(ext_vals, bins=mybins, color='g',
                                            alpha=0,
                                            weights=np.full(len(ext_vals), ext_weight),
                                            normed=myshapeonly)

        ext_centers = ext_edges[:-1] + (ext_edges[1:] - ext_edges[:-1]) / 2.
        # For weighted counts y = N*w this is sqrt(N)*w.
        # NOTE(review): in shape-only mode the bin contents are normalized but
        # this expression is not rescaled accordingly -- confirm whether error
        # bars are meaningful with myshapeonly=True.
        ext_errs = np.sqrt(np.array(ext_counts) * ext_weight)
        ext_integral = np.sum(ext_counts)
        plt.errorbar(ext_centers, ext_counts, fmt='ro', yerr=ext_errs,
                     label='BNB EXT DATA: Entries = %0.2f' % ext_integral)

    # ---- cosmetics --------------------------------------------------------
    plt.ylim((0, plt.ylim()[1]))
    if myylims is not None:
        plt.ylim(myylims)
    leg = plt.legend()
    plt.xlabel(myxlabel,fontsize=16)
    plt.ylabel(myylabel,fontsize=16)
    leg.get_frame().set_alpha(0.5)

In [None]:
# Track-length comparison of the two background samples.
myvar       = 'trk_len'
mybins      = np.linspace(0, 1000, 20)   # 20 edges -> 19 equal-width bins on [0, 1000]
mytitle     = 'Length of All Tracks (>3cm) In Event [Xiao Selected Events]'
myshapeonly = False
myquery     = ''                         # empty string: no selection applied
myxlabel    = 'Reco Track Length [cm]'
myylabel    = 'Events (5e19 POT Normalized)'
myylims     = (0, 2000)
plotVariableComparisonBkg(myvar, mybins, myquery, mytitle,
                          myshapeonly=myshapeonly, myylims=myylims,
                          myxlabel=myxlabel, myylabel=myylabel)

In [None]:
def plotVariableComparison(myvar, mybins, myquery, mytitle, myshapeonly = False, myylims = None,
                           myxlabel = 'test', myylabel = 'test'):
    """Compare one variable between the 'mcnu' sample and ('datanu' - 'databkg').

    Reads the module-level ``df_dict`` (sample name -> DataFrame) and
    ``pot_per_sample`` (sample name -> POT). Every entry of a sample is
    weighted by ``5e19 / pot_per_sample[sample]``. The 'mcnu' sample is drawn
    as a filled histogram. The bin-by-bin difference 'datanu' minus 'databkg'
    is drawn as points with error bars; if the 'databkg' selection is empty,
    the 'datanu' bin contents are drawn alone (under the same legend label).

    Parameters
    ----------
    myvar : str
        DataFrame column to histogram.
    mybins : sequence or int
        Binning, forwarded to ``np.histogram`` / ``plt.hist``.
    myquery : str
        ``DataFrame.query`` expression applied to all samples; an empty
        string means no selection.
    mytitle : str
        Figure title.
    myshapeonly : bool
        Forwarded as ``normed=`` to every histogram call.
    myylims : tuple or None
        Explicit y-axis limits; when None the axis runs from 0 to the
        automatic upper limit.
    myxlabel, myylabel : str
        Axis labels.

    Returns
    -------
    None. A new matplotlib figure is created and drawn on.
    """
    plt.figure(figsize=(10,6))
    plt.grid(True)
    plt.title(mytitle,fontsize=16)

    # ---- 'mcnu' sample: filled histogram ---------------------------------
    mc_df = df_dict['mcnu']
    if myquery:
        mc_df = mc_df.query(myquery)
    mc_weight = (5.e19) / pot_per_sample['mcnu']
    mc_vals = mc_df[myvar].values
    mc_weights = np.full(len(mc_vals), mc_weight)

    # Histogram computed separately only to obtain the integral for the legend.
    mc_counts, _ = np.histogram(mc_vals, bins=mybins,
                                weights=mc_weights,
                                normed=myshapeonly)
    mc_integral = np.sum(mc_counts)

    plt.hist(mc_vals, bins=mybins,
             label='MC: BNB+Cosmic (Signal+Background): Entries = %0.2f' % mc_integral,
             alpha=0.5,
             weights=mc_weights,
             normed=myshapeonly,
             color='b',
             stacked=True,
             rwidth=1.)

    # ---- 'datanu' sample: bin contents only ------------------------------
    bnb_df = df_dict['datanu']
    if myquery:
        bnb_df = bnb_df.query(myquery)
    bnb_vals = bnb_df[myvar].values
    bnb_weight = (5.e19) / pot_per_sample['datanu']

    # Fully transparent histogram: used only to get bin contents and edges
    # (it still takes part in the automatic axis scaling).
    bnb_counts, bnb_edges, _ = plt.hist(bnb_vals, bins=mybins, color='g',
                                        alpha=0,
                                        weights=np.full(len(bnb_vals), bnb_weight),
                                        normed=myshapeonly)
    bnb_centers = bnb_edges[:-1] + (bnb_edges[1:] - bnb_edges[:-1]) / 2.
    bnb_integral = np.sum(bnb_counts)

    # ---- 'databkg' sample and the subtraction ----------------------------
    ext_df = df_dict['databkg']
    if myquery:
        ext_df = ext_df.query(myquery)
    ext_vals = ext_df[myvar].values
    ext_weight = (5.e19) / pot_per_sample['databkg']

    if len(ext_vals):
        ext_counts, ext_edges, _ = plt.hist(ext_vals, bins=mybins, color='g',
                                            alpha=0,
                                            weights=np.full(len(ext_vals), ext_weight),
                                            normed=myshapeonly)
        ext_centers = ext_edges[:-1] + (ext_edges[1:] - ext_edges[:-1]) / 2.
        ext_integral = np.sum(ext_counts)

        diff_integral = bnb_integral - ext_integral
        # The two samples' per-bin variances (content * weight) are added.
        diff_errs = np.sqrt(np.array(bnb_counts)*bnb_weight + np.array(ext_counts)*ext_weight)
        # A vanishing error bar is replaced by 1.
        diff_errs[diff_errs == 0] = 1
        plt.errorbar(ext_centers, bnb_counts - ext_counts, fmt='mo', yerr=diff_errs,
                     label='BNB DATA - BNB EXT DATA (%0.2f)' % diff_integral)
    else:
        # Nothing to subtract: draw the 'datanu' bin contents on their own.
        diff_integral = bnb_integral
        bnb_errs = np.sqrt(np.array(bnb_counts)*bnb_weight)
        # A vanishing error bar is replaced by 1.
        bnb_errs[bnb_errs == 0] = 1
        plt.errorbar(bnb_centers, bnb_counts, fmt='mo', yerr=bnb_errs,
                     label='BNB DATA - BNB EXT DATA (%0.2f)' % diff_integral)

    # ---- cosmetics --------------------------------------------------------
    plt.ylim((0, plt.ylim()[1]))
    if myylims is not None:
        plt.ylim(myylims)
    leg = plt.legend()
    plt.xlabel(myxlabel,fontsize=16)
    plt.ylabel(myylabel,fontsize=16)
    leg.get_frame().set_alpha(0.5)

In [None]:
# Track-length comparison of the 'mcnu' sample against ('datanu' - 'databkg').
myvar       = 'trk_len'
mybins      = np.linspace(0, 1000, 20)   # 20 edges -> 19 equal-width bins on [0, 1000]
mytitle     = 'Length of All Tracks (>3cm) In Event [Xiao Selected Events]'
myshapeonly = False
myquery     = ''                         # empty string: no selection applied
myxlabel    = 'Reco Track Length [cm]'
myylabel    = 'Events (5e19 POT Normalized)'
myylims     = (0, 220)
plotVariableComparison(myvar, mybins, myquery, mytitle,
                       myshapeonly=myshapeonly, myylims=myylims,
                       myxlabel=myxlabel, myylabel=myylabel)

(onbeam - offbeam) vs. mcc7bnbcosmic:
Overall, (onbeam - offbeam) has too many short tracks compared to mcc7bnbcosmic.
Offbeam on its own has too few short tracks compared to its MC expectation.
That means onbeam has far too many short tracks.

For example, say onbeam has 50 short tracks and offbeam has 30 short tracks.
From MC we expect 40 short tracks in offbeam.
Then (onbeam - offbeam) is 20 short tracks, while bnbcosmic has 15 short tracks.

RUN ON IN-TIME COSMICS
For the events that the Xiao filter selects, histogram all track lengths and all mctrack lengths.
Since the number of tracks and the number of mctracks are not equal, maybe compare shapes only.
    -- This doesn't work, because pandora removes its cosmic tracks automatically.