# Get edge parameterization for different MC/data productions

1. Load production lists - filtered data from alien
2. Extract skimmed data - create pandas with subset of information
3. Make parameterization of performance (as in PWGPP-567/comparisondEdgePerf.ipynb)
* Variables to describe
  * Ncrossed raw and Ncl
  * Fraction of associated clusters
  * DCA resolution+bias
  * dEdx bias
  * TPC/ITS chi2
  
*  Variables binning resp. for ML:
  * local position (at radii)
  * 1/dEdx
  * multiplicity
  * q/pt
  

4. Visualize/compare - regression maps  

In [None]:
import ROOT
from RootInteractive.Tools.aliTreePlayer import *
from RootInteractive.InteractiveDrawing.bokeh.bokehDrawSA import *
from bokeh.models import *
from bokeh.io import output_notebook
from RootInteractive.MLpipeline.NDFunctionInterface import  DataContainer, Fitter
import os.path 
import os
import subprocess
# Configure bokeh to render its output inline in the notebook.
output_notebook()
# ROOT canvas named "canvas"; the handle `c` is not used by the cells shown below.
c  = ROOT.TCanvas("canvas","canvas")

# Define parameters of notebook

In [None]:
# Production tags to process; file names are built as "filtered"+prod+".list" / ".root".
productions=["LHC19f5b","LHC19f5b_2", "LHC19f5b_3"]
# Passed to MakeChainRandom when the input chain of a production is built.
nChunks=10
# Per-production data frames, filled by the loading loop further down.
dataFrames=[]
# Per-production lists of fitters returned by makeFitsCR.
fittersCr=[]

In [None]:
#os.system("alien-token-init miranov")
# Open the AliEn grid connection through ROOT; the user name is hard-coded here.
ROOT.TGrid.Connect("alien","miranov")

# Prepare production lists
* a list is created only if it does not exist yet
* only subset of data used

In [None]:
# Load the filter macro from the local checkout under $HOME.
# Presumably it defines makeRDFrameSnapshot0 used in the snapshot cell -- TODO confirm.
ROOT.gROOT.LoadMacro("$HOME/github/RootInteractiveTest/JIRA/Tools/rdataframeFilter.C")

In [None]:
# Create the AliEn file list for every production that does not have one yet.
# Each list is checked on its own, so a single missing list (or a production
# newly added to `productions`) is generated without touching the others.
# The year (2019) and run number (296433) stay hard-coded in the search path.
for prod in productions:
    listName="filtered"+prod+".list"
    if os.path.isfile(listName):
        continue
    # alien_find lists the Filter*root files; sed rewrites the leading /alice
    # into alien:///alice before the result is redirected into the list file.
    os.system("alien_find /alice/sim/2019/"+prod+"/296433/SpacePointCalibrationMerge Filter*root  | sed s_/alice_alien:///alice_> "+listName)

### Create snapshots

In [None]:
# Write a snapshot ROOT file for each production that has none on disk yet.
for prod in productions:
    finput="filtered"+prod+".list"
    fname="filtered"+prod+".root"
    if os.path.isfile(fname):
        # snapshot already exists from an earlier run - nothing to do
        continue
    print("Processing production\t", prod, fname, finput)
    # chain of "highPt" trees built from the list file, nChunks passed through
    tree = ROOT.AliXRDPROOFtoolkit.MakeChainRandom(finput,"highPt","",nChunks,0)
    ROOT.makeRDFrameSnapshot0(tree,fname,5)

In [None]:
def makeFitsCR(df):
    """Fit random-forest regressions of the NCr* variables versus LocalSector*.

    For each pair (NCr0, LocalSector0), (NCr15, LocalSector15),
    (NCr30, LocalSector30), (NCr45, LocalSector45) a fit sample is built from
    the rows with abs(qP)<0.5: all rows with abs(LocalSectorX)>0.14 plus a
    random sample of the abs(qP)<0.5 rows, a quarter of that size.
    Two RandomForest regressors ("RF10", "RF15") are registered and fitted,
    and their statistics are attached to ``df`` through ``AppendStatPandas``.

    :param df: pandas data frame holding the columns qP, NCr0/15/30/45 and
        LocalSector0/15/30/45; it is passed to ``AppendStatPandas`` as target
    :return: list of ``Fitter`` objects, one per fitted variable, in the order
        NCr0, NCr15, NCr30, NCr45
    """
    # Local import: the notebook does not import pandas explicitly.
    import pandas as pd

    varList=["NCr0","NCr15", "NCr30", "NCr45"]
    paramListP=["LocalSector0","LocalSector15","LocalSector30","LocalSector45"]
    # Common explanatory variables; qPt is currently switched off.
    paramList0=[]
    fitters=[]
    for varFit, paramP in zip(varList, paramListP):
        paramList=paramList0 +[paramP]
        print("Procesing variable", varFit, ":", paramList)
        # The query is repeated per iteration on purpose: df is handed to
        # AppendStatPandas below, so its content may differ between iterations.
        dfCut=df.query("abs(qP)<0.5")
        dfEdge=dfCut.query("abs("+ paramP+")>0.14")
        # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
        dfFit=pd.concat([dfEdge, dfCut.sample(dfEdge.shape[0]//4)])
        npoints=dfFit.shape[0]
        # The two halves are presumably the train/test sample sizes -- TODO confirm.
        dataContainer = DataContainer(dfFit,  paramList, varFit, [npoints//2,npoints//2])
        fitter = Fitter(dataContainer)
        fitter.Register_Method('RF10','RandomForest', 'Regressor', n_estimators=100, max_depth=10,n_jobs=8)
        fitter.Register_Method('RF15','RandomForest', 'Regressor', n_estimators=100, max_depth=15,n_jobs=8)
        fitter.Fit()
        fitter.AppendStatPandas("RF10",df,varFit)
        fitter.AppendStatPandas("RF15",df,varFit)
        fitters.append(fitter)
    return fitters

###  Make and register regression

In [None]:
# Per production: read the snapshot tree into pandas, run the NCr fits and
# keep both the data frame and the fitters for the drawing cells below.
dfAll=[]
for i, prod in enumerate(productions):
    fname="filtered"+prod+".root"
    print("Loading tree",fname)
    f = ROOT.TFile.Open(fname)
    tree= f.Get("tree")
    # Column patterns and the selection string are handed to tree2Panda as is.
    df=tree2Panda(tree,[".*NCr.*",".*qP.*",".*Local.*",".*tgl.*"],"abs(qP)<0.25")
    # dfAll holds the same data frame objects as dataFrames.
    dfAll.append(df);
    fitters=makeFitsCR(df)
    # insert(i, ...) places the new result at index i even when the global
    # lists still hold entries from an earlier execution of this cell.
    dataFrames.insert(i,df)
    fittersCr.insert(i,fitters)
    

In [None]:
# Quick sanity check: shape of the first loaded data frame.
dfAll[0].shape

# Draw regression result for data of interest 

In [None]:
# Interactive dashboard for one production: RF15 mean of NCr0/15/30 versus the
# corresponding LocalSector variable, plus range widgets for the selection.
iProd=2  # index into productions / dataFrames
df=dataFrames[iProd]
# Hover fields have to name existing columns. The former ("cr0_25","@cr025")
# entry referred to a column that is neither loaded nor created in this
# notebook; NCr0 is the fitted variable behind the first figure.
tooltips=[("qP","@qP"), ("pz/pt","@tgl"), ("NCr0","@NCr0")]
# One row per figure: [x variables], [y variables], {drawing options}.
# The commented rows are the raw (non-fitted) alternatives.
figureArray= [
    #[['LocalSector0'], ['NCr0'], {"size": 4, "colorZvar":"qPt","errY":"cr0_25RF15RMS"}],
    [['LocalSector0'], ['NCr0RF15Mean'], {"size": 4, "colorZvar":"qPt"}],
    #[['LocalSector15'], ['NCr15'], {"size": 4, "colorZvar":"qPt"}],
    [['LocalSector15'], ['NCr15RF15Mean'], {"size": 4, "colorZvar":"qPt"}],
    #[['LocalSector30'], ['NCr30'], {"size": 4, "colorZvar":"qPt"}],
    [['LocalSector30'], ['NCr30RF15Mean'], {"size": 4, "colorZvar":"qPt"}],
    ['table']
]
# Layout rows refer to the figureArray entries by index (3 is the table).
figureLayout=[
    [0, {'commonX':1,'y_visible':2}],
    [1, {'commonX':1,'y_visible':2}],
    [2, {'commonX':1,'y_visible':2}],
    [3, {'plot_height':50}],
    {'plot_height':100,'commonX':1, 'sizing_mode':'scale_width'}
]
widgetArray=[
    ['range', ['qPt']],
    ['range', ['tgl']],
    ['range', ['LocalSector0']],
    ['range', ['LocalSector15']],
    ['range', ['LocalSector30']],
]
# Widget rows refer to the widgetArray entries by index.
widgetLayout=[ [0,1], [2,3,4], {'sizing_mode':'scale_width'} ]

output_file("makeEdgePlot"+productions[iProd]+".html")
# Only the first 2000 rows are drawn.
bokehDrawSA.fromArray(df.head(2000), "abs(qPt)<1", figureArray, widgetArray, layout=figureLayout, tooltips=tooltips,widgetLayout=widgetLayout,sizing_mode="scale_width")

# Backup

In [None]:
# Scratch area: work on the data frame of the first production.
df =dataFrames[0]

In [None]:
# Scratch cell: numpy has no attribute `rndm` (the lookup raised
# AttributeError); the random-number sub-module is np.random.
np.random

In [None]:
# Scratch version of the fit-sample selection: rows beyond the position cut
# plus a random sample of the full frame, half that size.
# NOTE(review): pos0_25 matches none of the column patterns loaded above --
# verify that the column exists before running this cell.
import pandas as pd  # cell-local import; pandas is not imported explicitly elsewhere

dfFit=df.query("abs(pos0_25)>0.12")
# DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
dfFit=pd.concat([dfFit, df.sample(dfFit.shape[0]//2)])
dfFit.shape[0]

In [None]:
# Histogram of pos0_25 with 50 bins.
# NOTE(review): dfSample is not defined in the cells shown here -- presumably
# dfFit or a sampled frame was intended; confirm before running.
dfSample.hist("pos0_25",bins=50)

In [None]:
# Number of rows in the fit sample.
dfFit.shape[0]