**To have ROOT available, run the following:**
1. Inside alienv: \$> alisoft
2. With the rootInteractive conda env: \$> conda activate rootInteractive
3. With Python 2: \$> python2 -m IPython notebook &

In [None]:
import ROOT
import uproot 
import root_numpy
import root_pandas
import pandas as pd
import numpy as np

In [None]:
# Path of the input ROOT file; it is reused by the cells below.
fname='../analysis/data/trending_merged_LHC18r_withStatusTree.root'

## uproot

In [None]:
# Open the file with uproot, list the objects stored in it and keep a
# handle on the 'tpcQA' tree.
f = uproot.open(fname)
print(f.keys())
tpcQA = f['tpcQA']

In [None]:
#dir(tpcQA[name].array()[0])
# tpcQA.allkeys()
# dir(tpcQA)
# Quick look at the contents of a single branch
tpcQA['chunkRMS'].array()

In [None]:
def graph2df(graph_arr, name):
    """Flatten an array of graph objects into a wide DataFrame.

    Every graph becomes one row holding its x values followed by its y
    values, in columns named ``<name>_X_<i>`` and ``<name>_Y_<i>``.

    Parameters
    ----------
    graph_arr : sequence
        Objects exposing ``npoints``, ``xvalues`` and ``yvalues``. The
        number of columns is taken from the first element.
    name : str
        Column-name prefix. If it ends with '.', all dots are removed.

    Returns
    -------
    pandas.DataFrame
        One row per graph; an empty frame when ``graph_arr`` is empty.
    """
    if name.endswith('.'):
        name = name.replace('.', '')
    # len() works for lists and numpy arrays alike; with no graphs there is
    # no first element to take the point count from.
    if len(graph_arr) == 0:
        return pd.DataFrame()
    npoints = graph_arr[0].npoints
    indices = range(npoints)
    col_names = ['{}_X_{}'.format(name, i) for i in indices]
    col_names += ['{}_Y_{}'.format(name, i) for i in indices]
    data = [g.xvalues.tolist() + g.yvalues.tolist() for g in graph_arr]
    return pd.DataFrame(data, columns=col_names)

# Try the conversion on a single branch; the trailing ';' keeps the
# notebook from echoing the returned DataFrame.
name = 'grNclSectorPosA.'
graph2df(tpcQA[name].array(), name);

### simple leaves

In [None]:
%%time 

# Select the branches whose name has no '.', plus those whose name
# contains 'fString', and read them into one DataFrame.
numeric_branches = [key for key in tpcQA.allkeys()
                    if 'fString' in key or '.' not in key]
df_numeric = tpcQA.pandas.df(branches=numeric_branches)

### graphs

In [None]:
%%time

# Convert every graph-holding branch (name containing '.') to a DataFrame.
graph_dfs = []
for c in tpcQA.keys():
    if '.' not in c:
        continue
    # Branches that have sub-branches are only reported, not converted
    if tpcQA.get(c).keys():
        print(c)
        continue
    # Read the branch once and reuse the result for the type check,
    # the printout and the conversion.
    branch_arr = tpcQA[c].array()
    if 'Graph' not in str(branch_arr[0]):
        continue
    # Report "<branch> <npoints>" before converting, so the branch being
    # processed is visible if the conversion fails.
    print('{} {}'.format(c, branch_arr[0].npoints))
    next_df = graph2df(branch_arr, c)
    graph_dfs.append(next_df)

In [None]:
%%time
# Put the per-branch graph frames side by side (axis=1)
df_graphs = pd.concat(graph_dfs, axis=1)

In [None]:
# Check whether any graph column name contains 'period'
print([c for c in df_graphs.columns if 'period' in c])

In [None]:
# List all graph column names
df_graphs.columns

### aliases

The friend tree "Tstatus" has to be removed; otherwise some flags from statusTree are assigned per run instead of per chunk. This happens because some flags are also present in the friend tree, which is checked first.

In [None]:
%%time

# Open the same file through PyROOT and detach the 'Tstatus' friend from
# the tree before the aliases are evaluated.
froot = ROOT.TFile.Open(fname)
tpcQA_root = froot.Get('tpcQA')
tpcQA_root.RemoveFriend(tpcQA_root.GetFriend('Tstatus'))

# Aliases ending in one of these suffixes are not exported
skipped_suffixes = ('Min', 'Max', 'Mean')

aliases_cols = []
aliases_names = []
for c in tpcQA.aliases.keys():
    if c.endswith(skipped_suffixes):
        continue
    next_col = root_numpy.tree2array(tpcQA_root, c)
    print(c)
    # Entries that are themselves arrays are reduced to their first element
    if isinstance(next_col[0], np.ndarray):
        next_col = [el[0] for el in next_col]
    aliases_cols.append(next_col)
    aliases_names.append('alias_' + c)

# One column per alias: the collected columns are stacked and transposed
alias_matrix = np.array(aliases_cols).T
df_aliases = pd.DataFrame(alias_matrix, columns=aliases_names)

In [None]:
# Summary statistics of the alias columns
df_aliases.describe()

### merge all

In [None]:
%%time
# Column-wise concatenation (axis=1) of the three frames
df_merged = pd.concat([df_numeric, df_aliases, df_graphs], axis=1)
# df_merged.describe()

In [None]:
# Show the 'period.fString' and 'run' columns of the merged frame
df_merged[['period.fString', 'run']]

### add interactionRate

In [None]:
# Derive the path of the EVS trending file from fname and open its
# 'trending' tree.
fname_evs = fname.replace('trending', 'trending_EVS')
fname_evs = fname_evs.replace('_withStatusTree.root', '.root')
trending_evs = uproot.open(fname_evs).get('trending')

# interactionRate values, indexed by the 'run' branch of the same tree
df_irate = trending_evs.pandas.df(branches=['interactionRate'])
df_irate.index = trending_evs.pandas.df(branches=['run'])['run']

# Keep only the first entry of every run
is_repeated_run = df_irate.index.duplicated(keep='first')
df_irate = df_irate[~is_repeated_run]

In [None]:
# Preview the first rows of the interaction-rate table
df_irate.head()

In [None]:
def row2irate(row):
    """Return the interaction rate for the run of ``row``.

    Looks up ``row['run']`` in the module-level ``df_irate`` table.

    Returns the scalar 'interactionRate' value of that run, or -1 (after
    printing a notice) when the run is not present in the table.
    """
    run = row['run']
    try:
        # Select the single cell rather than the whole table row, so that a
        # scalar comes back in both the found and the not-found case.
        return df_irate.loc[run, 'interactionRate']
    except KeyError:
        # A formatted string prints the same under Python 2 and Python 3
        print('interactionRate not found for run: {}'.format(run))
        return -1

# Look up the interaction rate row by row (axis=1) via row2irate
df_merged['interactionRate'] = df_merged.apply(row2irate, axis=1)
# df_merged['run']
# df_irate.loc[df_merged.loc[1]['run']]

In [None]:
%%time
print(df_merged.shape)

# Count the rows per period once; the CSV is written only when the data
# does not contain more than one period.
period_counts = df_merged['period.fString'].value_counts()
periods = period_counts.index.tolist()
counts = period_counts.values.tolist()
if len(periods) > 1:
    # list(...) so that the (period, count) pairs themselves are printed
    # regardless of the Python version
    print('\nERROR - there is more than one period within data!!!\n\tthey are: {}\n'.format(list(zip(periods, counts))))
else:
    df_merged.to_csv(fname.replace('_withStatusTree.root', '_withGraphs.csv'))

## Dirty

In [None]:
# Mean of 'alias_global_Warning' per run, computed in a single grouped pass
# and printed in ascending run order. Runs whose mean lies strictly between
# 0 and 1 are marked with an arrow.
# NOTE(review): presumably the flag is 0/1 per chunk, so a marked run has
# chunks with differing flags - confirm.
warning_mean_per_run = df_merged.groupby('run')['alias_global_Warning'].mean()
for xrun, aver in warning_mean_per_run.items():
    suffix = '\t<---------' if 0 < aver < 1 else ''
    print('\n {} -- {} {}'.format(xrun, aver, suffix))

## root_numpy

In [None]:
# Read the whole 'tpcQA' tree through root_numpy
rn_tpcQA = root_numpy.root2array(fname, treename='tpcQA')

In [None]:
import ROOT
import root_numpy


In [None]:
# List the aliases defined on the PyROOT tree
tpcQA_root.GetListOfAliases()

## root_pandas

In [None]:
# Read two branches of 'tpcQA' with root_pandas, asking it to flatten 'grNclSectorPosA.'
# NOTE(review): the flattened branch is not listed in `columns` and is passed as a plain
# string - confirm against the root_pandas documentation that this does what is intended.
root_pandas.read_root(fname, key='tpcQA', columns=['chunkID', 'chunkMedian'], flatten='grNclSectorPosA.')

In [None]:
root_pandas.read_root?

## RootInteractive

## pyROOT

In [None]:
# Plain PyROOT access: open the file, fetch the 'tpcQA' tree and draw one branch.
# NOTE(review): pyr_f is presumably kept in a variable so the file stays open
# while the tree is in use - confirm.
pyr_f = ROOT.TFile.Open(fname)
pyr_tpcQA = pyr_f.Get('tpcQA')
pyr_tpcQA.Draw('chunkMedian')