# Compare histograms of variables in synchronization n-tuples

This script takes given branch names such as `pt_1` or `m_vis` and plots the histograms of all teams for each variable, overlaid on one canvas per variable. In addition, the ratio of each team's histogram to the histogram of a given reference team is computed and drawn below.

**Contributing:** Before committing your code changes, please run `Cell/All Output/Clear` or `Kernel/Restart & Clear Output`. Otherwise you'll commit the print statements, which cause unnecessary changes to the notebook.

## Setup

**NOTE:** You have to edit only this section to run the synchronization!

For each team, you need to place a `<TEAM>.yaml.txt` file in the `teams/` folder. For each model and channel, this file specifies the path of the synchronization ntuple (key `file`) and the name of the tree inside it (key `tree`).

In [None]:
# Teams to compare and the team all ratios are computed against
teams = ['KIT', 'CERN', 'DESY']
referenceTeam = 'KIT'

# Model to look up in the team configs
model = 'sm'

# Channel to look up within the selected model
channel = 'mt'

# Variables for the comparisons:
# branch name -> histogram parameters [bins, min, max]
variables = {
    'pt_1': [20, 0, 800],
    'pt_2': [20, 0, 800],
    'eta_1': [20, -3, 3],
    'eta_2': [20, -3, 3],
}

# Plot settings
# NOTE: Entry i of each list is applied to the i-th team of the `teams` list
colorMap = [2, 3, 4, 5, 6]
lineStyleMap = [1, 2, 3, 4, 5]

# Verbosity of the notebook
# NOTE: With many variables defined, verbose mode produces a lot of output
verbose = True

## Histogram variables and compute ratios

### Import modules

In [None]:
import yaml
import numpy as np
from warnings import warn
import os
from sys import stdout
import ROOT

# Enable Javascript in this notebook
%jsroot on

### Get configs from files

In [None]:
# Get configs: read one YAML file per team into `configs[team]`
# NOTE: It is assumed that the files are named `<TEAM>.yaml.txt`
configs = {}
for team in teams:
    filepath = 'teams/{}.yaml.txt'.format(team) # FIXME: Set an absolute path here!
    if not os.path.isfile(filepath):
        # Only a hint for the user; the `open` below still raises for a missing file
        warn('File not found for team {}: {}'.format(team, filepath))
    # The context manager closes the file handle again after parsing
    with open(filepath) as file_:
        # NOTE: `yaml.load` without an explicit Loader can construct arbitrary
        # Python objects and is rejected by recent PyYAML versions; the configs
        # are plain nested mappings, so `safe_load` is sufficient.
        # An empty file parses to None; store an empty dict instead so that the
        # membership checks on the config keep working.
        configs[team] = yaml.safe_load(file_) or {}

### Load trees

In [None]:
# Load files and trees from config files
# For each team, `files[team]` holds the opened ROOT file and `trees[team]` the
# tree fetched from it, as named by the team's config for `model` and `channel`.
files = {}
trees = {}
entries = {}

for team in teams:
    # Check validity of config file
    # NOTE: These checks only emit warnings; a missing key still raises a
    # KeyError in the lookup that follows the respective check.
    if model not in configs[team]:
        warn('Model `{}` is not found in config of team {}'.format(model, team))
    if channel not in configs[team][model]:
        warn('Channel `{}` is not found for model `{}` in config of team {}'.format(channel, model, team))
    teamConfig = configs[team][model][channel]
    if 'file' not in teamConfig:
        warn('Key `file` is not found for channel `{}` and model `{}` in config of team {}'.format(channel, model, team))
    fileName = teamConfig['file']
    if 'tree' not in teamConfig:
        warn('Key `tree` is not found for channel `{}` and model `{}` in config of team {}'.format(channel, model, team))
    treeName = teamConfig['tree']

    # Load ROOT file and tree
    files[team] = ROOT.TFile(fileName)
    # A TFile that failed to open is still a Python object (never None), but it
    # is flagged as zombie, so that flag has to be tested instead.
    if not files[team] or files[team].IsZombie():
        warn('Can not open ROOT file with path `{}` for team {}'.format(fileName, team))
    trees[team] = files[team].Get(treeName)
    # A missing key yields a null object; the truthiness test detects it
    # independently of how the PyROOT version compares null objects to None.
    if not trees[team]:
        warn('Can not open tree `{}` from ROOT file with path `{}` for team {}'.format(treeName, fileName, team))

### Read data and set up canvases

In [None]:
# Define global plot settings
ROOT.gStyle.SetOptStat(False)
ROOT.gStyle.SetTitleOffset(2.5, 'Y')
fontSize = 14

# Fill plots with overlayed histograms and ratios
# `plots[variable]` keeps a reference to the canvas, both pads and every
# histogram of that variable, keyed by name.
plots = {}

# The reference team does not depend on the variable, so check it once up front
# NOTE: This only warns; the ratio computation below still raises a KeyError
# if the reference team has no histogram.
if referenceTeam not in teams:
    warn('Reference team {} not in list of teams {}'.format(referenceTeam, teams))

for variable in variables:
    # Print progress
    if verbose > 0:
        stdout.write('Processing variable: {}\n'.format(variable))
        stdout.flush() # This forces to print an output immediately

    # Histogram parameters [bins, min, max] of this variable
    numBins, xMin, xMax = variables[variable][:3]

    plots[variable] = {}

    # Set up canvas
    c = ROOT.TCanvas(variable, variable, 800, 600)
    plots[variable]['canvas'] = c

    # Set up upper pad (upper 70% of the canvas)
    padUpper = ROOT.TPad('padUpper', 'padUpper', 0.0, 0.3, 1.0, 1.0)
    plots[variable]['padUpper'] = padUpper
    padUpper.Draw()
    padUpper.cd()

    # Go through teams and put histograms in upper pad
    for iTeam, team in enumerate(teams):
        if verbose > 0:
            stdout.write('  Histogram: {}\n'.format(team))
            stdout.flush()

        histName = 'h_{}_{}'.format(team, variable)
        hist = ROOT.TH1F(histName, histName, numBins, xMin, xMax)
        plots[variable][histName] = hist
        hist.SetTitle(variable)
        hist.GetXaxis().SetTitle(variable)
        hist.GetXaxis().SetRangeUser(xMin, xMax)
        hist.GetYaxis().SetTitle('Entries')
        hist.GetXaxis().SetLabelSize(fontSize)
        hist.GetYaxis().SetLabelSize(fontSize)
        hist.GetXaxis().SetTitleSize(fontSize)
        hist.GetYaxis().SetTitleSize(fontSize)
        # Wrap around the style lists so that more teams than list entries
        # reuse styles instead of raising an IndexError
        hist.SetLineColor(colorMap[iTeam % len(colorMap)])
        hist.SetLineStyle(lineStyleMap[iTeam % len(lineStyleMap)])

        # Fill the histogram event by event from the branch named `variable`
        tree = trees[team]
        for iEvent in range(tree.GetEntries()):
            tree.GetEntry(iEvent)
            hist.Fill(getattr(tree, variable))

        # The first histogram creates the axes, all others are overlayed
        if iTeam == 0:
            hist.Draw()
        else:
            hist.Draw('SAME')

    # Set up lower pad (lower 30% of the canvas)
    c.cd()
    padLower = ROOT.TPad('padLower', 'padLower', 0.0, 0.0, 1.0, 0.3)
    padLower.Draw()
    plots[variable]['padLower'] = padLower
    padLower.cd()

    # Put ratios in lower pad

    # Calculate ratios
    # NOTE: The builtin `float` is used as dtype because the `np.float` alias
    # is no longer available in recent NumPy versions.
    ratios = np.zeros((numBins, len(teams)), dtype=float)
    maxRatio = 0 # Min and max ratios are used to scale the axis properly
    minRatio = 1e3

    histReference = plots[variable]['h_{}_{}'.format(referenceTeam, variable)]
    for iBin in range(numBins):
        yReference = histReference.GetBinContent(iBin+1) # Use +1 to skip underflow bin
        if yReference == 0: # If denominator is zero, keep zero as ratio!
            continue
        for iTeam, team in enumerate(teams):
            yCompare = plots[variable]['h_{}_{}'.format(team, variable)].GetBinContent(iBin+1)
            ratio = yCompare/yReference
            ratios[iBin, iTeam] = ratio
            maxRatio = max(maxRatio, ratio)
            minRatio = min(minRatio, ratio)

    # Without any populated reference bin the extrema keep their initial values
    # (min above max); all ratios are zero then, so use a plain default range.
    if minRatio > maxRatio:
        minRatio, maxRatio = 0.0, 1.0

    # Give additional range on y axis. If all ratios are identical (e.g. all
    # teams agree perfectly), the spread is zero; use a fixed margin then so
    # that the axis range does not collapse to a single value.
    margin = (maxRatio - minRatio)*0.1
    if margin == 0:
        margin = 0.1

    # Set up ratio plots
    for iTeam, team in enumerate(teams):
        if verbose > 0:
            stdout.write('  Ratio: {}\n'.format(team))
            stdout.flush()

        ratioName = 'r_{}_{}'.format(team, variable)
        r = ROOT.TH1F(ratioName, ratioName, numBins, xMin, xMax)
        plots[variable][ratioName] = r
        for iBin in range(numBins):
            r.AddBinContent(iBin+1, ratios[iBin, iTeam])
        r.SetTitle('')
        r.GetYaxis().SetTitle('Ratio')
        r.GetXaxis().SetRangeUser(xMin, xMax)
        r.GetYaxis().SetRangeUser(minRatio - margin, maxRatio + margin)
        r.GetXaxis().SetLabelSize(fontSize)
        r.GetYaxis().SetLabelSize(fontSize)
        r.GetXaxis().SetTitleSize(fontSize)
        r.GetYaxis().SetTitleSize(fontSize)
        r.SetMarkerStyle(34)
        r.SetMarkerColor(colorMap[iTeam % len(colorMap)])
        r.SetLineColor(colorMap[iTeam % len(colorMap)]) # Needed to set color of interactive legend boxes
        r.SetMarkerSize(1)
        if iTeam == 0:
            r.Draw('P')
        else:
            r.Draw('P SAME')

## Draw histograms

In [None]:
# Display the stored canvas of every configured variable
for variable in variables:
    canvas = plots[variable]['canvas']
    canvas.Draw()