# CMS jupyter e-lab test bench 

## Import libraries

In [None]:
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Load data from [CERN Open Data](http://opendata.cern.ch)

In [None]:
# CSV files for each selectable dataset. Every URL in a list is downloaded
# and concatenated into one DataFrame, so each file must appear exactly once:
# a repeated entry would double-count all of its events.
dimuon_urls = [
    # NOTE(review): this entry was previously listed twice, possibly a typo
    # for another file of the same record; confirm and add that file if so.
    'http://opendata.cern.ch/record/700/files/MuRun2010B_0.csv',
]

higgs_urls = [
    'http://opendata.cern.ch/record/5200/files/2e2mu_2011.csv',
    'http://opendata.cern.ch/record/5200/files/2e2mu_2012.csv',
    'http://opendata.cern.ch/record/5200/files/4e_2011.csv',
    'http://opendata.cern.ch/record/5200/files/4e_2012.csv',
    'http://opendata.cern.ch/record/5200/files/4mu_2011.csv',
    'http://opendata.cern.ch/record/5200/files/4mu_2012.csv',
]

Zmumu_urls = [
    'http://opendata.cern.ch/record/307/files/Zmumu.csv',
]

# (label, value) pairs: the label is what the user sees, the value is the
# list of URLs associated with that label.
dataset_names = [
    ('Z to mu mu events', Zmumu_urls),
    ('Di-muon events in the 2-110 GeV invariant mass range', dimuon_urls),
    ('Higgs to 4 lepton events', higgs_urls),
]

In [None]:
def get_dataset(dataset=''):
    """Load every CSV of a dataset into a single DataFrame.

    Parameters
    ----------
    dataset : iterable of CSV sources, optional
        Each item is handed to ``pd.read_csv`` (e.g. a URL). The default
        empty string means "nothing selected".

    Returns
    -------
    pandas.DataFrame or None
        The rows of all files stacked in the given order, or ``None`` when
        ``dataset`` is empty.
    """
    if not dataset:
        return None

    frames = [pd.read_csv(url) for url in dataset]

    # Each file carries its own 0..n-1 index; renumber the rows so the
    # combined frame does not end up with repeated index labels.
    return pd.concat(frames, ignore_index=True)
    
# Build the selection control around get_dataset, offering the
# (label, urls) pairs in dataset_names as the choices for `dataset`.
# The control is created here and shown explicitly below.
data = widgets.interactive(
    get_dataset,
    dataset=dataset_names,
)

print('Select a dataset:')
display(data)

In [None]:
# Take the value that get_dataset returned for the current selection
# (exposed by the widget as `result`) and preview its first five rows.
df = data.result
df.head(n=5)

In [None]:
# List the column names available in the loaded dataset.
df.columns

## Analyze

### 1. Histograms

You can use the following function to make a histogram:

In [None]:
def show_histogram(
    column,
    bar=True,
    grid=False,
    nbins=100,
    xrange=None,
    xlabel=None,
    log=False,
):
    """Draw a histogram of one DataFrame column.

    Parameters
    ----------
    column : pandas.Series
        Values to histogram; its ``name`` is the default x-axis label.
        Missing values (NaN) are dropped before binning.
    bar : bool, optional
        If True draw outlined bars; if False draw one point per bin with
        error bars (x: half the bin width, y: square root of the count).
    grid : bool, optional
        Whether to draw grid lines.
    nbins : int or sequence, optional
        Passed as ``bins`` to the histogramming routine.
    xrange : tuple of (low, high), optional
        Range to histogram. Defaults to the minimum and maximum of the data.
    xlabel : str, optional
        Label for the x axis. Defaults to ``column.name``.
    log : bool, optional
        Use a logarithmic y axis (honoured for both drawing styles).

    Raises
    ------
    ValueError
        If ``xrange`` is not given and the column holds no values to
        derive a range from.
    """
    # NaN cannot be binned and would make the automatic range meaningless.
    values = column.dropna().to_numpy()

    # Resolve the range before creating the figure so that a failure does
    # not leave an empty figure behind.
    if xrange is None:
        if values.size == 0:
            raise ValueError(
                'cannot determine a histogram range: column has no values'
            )
        xrange = (values.min(), values.max())

    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_subplot(1, 1, 1)

    if grid:
        ax.grid(True)

    if bar:
        ax.hist(
            values,
            bins=nbins,
            range=xrange,
            log=log,
            edgecolor='b',
            color='w',
        )
    else:
        counts, edges = np.histogram(values, bins=nbins, range=xrange)
        centers = (edges[:-1] + edges[1:]) / 2

        # Derive the x errors from the actual bin edges, so this works for
        # any accepted `nbins` value and for unequal bin widths.
        half_widths = np.diff(edges) / 2

        ax.errorbar(
            centers,
            counts,
            xerr=half_widths,
            yerr=np.sqrt(counts),
            linestyle='None',
            color='black',
            marker='o',
        )

        # errorbar has no `log` option, so set the axis scale directly.
        if log:
            ax.set_yscale('log')

    ax.set_xlabel(column.name if xlabel is None else xlabel)

    plt.show()

Like so, where we look at the invariant mass M (which has units of $\mathrm{GeV/c^{2}}$):

In [None]:
# Histogram the 'M' column with all plotting options left at their defaults.
show_histogram(df['M'])

Now let's adjust the number of bins interactively:

In [None]:
@widgets.interact(nbins=widgets.IntSlider(value=100, min=1, max=100, step=1))
def adjust_histogram(nbins=100):
    """Redraw the 'M' histogram over 50-150 with the chosen number of bins.

    ``nbins`` is driven by an integer slider running from 1 to 100.
    """
    mass = df['M']
    show_histogram(mass, nbins=nbins, xrange=(50, 150))

### 2. Selections

Let's select events in which the transverse momentum $p_{T}$ of both particles (`pt1` and `pt2`) is above a chosen threshold:

In [None]:
# Keep only the rows where pt1 and pt2 are both greater than 30.
pt_select = df.loc[(df['pt1'] > 30) & (df['pt2'] > 30)]

and see how it changes the invariant mass distribution:

In [None]:
# Same histogram as before, but restricted to the rows kept in pt_select.
show_histogram(pt_select['M'])