In [1]:
import numpy as np
import pandas as pd
import math as m
import sys
import scipy.stats as stats
from scipy.spatial import distance
from matplotlib import pyplot as plt 
from matplotlib.lines import Line2D
import matplotlib.gridspec as gridspec
import random as rn

In [2]:
# Apply the project's matplotlib style sheet to every figure created below.
# The path is relative, so it depends on the directory the notebook is run from.
plt.style.use('../computermodernstyle.mplstyle')

In [3]:
def make_histogram(df, bins):
    """Build a histogram normalised by sample size and bin width.

    Parameters
    ----------
    df : array_like
        Values to histogram. ``np.histogram`` flattens its input, so the
        normalisation uses the total number of elements rather than the shape.
    bins : int or sequence
        Bin specification forwarded to ``np.histogram``.

    Returns
    -------
    bin_counts_norm : ndarray
        Counts divided by the number of entries and by the bin widths.
    bin_errors_norm : ndarray
        sqrt(counts), normalised in the same way.
    bin_edges : ndarray
        Bin edges as returned by ``np.histogram``.
    """
    # Total element count (a scalar). The previous ``df.shape`` was a tuple,
    # which only broadcast correctly for 1-D input.
    n_data = np.size(df)
    bin_counts, bin_edges = np.histogram(df, bins=bins)
    bin_errors = bin_counts**0.5
    bin_widths = np.diff(bin_edges)

    # Normalization
    bin_counts_norm = bin_counts/n_data/bin_widths
    bin_errors_norm = bin_errors/n_data/bin_widths

    return bin_counts_norm, bin_errors_norm, bin_edges

In [4]:
def make_histogram_weighted(obs, weights, bins):
    """Build a weighted histogram normalised by sample size and bin width.

    Parameters
    ----------
    obs : array_like
        Values to histogram.
    weights : array_like
        One weight per entry of ``obs``; weights may be negative.
    bins : int or sequence
        Bin specification forwarded to ``np.histogram``.

    Returns
    -------
    bin_counts_norm : ndarray
        Sum of weights per bin, divided by the number of entries and by the
        bin widths.
    bin_errors_norm : ndarray
        sqrt(sum of squared weights) per bin, normalised in the same way.
    bin_edges : ndarray
        Bin edges as returned by ``np.histogram``.
    """
    weights = np.asarray(weights)
    # Total element count (a scalar); ``obs.shape`` is a tuple and only
    # broadcast correctly for 1-D input.
    n_data = np.size(obs)
    bin_counts, bin_edges = np.histogram(obs, bins=bins, weights=weights)

    # Statistical uncertainty of a weighted bin is sqrt(sum w_i^2). Taking the
    # square root of the weight sum is only right for unit weights and gives
    # NaN whenever negative weights dominate a bin.
    sum_w2, _ = np.histogram(obs, bins=bin_edges, weights=weights**2)
    bin_errors = sum_w2**0.5
    bin_widths = np.diff(bin_edges)

    # Normalization
    bin_counts_norm = bin_counts/n_data/bin_widths
    bin_errors_norm = bin_errors/n_data/bin_widths

    return bin_counts_norm, bin_errors_norm, bin_edges

In [None]:
def make_histogram_with_ratio(df, df_baseline, bins):
    """Histogram ``df`` and express it relative to a baseline histogram.

    Both inputs are binned by ``make_histogram`` with the same ``bins``.
    Bins where the baseline is zero are not special-cased; plain NumPy
    division semantics apply there.

    Returns
    -------
    tuple
        ``(counts, errors, counts / baseline, errors / baseline, bin_edges)``
        where ``counts``, ``errors`` and ``bin_edges`` come from
        ``make_histogram(df, bins)`` and ``baseline`` is the first return
        value of ``make_histogram(df_baseline, bins)``.
    """
    counts, errors, edges = make_histogram(df, bins)
    baseline = make_histogram(df_baseline, bins)[0]

    ratio = counts/baseline
    ratio_errors = errors/baseline

    return counts, errors, ratio, ratio_errors, edges

In [None]:
def make_histogram_with_ratio_weighted(obs, weights, obs_baseline, weights_baseline, bins):
    """Weighted histogram of ``obs`` together with its ratio to a weighted baseline.

    Both samples are binned by ``make_histogram_weighted`` with the same
    ``bins``. Bins where the baseline is zero are not special-cased; plain
    NumPy division semantics apply there.

    Returns
    -------
    tuple
        ``(counts, errors, counts / baseline, errors / baseline, bin_edges)``
        where ``baseline`` is the first return value of
        ``make_histogram_weighted(obs_baseline, weights_baseline, bins)``.
    """
    counts, errors, edges = make_histogram_weighted(obs, weights, bins)
    baseline = make_histogram_weighted(obs_baseline, weights_baseline, bins)[0]

    ratio = counts/baseline
    ratio_errors = errors/baseline
    return counts, errors, ratio, ratio_errors, edges

In [None]:
# Input CSV files; the paths are relative to the directory the notebook runs from.
# NOTE(review): an earlier comment called "the first file" the ratio baseline;
# confirm against the plotting cell which sample is actually the reference.
file_name_gen_samples = '../../pp_to_ttbar/data/generated_samples.csv'
file_name_mc_samples = '../../pp_to_ttbar/data/negative_weight_samples.csv'
# NOTE(review): an earlier comment assumed 9 weight files, but only this
# single weights path is defined here.
file_name_mc_weights = '../../pp_to_ttbar/data/negative_weight_weights.csv'

# Hex colours for the histogram outlines.
edge_colours = [
    "#e41a1c",
    "#377eb8",
    "#4daf4a",
    "#984ea3",
    "#ff7f00",
    "#ffff33",
    "#a65628",
    "#f781bf",
]

In [None]:
# Read the headerless Monte Carlo sample CSV and keep only the first 8 columns as a NumPy array.
df_mc_samples = pd.read_csv(file_name_mc_samples, delimiter = ',', header=None, index_col=False).to_numpy()[:,:8]

In [None]:
# Read the headerless generated-sample CSV and keep only the first 8 columns as a NumPy array.
df_gen_samples = pd.read_csv(file_name_gen_samples, delimiter = ',', header=None, index_col=False).to_numpy()[:,:8]

In [None]:
# Read the headerless weights CSV and keep only its first column as a 1-D array.
# Assumes one weight per row of the Monte Carlo samples, in the same order — TODO confirm.
weights = pd.read_csv(file_name_mc_weights, delimiter = ',', header=None, index_col=False).to_numpy()[:,0]

In [None]:
# Mean weight. The plotting cell uses it as a constant per-entry weight for the
# histograms that are filled without the individual weights.
scaling_factor = np.average(weights)

In [None]:
def construct_momentum(x0, x1, x2, x3, E_min, E_max, m_on_shell):
    """Map four abstract coordinates to the components of a four-momentum.

    The coordinates are converted to intermediate kinematic quantities:

    * mass  = 2 * m_on_shell * x0
    * pT    = E_min * exp(x1 * log(E_max / E_min))
    * eta   = -log(tan(pi * x2 / 2))
    * phi   = 2 * pi * (x3 + 0.5)

    Inputs may be scalars or NumPy arrays; the arithmetic is element-wise.

    Returns
    -------
    tuple
        ``(E, px, py, pz)`` with ``E = sqrt(pT^2 cosh(eta)^2 + mass^2)``,
        ``px = pT cos(phi)``, ``py = pT sin(phi)`` and ``pz = pT sinh(eta)``.
    """
    mass = x0*m_on_shell*2
    log_energy_range = np.log(E_max/E_min)
    pT = np.exp(x1*log_energy_range)*E_min
    eta = -np.log(np.tan(x2*np.pi/2))
    phi = 2*np.pi*(x3 + 0.5)

    energy = np.sqrt(pT**2 * np.cosh(eta)**2 + mass**2)
    px = pT*np.cos(phi)
    py = pT*np.sin(phi)
    pz = pT*np.sinh(eta)
    return energy, px, py, pz

In [None]:
def make_physics_from_abstract_data(df_mc_samples, E_min = 0.1, E_max = 6500, m_t = 173):
    """Convert rows of 8 abstract coordinates into two four-momenta per row.

    Columns 0-3 and columns 4-7 of ``df_mc_samples`` are each passed through
    ``construct_momentum`` (with ``m_on_shell=m_t``); the four returned
    components are written to the same column positions of the output.

    Returns
    -------
    ndarray
        Array of shape ``(df_mc_samples.shape[0], 8)``.
    """
    n_rows = df_mc_samples.shape[0]
    moms = np.empty([n_rows, 8])

    # One pass per particle: offset 0 for the first, 4 for the second.
    for offset in (0, 4):
        x0, x1, x2, x3 = (df_mc_samples[:, offset + k] for k in range(4))
        components = construct_momentum(x0, x1, x2, x3, E_min=E_min, E_max=E_max, m_on_shell=m_t)
        for k, component in enumerate(components):
            moms[:, offset + k] = component

    return moms

In [None]:
# Convert both samples (Monte Carlo first, generated second) with the default settings.
moms_mc, moms_gen = map(make_physics_from_abstract_data, (df_mc_samples, df_gen_samples))

## Define plots

In [None]:
# Parallel lists with one entry per observable; the cells below append to all
# of them. Re-run this cell before re-running those cells so entries are not duplicated.
obs_mc, obs_gen = [], []
x_min, x_max, y_max = [], [], []
x_label, y_label = [], []
filename = []

**Top mass**

In [None]:
# Invariant mass of the first four-momentum: sqrt(E^2 - px^2 - py^2 - pz^2).
for obs, moms in zip((obs_mc, obs_gen), (moms_mc, moms_gen)):
    energy, px, py, pz = moms[:, :4].T
    obs.append(np.sqrt(energy**2 - px**2 - py**2 - pz**2))

# Plot range, y-axis ceiling, axis labels and output name for this observable.
x_min.append(167)
x_max.append(180)
y_max.append(1.7)
x_label.append(r'$\mathrm{m_t} [\mathrm{GeV}]$')
y_label.append(r'$\mathrm{\frac{d\sigma}{dm_t}} [\mathrm{pb/GeV}]$')
filename.append("mt.pdf")

**Top $p_T$**

In [None]:
# Transverse momentum of the first four-momentum: sqrt(px^2 + py^2).
for obs, moms in zip((obs_mc, obs_gen), (moms_mc, moms_gen)):
    px, py = moms[:, 1], moms[:, 2]
    obs.append(np.sqrt(px**2 + py**2))

# Plot range, y-axis ceiling, axis labels and output name for this observable.
x_min.append(0)
x_max.append(480)
y_max.append(9e-2)
x_label.append(r'$p_{T,t} [\mathrm{GeV}]$')
y_label.append(r'$\frac{\mathrm{d}\sigma}{\mathrm{d}p_{T,t}} [\mathrm{pb/GeV}] $')
filename.append("tpt.pdf")

**$t\bar{t}$ invariant mass ($m_{tt}$)**

In [None]:
# Invariant mass of the summed four-momentum (columns 0-3 plus columns 4-7).
for obs, moms in zip((obs_mc, obs_gen), (moms_mc, moms_gen)):
    energy = moms[:, 0] + moms[:, 4]
    px = moms[:, 1] + moms[:, 5]
    py = moms[:, 2] + moms[:, 6]
    pz = moms[:, 3] + moms[:, 7]
    obs.append(np.sqrt(energy**2 - px**2 - py**2 - pz**2))

# Plot range, y-axis ceiling, axis labels and output name for this observable.
x_min.append(250)
x_max.append(1000)
y_max.append(6e-2)
x_label.append(r'$m_{tt} [\mathrm{GeV}]$')
y_label.append(r'$\frac{\mathrm{d}\sigma}{\mathrm{d}m_{tt}} [\mathrm{pb/GeV}] $')
filename.append("mtt.pdf")

**$t\bar{t}$ transverse momentum ($p_{T,tt}$)**

In [None]:
# Transverse momentum of the summed four-momentum.
for obs, moms in zip((obs_mc, obs_gen), (moms_mc, moms_gen)):
    px = moms[:, 1] + moms[:, 5]
    py = moms[:, 2] + moms[:, 6]
    obs.append(np.sqrt(px**2 + py**2))

# Plot range, y-axis ceiling, axis labels and output name for this observable.
x_min.append(0)
x_max.append(300)
y_max.append(1.2e-1)
x_label.append(r'$p_{T,tt} [\mathrm{GeV}]$')
y_label.append(r'$\frac{\mathrm{d}\sigma}{\mathrm{d}p_{T,tt}} [\mathrm{pb/GeV}] $')
filename.append("pttt.pdf")

**$t\bar{t}$ total energy ($E_{t\bar{t}}$)**

In [None]:
# Summed energy of the two four-momenta (column 0 plus column 4).
for obs, moms in zip((obs_mc, obs_gen), (moms_mc, moms_gen)):
    total_energy = moms[:, 0] + moms[:, 4]
    obs.append(total_energy)

# Plot range, y-axis ceiling, axis labels and output name for this observable.
x_min.append(346)
x_max.append(3000)
y_max.append(2e-2)
x_label.append(r'$E_{t\bar{t}} [\mathrm{GeV}]$')
y_label.append(r'$\frac{\mathrm{d}\sigma}{\mathrm{d}E_{t\bar{t}}} [\mathrm{pb/GeV}] $')
filename.append("Ett.pdf")

## Make plots

In [None]:
# Number of equal-width bins used for every observable in the plotting loop.
n_bins = 25

In [None]:
def _ratio_to_reference(counts, reference):
    """Bin-wise ``counts / reference`` that is safe for empty reference bins.

    Where the reference is zero the denominator is replaced by 1; if the
    numerator is zero there as well, the ratio is set to 1. Neither input
    array is modified.
    """
    empty_reference = reference == 0
    denominator = reference.copy()
    denominator[empty_reference] = 1
    ratio = counts/denominator
    ratio[empty_reference & (counts == 0)] = 1
    return ratio


for i in range(len(obs_mc)):
    # Two stacked panels sharing the x-range: distributions on the top two
    # thirds, ratio to the weighted ("Truth") histogram on the bottom third.
    fig = plt.figure()
    fig.set_size_inches(5,4)
    gs = gridspec.GridSpec(nrows=3,ncols=1)
    gs.update(left=0,right=1,top=1,bottom=0,hspace=0, wspace=0)

    main_plot = fig.add_subplot(gs[:-1,0])
    ratio_plot = fig.add_subplot(gs[2,0])

    main_plot.set_xlim(x_min[i], x_max[i])
    main_plot.set_ylim(0.0001, y_max[i])
    main_plot.set_xticks([])
    ratio_plot.set_xlim(x_min[i], x_max[i])
    ratio_plot.set_ylim(0.8,1.2)
    bins = np.linspace(x_min[i], x_max[i], n_bins+1)

    # "Truth": Monte Carlo sample filled with its individual weights.
    counts_train, errors_train, edges = make_histogram_weighted(obs_mc[i], weights, bins)
    print(counts_train)
    # "Abs": same sample, every entry weighted by the mean weight.
    counts_abs, errors_abs, _ = make_histogram_weighted(obs_mc[i], scaling_factor*np.ones(len(obs_mc[i])), bins)
    print(counts_abs)
    # "Generated": generated sample, every entry weighted by the mean weight.
    counts_gen, errors_gen, _ = make_histogram_weighted(obs_gen[i], scaling_factor*np.ones(len(obs_gen[i])), bins)
    print(counts_gen)

    # Main plot: draw the precomputed bin contents as step histograms.
    main_plot.hist(edges[:-1], edges, weights=counts_train, edgecolor = edge_colours[0], histtype="step", label="Truth")
    main_plot.hist(edges[:-1], edges, weights=counts_abs, edgecolor = edge_colours[1], histtype="step", label="Abs")
    main_plot.hist(edges[:-1], edges, weights=counts_gen, edgecolor = edge_colours[2], histtype="step", label="Generated")

    # Ratio plot. Only the denominator is protected against zero, so an empty
    # bin in a non-reference histogram gives ratio 0 instead of 1/reference.
    ratio_abs = _ratio_to_reference(counts_abs, counts_train)
    ratio_gen = _ratio_to_reference(counts_gen, counts_train)

    ratio_plot.hist(edges[:-1], edges, weights=np.ones(len(counts_train)), edgecolor=edge_colours[0], histtype="step")
    ratio_plot.hist(edges[:-1], edges, weights=ratio_abs, edgecolor=edge_colours[1], histtype="step")
    ratio_plot.hist(edges[:-1], edges, weights=ratio_gen, edgecolor=edge_colours[2], histtype="step")

    # Replace the step-histogram patches by plain lines in the legend.
    handles, labels = main_plot.get_legend_handles_labels()
    new_handles = [Line2D([], [], c=h.get_edgecolor()) for h in handles]
    main_plot.legend(loc=1, handles=new_handles, labels=labels)

    ratio_plot.set_xlabel(x_label[i])
    main_plot.set_ylabel(y_label[i])

    # The names in `filename` already end in ".pdf"; only add the extension if
    # it is missing, so the output is not written as "<name>.pdf.pdf".
    out_path = filename[i] if filename[i].endswith('.pdf') else '{}.pdf'.format(filename[i])
    plt.savefig(out_path, format='pdf', dpi=1000, bbox_inches = "tight")