In [1]:
# Make a local MadMiner source checkout importable from this notebook.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
madminer_src_path = '/home/shomiller/madminer'
import sys
import os
# The checkout is appended to the END of sys.path, so a `madminer` package found
# earlier on the path would be imported instead -- TODO confirm this is intended.
sys.path.append(madminer_src_path)

from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

from madminer.sampling import combine_and_shuffle

from madminer.analysis import DataAnalyzer
from madminer.utils.interfaces.madminer_hdf5 import save_madminer_settings, save_preformatted_events_to_madminer_file

import madminer.__version__
print( 'MadMiner version: {}'.format(madminer.__version__) )

MadMiner version: 0.4.9


In [2]:
# MadMiner output: short timestamp, logger name, level and message at INFO level.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%H:%M',
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
)

# Every logger already registered whose name does not mention "madminer"
# (e.g. matplotlib) is restricted to warnings and above.
for logger_name in logging.Logger.manager.loggerDict:
    if "madminer" in logger_name:
        continue
    logging.getLogger(logger_name).setLevel(logging.WARNING)

Here we define a function that modifies the weights manually:
* `remove_systematics` opens a single `data_file` and, for every event, overwrites the weights of all nuisance benchmarks with that event's SM weight (for the background samples this should be identical to the weight at every other physical benchmark as well). This lets us train on a sample in which the nuisance parameters only affect the signal.

In [3]:
def remove_systematics(data_file, output_file, n_true_benchmarks=15):
    """
    Write a copy of a MadMiner event file with the nuisance-benchmark weights flattened.

    The first `n_true_benchmarks` weight columns are copied unchanged. Every
    remaining column (the nuisance benchmarks) is overwritten, event by event,
    with the weight in column 0.

    Parameters
    ----------
    data_file : str
        Path of the MadMiner HDF5 file to read. Its setup is also copied into
        the output file.
    output_file : str
        Path of the MadMiner HDF5 file to write.
    n_true_benchmarks : int, optional
        Number of leading weight columns to keep untouched. Default: 15.
        NOTE(review): this should match the number of physical benchmarks in
        `data_file` -- confirm when using files with a different setup.

    Returns
    -------
    None
    """
    data = DataAnalyzer(data_file)

    # Only the first item yielded by the loader is consumed; with batch_size=None
    # this is presumably the full event sample in a single batch.
    # The built-in next() is used instead of the Python-2-only `.next()` method,
    # so this works on both Python 2 and Python 3 kernels.
    x, weights, sampling_ids = next(
        data.event_loader(batch_size=None, return_sampling_ids=True)
    )

    new_weights = np.zeros((weights.shape[0], data.n_benchmarks), dtype=weights.dtype)

    # Keep the leading (physical) benchmark columns as they are ...
    new_weights[:, :n_true_benchmarks] = weights[:, :n_true_benchmarks]

    # ... and broadcast column 0 into every remaining (nuisance) column.
    # NOTE(review): assumes benchmark 0 is the SM reference point -- confirm.
    new_weights[:, n_true_benchmarks:] = weights[:, :1]

    save_preformatted_events_to_madminer_file(
        filename=output_file,
        observations=x,
        weights=new_weights,
        sampling_benchmarks=sampling_ids,
        copy_setup_from=data_file,
    )
    
    

# Remove Systematics

Here we simply flatten the systematics in all the background events, so that we can train `SALLY` on a sample with systematics only affecting the signal.

## $W^+(\mu)$ (MET)

In [4]:
# wpbb_mu background (W+, muon channel): write a copy with flattened nuisance weights.
wpbb_mu_raw_file = 'data/met/backgrounds/wpbb_mu_background_lhedata_met.h5'
wpbb_mu_nosyst_file = 'data/met/backgrounds/wpbb_mu_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=wpbb_mu_raw_file, output_file=wpbb_mu_nosyst_file)

20:51 madminer.analysis    INFO    Loading data from data/wpbb_mu_background_lhedata_met.h5
20:51 madminer.analysis    INFO    Found 4 parameters
20:51 madminer.analysis    INFO    Found 33 nuisance parameters
20:51 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
20:51 madminer.analysis    INFO    Found 48 observables
20:51 madminer.analysis    INFO    Found 2000000 events
20:51 madminer.analysis    INFO      2000000 background events
20:51 madminer.analysis    INFO    Found morphing setup with 15 components
20:51 madminer.analysis    INFO    Found nuisance morphing setup


In [5]:
# tpb_mu background (W+, muon channel): write a copy with flattened nuisance weights.
tpb_mu_raw_file = 'data/met/backgrounds/tpb_mu_background_lhedata_met.h5'
tpb_mu_nosyst_file = 'data/met/backgrounds/tpb_mu_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tpb_mu_raw_file, output_file=tpb_mu_nosyst_file)

20:51 madminer.analysis    INFO    Loading data from data/tpb_mu_background_lhedata_met.h5
20:51 madminer.analysis    INFO    Found 4 parameters
20:51 madminer.analysis    INFO    Found 33 nuisance parameters
20:51 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
20:51 madminer.analysis    INFO    Found 48 observables
20:51 madminer.analysis    INFO    Found 1000000 events
20:51 madminer.analysis    INFO      1000000 background events
20:51 madminer.analysis    INFO    Found morphing setup with 15 components
20:51 madminer.analysis    INFO    Found nuisance morphing setup


In [6]:
# tt_mupjj background (W+, muon channel): write a copy with flattened nuisance weights.
tt_mupjj_raw_file = 'data/met/backgrounds/tt_mupjj_background_lhedata_met.h5'
tt_mupjj_nosyst_file = 'data/met/backgrounds/tt_mupjj_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tt_mupjj_raw_file, output_file=tt_mupjj_nosyst_file)

20:51 madminer.analysis    INFO    Loading data from data/tt_mupjj_background_lhedata_met.h5
20:51 madminer.analysis    INFO    Found 4 parameters
20:51 madminer.analysis    INFO    Found 33 nuisance parameters
20:51 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
20:51 madminer.analysis    INFO    Found 48 observables
20:51 madminer.analysis    INFO    Found 1000000 events
20:51 madminer.analysis    INFO      1000000 background events
20:51 madminer.analysis    INFO    Found morphing setup with 15 components
20:51 madminer.analysis    INFO    Found nuisance morphing setup


## $W^+(e)$ (MET)

In [12]:
# wpbb_e background (W+, electron channel): write a copy with flattened nuisance weights.
wpbb_e_raw_file = 'data/met/backgrounds/wpbb_e_background_lhedata_met.h5'
wpbb_e_nosyst_file = 'data/met/backgrounds/wpbb_e_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=wpbb_e_raw_file, output_file=wpbb_e_nosyst_file)

08:02 madminer.analysis    INFO    Loading data from data/met/backgrounds/wpbb_e_background_lhedata_met.h5
08:02 madminer.analysis    INFO    Found 4 parameters
08:02 madminer.analysis    INFO    Found 33 nuisance parameters
08:02 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:02 madminer.analysis    INFO    Found 48 observables
08:02 madminer.analysis    INFO    Found 2000000 events
08:02 madminer.analysis    INFO      2000000 background events
08:02 madminer.analysis    INFO    Found morphing setup with 15 components
08:02 madminer.analysis    INFO    Found nuisance morphing setup


In [13]:
# tpb_e background (W+, electron channel): write a copy with flattened nuisance weights.
tpb_e_raw_file = 'data/met/backgrounds/tpb_e_background_lhedata_met.h5'
tpb_e_nosyst_file = 'data/met/backgrounds/tpb_e_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tpb_e_raw_file, output_file=tpb_e_nosyst_file)

08:02 madminer.analysis    INFO    Loading data from data/met/backgrounds/tpb_e_background_lhedata_met.h5
08:02 madminer.analysis    INFO    Found 4 parameters
08:02 madminer.analysis    INFO    Found 33 nuisance parameters
08:02 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:02 madminer.analysis    INFO    Found 48 observables
08:02 madminer.analysis    INFO    Found 1000000 events
08:02 madminer.analysis    INFO      1000000 background events
08:02 madminer.analysis    INFO    Found morphing setup with 15 components
08:02 madminer.analysis    INFO    Found nuisance morphing setup


In [14]:
# tt_epjj background (W+, electron channel): write a copy with flattened nuisance weights.
tt_epjj_raw_file = 'data/met/backgrounds/tt_epjj_background_lhedata_met.h5'
tt_epjj_nosyst_file = 'data/met/backgrounds/tt_epjj_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tt_epjj_raw_file, output_file=tt_epjj_nosyst_file)

08:02 madminer.analysis    INFO    Loading data from data/met/backgrounds/tt_epjj_background_lhedata_met.h5
08:02 madminer.analysis    INFO    Found 4 parameters
08:02 madminer.analysis    INFO    Found 33 nuisance parameters
08:02 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:02 madminer.analysis    INFO    Found 48 observables
08:02 madminer.analysis    INFO    Found 1000000 events
08:02 madminer.analysis    INFO      1000000 background events
08:02 madminer.analysis    INFO    Found morphing setup with 15 components
08:02 madminer.analysis    INFO    Found nuisance morphing setup


## $W^-(\mu)$ (MET)

In [17]:
# wmbb_mu background (W-, muon channel): write a copy with flattened nuisance weights.
wmbb_mu_raw_file = 'data/met/backgrounds/wmbb_mu_background_lhedata_met.h5'
wmbb_mu_nosyst_file = 'data/met/backgrounds/wmbb_mu_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=wmbb_mu_raw_file, output_file=wmbb_mu_nosyst_file)

08:03 madminer.analysis    INFO    Loading data from data/met/backgrounds/wmbb_mu_background_lhedata_met.h5
08:03 madminer.analysis    INFO    Found 4 parameters
08:03 madminer.analysis    INFO    Found 33 nuisance parameters
08:03 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:03 madminer.analysis    INFO    Found 48 observables
08:03 madminer.analysis    INFO    Found 2000000 events
08:03 madminer.analysis    INFO      2000000 background events
08:03 madminer.analysis    INFO    Found morphing setup with 15 components
08:03 madminer.analysis    INFO    Found nuisance morphing setup


In [18]:
# tmb_mu background (W-, muon channel): write a copy with flattened nuisance weights.
tmb_mu_raw_file = 'data/met/backgrounds/tmb_mu_background_lhedata_met.h5'
tmb_mu_nosyst_file = 'data/met/backgrounds/tmb_mu_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tmb_mu_raw_file, output_file=tmb_mu_nosyst_file)

08:04 madminer.analysis    INFO    Loading data from data/met/backgrounds/tmb_mu_background_lhedata_met.h5
08:04 madminer.analysis    INFO    Found 4 parameters
08:04 madminer.analysis    INFO    Found 33 nuisance parameters
08:04 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:04 madminer.analysis    INFO    Found 48 observables
08:04 madminer.analysis    INFO    Found 1000000 events
08:04 madminer.analysis    INFO      1000000 background events
08:04 madminer.analysis    INFO    Found morphing setup with 15 components
08:04 madminer.analysis    INFO    Found nuisance morphing setup


In [19]:
# tt_mumjj background (W-, muon channel): write a copy with flattened nuisance weights.
tt_mumjj_raw_file = 'data/met/backgrounds/tt_mumjj_background_lhedata_met.h5'
tt_mumjj_nosyst_file = 'data/met/backgrounds/tt_mumjj_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tt_mumjj_raw_file, output_file=tt_mumjj_nosyst_file)

08:04 madminer.analysis    INFO    Loading data from data/met/backgrounds/tt_mumjj_background_lhedata_met.h5
08:04 madminer.analysis    INFO    Found 4 parameters
08:04 madminer.analysis    INFO    Found 33 nuisance parameters
08:04 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:04 madminer.analysis    INFO    Found 48 observables
08:04 madminer.analysis    INFO    Found 1000000 events
08:04 madminer.analysis    INFO      1000000 background events
08:04 madminer.analysis    INFO    Found morphing setup with 15 components
08:04 madminer.analysis    INFO    Found nuisance morphing setup


## $W^-(e)$ (MET)

In [20]:
# wmbb_e background (W-, electron channel): write a copy with flattened nuisance weights.
wmbb_e_raw_file = 'data/met/backgrounds/wmbb_e_background_lhedata_met.h5'
wmbb_e_nosyst_file = 'data/met/backgrounds/wmbb_e_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=wmbb_e_raw_file, output_file=wmbb_e_nosyst_file)

08:04 madminer.analysis    INFO    Loading data from data/met/backgrounds/wmbb_e_background_lhedata_met.h5
08:04 madminer.analysis    INFO    Found 4 parameters
08:04 madminer.analysis    INFO    Found 33 nuisance parameters
08:04 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:04 madminer.analysis    INFO    Found 48 observables
08:04 madminer.analysis    INFO    Found 2000000 events
08:04 madminer.analysis    INFO      2000000 background events
08:04 madminer.analysis    INFO    Found morphing setup with 15 components
08:04 madminer.analysis    INFO    Found nuisance morphing setup


In [21]:
# tmb_e background (W-, electron channel): write a copy with flattened nuisance weights.
tmb_e_raw_file = 'data/met/backgrounds/tmb_e_background_lhedata_met.h5'
tmb_e_nosyst_file = 'data/met/backgrounds/tmb_e_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tmb_e_raw_file, output_file=tmb_e_nosyst_file)

08:04 madminer.analysis    INFO    Loading data from data/met/backgrounds/tmb_e_background_lhedata_met.h5
08:04 madminer.analysis    INFO    Found 4 parameters
08:04 madminer.analysis    INFO    Found 33 nuisance parameters
08:04 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:04 madminer.analysis    INFO    Found 48 observables
08:04 madminer.analysis    INFO    Found 1000000 events
08:04 madminer.analysis    INFO      1000000 background events
08:04 madminer.analysis    INFO    Found morphing setup with 15 components
08:04 madminer.analysis    INFO    Found nuisance morphing setup


In [22]:
# tt_emjj background (W-, electron channel): write a copy with flattened nuisance weights.
tt_emjj_raw_file = 'data/met/backgrounds/tt_emjj_background_lhedata_met.h5'
tt_emjj_nosyst_file = 'data/met/backgrounds/tt_emjj_background_nosyst_lhedata_met.h5'
remove_systematics(data_file=tt_emjj_raw_file, output_file=tt_emjj_nosyst_file)

08:04 madminer.analysis    INFO    Loading data from data/met/backgrounds/tt_emjj_background_lhedata_met.h5
08:04 madminer.analysis    INFO    Found 4 parameters
08:04 madminer.analysis    INFO    Found 33 nuisance parameters
08:04 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:04 madminer.analysis    INFO    Found 48 observables
08:04 madminer.analysis    INFO    Found 1000000 events
08:04 madminer.analysis    INFO      1000000 background events
08:04 madminer.analysis    INFO    Found morphing setup with 15 components
08:04 madminer.analysis    INFO    Found nuisance morphing setup


## Combine and Shuffle

In [10]:
# W+ h (muon channel): merge the signal file with its three systematics-flattened
# background files into one shuffled sample.
lhedatafile_wph_mu_wbkgs_sigsystonly_met = 'data/met/wph_mu_wbkgs_sigsystonly_lhedata_met.h5'

# The signal file comes first, followed by the "nosyst" backgrounds.
wph_mu_input_files = [
    'data/met/signal/wph_mu_smeftsim_lhedata_met.h5',
    'data/met/backgrounds/wpbb_mu_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tpb_mu_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tt_mupjj_background_nosyst_lhedata_met.h5',
]

combine_and_shuffle(wph_mu_input_files, lhedatafile_wph_mu_wbkgs_sigsystonly_met)

20:52 madminer.sampling    INFO    Copying setup from data/wph_mu_smeftsim_lhedata_met.h5 to data/wph_mu_wbkgs_sigsystonly_lhedata_met.h5
20:52 madminer.sampling    INFO    Loading samples from file 1 / 4 at data/wph_mu_smeftsim_lhedata_met.h5, multiplying weights with k factor 1.0
20:52 madminer.sampling    INFO    Loading samples from file 2 / 4 at data/wpbb_mu_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
20:52 madminer.sampling    INFO    Loading samples from file 3 / 4 at data/tpb_mu_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
20:52 madminer.sampling    INFO    Loading samples from file 4 / 4 at data/tt_mupjj_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0


In [23]:
# W+ h (electron channel): merge the signal file with its three systematics-flattened
# background files into one shuffled sample.
lhedatafile_wph_e_wbkgs_sigsystonly_met = 'data/met/wph_e_wbkgs_sigsystonly_lhedata_met.h5'

# The signal file comes first, followed by the "nosyst" backgrounds.
wph_e_input_files = [
    'data/met/signal/wph_e_smeftsim_lhedata_met.h5',
    'data/met/backgrounds/wpbb_e_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tpb_e_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tt_epjj_background_nosyst_lhedata_met.h5',
]

combine_and_shuffle(wph_e_input_files, lhedatafile_wph_e_wbkgs_sigsystonly_met)

08:04 madminer.sampling    INFO    Copying setup from data/met/signal/wph_e_smeftsim_lhedata_met.h5 to data/met/wph_e_wbkgs_sigsystonly_lhedata_met.h5
08:04 madminer.sampling    INFO    Loading samples from file 1 / 4 at data/met/signal/wph_e_smeftsim_lhedata_met.h5, multiplying weights with k factor 1.0
08:04 madminer.sampling    INFO    Loading samples from file 2 / 4 at data/met/backgrounds/wpbb_e_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
08:05 madminer.sampling    INFO    Loading samples from file 3 / 4 at data/met/backgrounds/tpb_e_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
08:07 madminer.sampling    INFO    Loading samples from file 4 / 4 at data/met/backgrounds/tt_epjj_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0


In [24]:
# W- h (muon channel): merge the signal file with its three systematics-flattened
# background files into one shuffled sample.
lhedatafile_wmh_mu_wbkgs_sigsystonly_met = 'data/met/wmh_mu_wbkgs_sigsystonly_lhedata_met.h5'

# The signal file comes first, followed by the "nosyst" backgrounds.
wmh_mu_input_files = [
    'data/met/signal/wmh_mu_smeftsim_lhedata_met.h5',
    'data/met/backgrounds/wmbb_mu_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tmb_mu_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tt_mumjj_background_nosyst_lhedata_met.h5',
]

combine_and_shuffle(wmh_mu_input_files, lhedatafile_wmh_mu_wbkgs_sigsystonly_met)

08:09 madminer.sampling    INFO    Copying setup from data/met/signal/wmh_mu_smeftsim_lhedata_met.h5 to data/met/wmh_mu_wbkgs_sigsystonly_lhedata_met.h5
08:09 madminer.sampling    INFO    Loading samples from file 1 / 4 at data/met/signal/wmh_mu_smeftsim_lhedata_met.h5, multiplying weights with k factor 1.0
08:09 madminer.sampling    INFO    Loading samples from file 2 / 4 at data/met/backgrounds/wmbb_mu_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
08:11 madminer.sampling    INFO    Loading samples from file 3 / 4 at data/met/backgrounds/tmb_mu_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
08:12 madminer.sampling    INFO    Loading samples from file 4 / 4 at data/met/backgrounds/tt_mumjj_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0


In [25]:
# W- h (electron channel): merge the signal file with its three systematics-flattened
# background files into one shuffled sample.
lhedatafile_wmh_e_wbkgs_sigsystonly_met = 'data/met/wmh_e_wbkgs_sigsystonly_lhedata_met.h5'

# The signal file comes first, followed by the "nosyst" backgrounds.
wmh_e_input_files = [
    'data/met/signal/wmh_e_smeftsim_lhedata_met.h5',
    'data/met/backgrounds/wmbb_e_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tmb_e_background_nosyst_lhedata_met.h5',
    'data/met/backgrounds/tt_emjj_background_nosyst_lhedata_met.h5',
]

combine_and_shuffle(wmh_e_input_files, lhedatafile_wmh_e_wbkgs_sigsystonly_met)

08:15 madminer.sampling    INFO    Copying setup from data/met/signal/wmh_e_smeftsim_lhedata_met.h5 to data/met/wmh_e_wbkgs_sigsystonly_lhedata_met.h5
08:15 madminer.sampling    INFO    Loading samples from file 1 / 4 at data/met/signal/wmh_e_smeftsim_lhedata_met.h5, multiplying weights with k factor 1.0
08:15 madminer.sampling    INFO    Loading samples from file 2 / 4 at data/met/backgrounds/wmbb_e_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
08:16 madminer.sampling    INFO    Loading samples from file 3 / 4 at data/met/backgrounds/tmb_e_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0
08:17 madminer.sampling    INFO    Loading samples from file 4 / 4 at data/met/backgrounds/tt_emjj_background_nosyst_lhedata_met.h5, multiplying weights with k factor 1.0


### Quick Check:

In [16]:
# Sanity check: reload one of the systematics-flattened background files.
test = DataAnalyzer('data/met/backgrounds/wpbb_e_background_nosyst_lhedata_met.h5')

# Built-in next() instead of the Python-2-only `.next()` method, so the cell
# runs on both Python 2 and Python 3 kernels. Only the first batch is read.
_, weights, _ = next(test.event_loader(batch_size=None, return_sampling_ids=True))

# Display the full weight vector of the event at index 15 (one entry per
# benchmark); after flattening, the nuisance entries should equal the first one.
weights[15]

08:03 madminer.analysis    INFO    Loading data from data/met/backgrounds/wpbb_e_background_nosyst_lhedata_met.h5
08:03 madminer.analysis    INFO    Found 4 parameters
08:03 madminer.analysis    INFO    Found 33 nuisance parameters
08:03 madminer.analysis    INFO    Found 54 benchmarks, of which 15 physical
08:03 madminer.analysis    INFO    Found 48 observables
08:03 madminer.analysis    INFO    Found 2000000 events
08:03 madminer.analysis    INFO      2000000 background events
08:03 madminer.analysis    INFO    Found morphing setup with 15 components
08:03 madminer.analysis    INFO    Found nuisance morphing setup


array([  3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,   3.27584600e-08,   3.27584600e-08,
         3.27584600e-08,