In [1]:
import glob
import json
import os
import warnings

import numpy as np
import pandas as pd
import torch

import utils.plotting as plot_utils

In [2]:
# Set directories for loading data and saving plots
from data.data_loaders import load_curtains_pd
from data.physics_datasets import preprocess_method

# Both locations default to the paths of the original setup. To run the notebook elsewhere without editing it,
# set CURTAINS_DATA_DIR and/or CURTAINS_SAVE_DIR before starting the kernel
# (e.g. CURTAINS_SAVE_DIR=/srv/beegfs/scratch/groups/dpnc/atlas/CURTAINS/images).
data_directory = os.environ.get('CURTAINS_DATA_DIR', '/srv/beegfs/scratch/groups/dpnc/atlas/CURTAINS/')
save_directory = os.environ.get('CURTAINS_SAVE_DIR',
                                '/home/users/k/kleins/MLproject/CURTAINS/images/test_images_paper/')
# Make sure the output location exists before anything is written to it
os.makedirs(save_directory, exist_ok=True)


def get_files(directory_wildcard, base_directory=None):
    """
    Find the run directories whose name starts with a given prefix and load the JSON info file of each one.

    Parameters
    ----------
    directory_wildcard : str
        Prefix of the directory names to match; a trailing ``*`` is appended before globbing.
    base_directory : str, optional
        Directory to search in. Defaults to the notebook-level ``data_directory``.

    Returns
    -------
    directories : list of str
        Matching directories that contain at least one ``*.json`` file, sorted by path.
    loaded_info : list
        The decoded content of the info file of each directory, in the same order as ``directories``.
    """
    if base_directory is None:
        base_directory = data_directory

    directories = []
    loaded_info = []
    # glob returns matches in arbitrary order; sort so that index based access (e.g. ``info[0]``) is reproducible
    for candidate in sorted(glob.glob(os.path.join(base_directory, f'{directory_wildcard}*'))):
        # The wildcard can also match plain files, which cannot hold an info file
        if not os.path.isdir(candidate):
            continue
        json_files = sorted(glob.glob(os.path.join(candidate, '*.json')))
        if not json_files:
            # Skip instead of failing with an IndexError, but keep the omission visible
            warnings.warn(f'No JSON info file found in {candidate}, skipping this directory.')
            continue
        with open(json_files[0], "r") as info_file:
            info = json.load(info_file)
        # Info files may hold a JSON document that was itself dumped as a string; decode once more in that case
        if isinstance(info, str):
            info = json.loads(info)
        # Append both together so the two returned lists always stay aligned
        directories.append(candidate)
        loaded_info.append(info)
    return directories, loaded_info

In [3]:
# Collect the run directories and the matching info dictionaries for each method.
# NOTE(review): both calls below use the identical 'fixed_widths_cathode_features_CATHODE_features_' prefix, so the
# CURTAINS_* lists hold the same runs as the CATHODE_* ones. This looks like a copy-paste slip - confirm the
# prefix that was intended for the CURTAINS runs.
CATHODE_directories, CATHODE_info = get_files('fixed_widths_cathode_features_CATHODE_features_')
CURTAINS_directories, CURTAINS_info = get_files('fixed_widths_cathode_features_CATHODE_features_')

In [12]:
%%capture
# Load the data and dope appropriately, load feature type 12 as that encompasses all the others
feature_type = 12
# Fixed seed for the shuffles below: later cells take the first rows of `ad` (`ad[:doping]`), so without a seed
# a different set of events would be selected on every run.
load_shuffle_seed = 42
# Default sample, shuffled, with incomplete rows removed
sm = load_curtains_pd(feature_type=feature_type)
sm = sm.sample(frac=1, random_state=load_shuffle_seed).dropna()
# 'WZ_allhad_pT' sample, treated the same way
ad = load_curtains_pd(sm='WZ_allhad_pT', feature_type=feature_type)
ad = ad.sample(frac=1, random_state=load_shuffle_seed).dropna()

In [23]:
# Feature names for every feature-type id. The names are used further down both to select DataFrame columns
# and as the labels handed to the plotting helper.
feature_keys = {
    3: [
        'mj1', 'mj2-mj1',
        '$\\tau_{21}^{j_1}$', '$\\tau_{21}^{j_2}$',
        '$dR_{jj}$',
    ],
    10: [
        'mj1', 'mj2-mj1',
        '$\\tau_{21}^{j_1}$', '$\\tau_{21}^{j_2}$',
        '$dR_{jj}$',
        '$p_t^{j_1}$', '$p_t^{j_2}$',
        'mjj',
    ],
    11: [
        'mj1', 'mj2-mj1',
        '$\\tau_{21}^{j_1}$', '$\\tau_{21}^{j_2}$',
        '$dR_{jj}$',
        '$p_t^{j_1}$', '$p_t^{j_2}$',
        'delEta',
        'mjj',
    ],
    12: [
        'mj1', 'mj2', 'mj2-mj1',
        '$\\tau_{21}^{j_1}$', '$\\tau_{32}^{j_1}$',
        '$\\tau_{21}^{j_2}$', '$\\tau_{32}^{j_2}$',
        '$p_t^{j_1}$', '$p_t^{j_2}$',
        '$dR_{jj}$', 'delPhi', 'delEta',
        'mjj',
    ],
}

In [42]:
# Bin the data
def mx_data(data, bins):
    """
    Split a frame into the rows inside and outside of an 'mjj' window.

    The window is half open: rows with ``bins[0] <= mjj < bins[1]`` count as inside, all other rows as outside.
    Returns the tuple ``(inside, outside)``.
    """
    lower_edge = bins[0]
    upper_edge = bins[1]
    mjj = data['mjj']
    in_window = mjj.ge(lower_edge) & mjj.lt(upper_edge)
    inside = data.loc[in_window]
    outside = data.loc[~in_window]
    return inside, outside

# Plot a feature spread
n_sample_for_plot = 1000
# Seed for shuffling the target sample, so that the figure does not change from run to run
plot_shuffle_seed = 0

# Only the first run is plotted; iterate over range(len(CATHODE_info)) instead to plot every run
for ind in [0]:
    info_dict = CATHODE_info[ind]

    # First grab the true data for the signal region
    # 'bins' is stored as a comma separated string of integer edges
    bins = [int(b) for b in info_dict['bins'].split(',')]
    # Here we only want the SR
    bins = [bins[2], bins[3]]
    doping = info_dict['doping']
    feature_nms = feature_keys[info_dict['feature_type']]

    # Keep the events inside the window from the `sm` sample and from the first `doping` rows of `ad`
    target_qcd, _ = mx_data(sm[feature_nms], bins)
    ad_to_use = ad[:doping]
    target_ad, _ = mx_data(ad_to_use[feature_nms], bins)
    # Mix both contributions; the shuffle is seeded to keep the plot reproducible
    # NOTE(review): this tensor takes its dtype from the frame while `samples` below is forced to float32 -
    # confirm that preprocess_method is fine with the two differing.
    target_sample = torch.tensor(
        pd.concat((target_qcd, target_ad)).sample(frac=1, random_state=plot_shuffle_seed).to_numpy())

    # Samples stored in the run directory
    samples = np.load(os.path.join(CATHODE_directories[ind], 'SB2_to_SR_samples.npy'))
    samples = torch.tensor(samples, dtype=torch.float32)

    # Apply the data preprocessing to have it plotted on an easy to view range
    # The info returned for the target sample is passed back in when processing the stored samples
    ts, info, _ = preprocess_method(target_sample)
    ss, _, _ = preprocess_method(samples, info)

    # Set the tags for saving
    nm = 'SB2_to_SR'
    tag = f"CATHODE_{doping}_{info_dict['feature_type']}"
    x_bounds = [-1.2, 1.2]

    # The last feature name is not passed as a label. For feature types 10, 11 and 12 that name is 'mjj';
    # the list for feature type 3 has no 'mjj' entry, so mx_data above would already fail for such a run.
    plot_utils.getFeaturePlot(ts, ss, nm, save_directory, tag, feature_nms[:-1], input_sample=None,
                              n_sample_for_plot=n_sample_for_plot, x_bounds=x_bounds)

<Figure size 1008x936 with 0 Axes>