In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pandas as pd

# Restrict CUDA device visibility to device index "1" for anything initialised after this cell.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

## Load data

### Proton dataset

In [3]:
# Directory that holds the pickled datasets (relative to the notebook's working directory).
DATA_DIR = "data/"

In [4]:
# Response images of the proton dataset.
# NOTE(review): pd.read_pickle can execute arbitrary code - only load trusted files.
responses_path = os.path.join(DATA_DIR, r"data_proton_photonsum_proton_1_2312.pkl")
data = pd.read_pickle(responses_path)
print('Loaded: ',  data.shape, "max:", data.max())

# Conditional (per-particle) data belonging to the responses loaded above.
cond_path = os.path.join(DATA_DIR, r"data_cond_photonsum_proton_1_2312.pkl")
data_cond = pd.read_pickle(cond_path)
print('Loaded cond: ',  data_cond.shape, "max:",data_cond.values.max(), "min:",data_cond.values.min())

Loaded:  (400196, 44, 44) max: 6.3835066348840055
Loaded cond:  (400196, 12) max: 7000.0 min: -7000.0


In [5]:
# Preview the first five rows of the conditional data as loaded.
data_cond.head(n=5)

Unnamed: 0,Energy,Vx,Vy,Vz,Px,Py,Pz,mass,charge,std,group_number,neutron_photon_sum
0,513.318,1.4543e-08,3.65051e-08,-0.002731,0.035456,-0.051821,-513.318,0.0,0.0,0.287416,3405,291.0
1,1304.34,-1.83182e-05,1.55578e-05,0.050293,-0.509192,0.461106,1304.34,0.0,0.0,0.105298,4208,8.0
2,1014.72,-1.83182e-05,1.55578e-05,0.050293,-0.335473,0.256277,1014.72,0.0,0.0,0.125068,4011,130.0
3,3192.38,0.0,0.0,0.0,0.022422,-0.182957,-3192.38,939.565413,0.0,0.820542,4691,1256.0
4,68.7896,-1.74779e-06,-1.68577e-06,-0.00259,-0.004292,-0.029762,-68.7896,0.0,0.0,0.036346,853,5.0


In [6]:
# Set the photon-sum column(s) aside; they are removed from `data_cond` before the
# remaining columns are used as group-by keys and re-attached afterwards.
NEUTRON_PROTON_PHOTON_SUM_COLS = ['proton_photon_sum']
neutron_proton_photon_sum_data = data_cond.loc[:, NEUTRON_PROTON_PHOTON_SUM_COLS]

In [7]:
# Remove the photon-sum column in place so it is not treated as a conditional feature.
data_cond.drop(labels=['proton_photon_sum'], axis="columns", inplace=True)

In [8]:
# drop unnecessary columns
# `errors="ignore"` keeps this cell safe under Restart & Run All: the preceding cell
# has already removed 'proton_photon_sum' in place, so a plain drop raises KeyError here.
data_cond.drop(columns=['proton_photon_sum'], inplace=True, errors="ignore")

In [9]:
# Every column still present in `data_cond` is used as a conditional (group-by) key.
CONDITIONAL_COLS = data_cond.columns.tolist()
CONDITIONAL_COLS

['Energy',
 'Vx',
 'Vy',
 'Vz',
 'Px',
 'Py',
 'Pz',
 'mass',
 'charge',
 'std',
 'group_number']

1. Flatten each proton response image from shape (44, 44) into a single row of 1936 values

In [10]:
# Keep the first axis (one row per sample) and collapse all remaining axes into columns.
n_samples = data.shape[0]
flatten_responses = pd.DataFrame(data.reshape(n_samples, -1))
flatten_responses.shape

(400196, 1936)

2. Concatenate the flattened responses as extra columns next to each row of conditional data

In [None]:
# Column-wise concatenation (rows are matched on index): conditional columns first,
# then one column per flattened pixel.
frames = [data_cond, flatten_responses]
data_all = pd.concat(frames, axis="columns")
data_all.head(n=5)

3. Group by each unique combination of conditional values and calculate the standard deviation of every pixel within each group

In [None]:
# Build the group-by once and reuse it for both the per-pixel spread and the group ids.
grouped = data_all.groupby(CONDITIONAL_COLS)

# np.std uses ddof=0 by default (population standard deviation). The lambda wrapper is
# kept on purpose - NOTE(review): passing `np.std` directly may be dispatched by pandas
# to its own `std` (ddof=1); verify before simplifying.
stddev_group = grouped.transform(lambda pixel: np.std(pixel))

# Integer id of the group each row belongs to.
groups_numbers = grouped.ngroup()
stddev_group.head(n=5)

4. Calculate sum of stddevs of pixels

In [None]:
# Collapse the per-pixel standard deviations into one value per row.
sum_pixels = stddev_group.sum(axis="columns")
sum_pixels.shape, sum_pixels.head(n=5)

In [None]:
# Sanity check: exactly one summed value per conditional row.
assert len(data_cond) == len(sum_pixels)

5. Divide sum of stddevs by maximum value to normalize it

In [None]:
# Scale by the largest value so the maximum becomes 1.
normalized_stddevs = sum_pixels.div(sum_pixels.max())

6. Assign the calculated values to each row of conditional data

In [None]:
# Store the normalised sum of per-pixel standard deviations in the 'std_proton' column
# (pandas aligns the Series with `data_cond` on index).
data_cond['std_proton'] = normalized_stddevs

In [None]:
# Preview the conditional data with the new 'std_proton' column.
data_cond.head(n=5)

#### Add informative columns

In [None]:
# Re-attach the photon-sum column(s) that were set aside before the group-by keys were built.
data_cond[NEUTRON_PROTON_PHOTON_SUM_COLS] = neutron_proton_photon_sum_data

In [None]:
# Set (overwriting any existing values) 'group_number' to the ids returned by
# `groupby(CONDITIONAL_COLS).ngroup()`.
data_cond['group_number'] = groups_numbers

In [None]:
# Per-sample sum of the response over axes 1 and 2. This replaces whatever
# 'proton_photon_sum' values `data_cond` held before.
data_cond['proton_photon_sum'] = data.sum(axis=(1, 2))

In [None]:
# Final preview before saving.
data_cond.head(n=5)

In [62]:
# Write the updated `data_cond` back over the file it was loaded from.
# NOTE(review): the input is overwritten, so a later re-run of this notebook starts
# from the already-modified frame rather than the original one.
cond_output_path = os.path.join(DATA_DIR, r"data_cond_photonsum_proton_1_2312.pkl")
data_cond.to_pickle(cond_output_path)

### Neutron dataset

In [3]:
# Directory that holds the pickled datasets (same value as in the proton section).
DATA_DIR = "data/"

In [4]:
# Response images of the neutron dataset.
# NOTE(review): pd.read_pickle can execute arbitrary code - only load trusted files.
responses_path = os.path.join(DATA_DIR, r"data_neutron_photonsum_neutron_1_3360.pkl")
data = pd.read_pickle(responses_path)
print('Loaded: ',  data.shape, "max:", data.max())

# Conditional (per-particle) data belonging to the neutron responses loaded above.
cond_path = os.path.join(DATA_DIR, r"data_cond_neutron_photonsum_neutron_1_3360.pkl")
data_cond = pd.read_pickle(cond_path)
print('Loaded cond: ',  data_cond.shape, "max:",data_cond.values.max(), "min:",data_cond.values.min())

Loaded:  (400196, 44, 44) max: 6.3835066348840055
Loaded cond:  (400196, 12) max: 7000.0 min: -7000.0


In [23]:
# Preview the first five rows of the neutron conditional data as loaded.
data_cond.head(n=5)

Unnamed: 0,Energy,Vx,Vy,Vz,Px,Py,Pz,mass,charge,std,group_number,neutron_photon_sum
0,513.318,1.4543e-08,3.65051e-08,-0.002731,0.035456,-0.051821,-513.318,0.0,0.0,0.287416,3405,291.0
1,1304.34,-1.83182e-05,1.55578e-05,0.050293,-0.509192,0.461106,1304.34,0.0,0.0,0.105298,4208,8.0
2,1014.72,-1.83182e-05,1.55578e-05,0.050293,-0.335473,0.256277,1014.72,0.0,0.0,0.125068,4011,130.0
3,3192.38,0.0,0.0,0.0,0.022422,-0.182957,-3192.38,939.565413,0.0,0.820542,4691,1256.0
4,68.7896,-1.74779e-06,-1.68577e-06,-0.00259,-0.004292,-0.029762,-68.7896,0.0,0.0,0.036346,853,5.0


In [6]:
# Set the neutron photon-sum column(s) aside; they are removed from `data_cond` before
# the remaining columns are used as group-by keys and re-attached afterwards.
NEUTRON_PROTON_PHOTON_SUM_COLS = ['neutron_photon_sum']
neutron_proton_photon_sum_data = data_cond.loc[:, NEUTRON_PROTON_PHOTON_SUM_COLS]

In [7]:
# Remove the photon-sum column in place so it is not treated as a conditional feature.
data_cond.drop(labels=['neutron_photon_sum'], axis="columns", inplace=True)

In [9]:
# Every column still present in `data_cond` is used as a conditional (group-by) key.
CONDITIONAL_COLS = data_cond.columns.tolist()
CONDITIONAL_COLS

['Energy',
 'Vx',
 'Vy',
 'Vz',
 'Px',
 'Py',
 'Pz',
 'mass',
 'charge',
 'std',
 'group_number']

1. Flatten each neutron response image into a single row of pixel values (the output below shows 1936 columns, i.e. 44 × 44)

In [10]:
# Keep the first axis (one row per sample) and collapse all remaining axes into columns.
n_samples = data.shape[0]
flatten_responses = pd.DataFrame(data.reshape(n_samples, -1))
flatten_responses.shape

(400196, 1936)

2. Concatenate the flattened responses as extra columns next to each row of conditional data

In [11]:
# Column-wise concatenation (rows are matched on index): conditional columns first,
# then one column per flattened pixel.
frames = [data_cond, flatten_responses]
data_all = pd.concat(frames, axis="columns")
data_all.head(n=5)

Unnamed: 0,Energy,Vx,Vy,Vz,Px,Py,Pz,mass,charge,std,...,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935
0,513.318,1.4543e-08,3.65051e-08,-0.002731,0.035456,-0.051821,-513.318,0.0,0.0,0.287416,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1304.34,-1.83182e-05,1.55578e-05,0.050293,-0.509192,0.461106,1304.34,0.0,0.0,0.105298,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1014.72,-1.83182e-05,1.55578e-05,0.050293,-0.335473,0.256277,1014.72,0.0,0.0,0.125068,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3192.38,0.0,0.0,0.0,0.022422,-0.182957,-3192.38,939.565413,0.0,0.820542,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,68.7896,-1.74779e-06,-1.68577e-06,-0.00259,-0.004292,-0.029762,-68.7896,0.0,0.0,0.036346,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


3. Group by each unique combination of conditional values and calculate the standard deviation of every pixel within each group

In [None]:
# Build the group-by once and reuse it for both the per-pixel spread and the group ids.
grouped = data_all.groupby(CONDITIONAL_COLS)

# np.std uses ddof=0 by default (population standard deviation). The lambda wrapper is
# kept on purpose - NOTE(review): passing `np.std` directly may be dispatched by pandas
# to its own `std` (ddof=1); verify before simplifying.
stddev_group = grouped.transform(lambda pixel: np.std(pixel))

# Integer id of the group each row belongs to.
groups_numbers = grouped.ngroup()
stddev_group.head(n=5)

4. Calculate sum of stddevs of pixels

In [None]:
# Collapse the per-pixel standard deviations into one value per row.
sum_pixels = stddev_group.sum(axis="columns")
sum_pixels.shape, sum_pixels.head(n=5)

In [None]:
# Sanity check: exactly one summed value per conditional row.
assert len(data_cond) == len(sum_pixels)

5. Divide sum of stddevs by maximum value to normalize it

In [None]:
# Scale by the largest value so the maximum becomes 1.
normalized_stddevs = sum_pixels.div(sum_pixels.max())

6. Assign the calculated values to each row of conditional data

In [None]:
# Set (overwriting any existing values) the 'std' column to the normalised sum of
# per-pixel standard deviations; pandas aligns the Series with `data_cond` on index.
data_cond['std'] = normalized_stddevs

In [None]:
# Preview the conditional data after updating 'std'.
data_cond.head(n=5)

#### Add informative columns 

In [None]:
# Re-attach the photon-sum column(s) that were set aside before the group-by keys were built.
data_cond[NEUTRON_PROTON_PHOTON_SUM_COLS] = neutron_proton_photon_sum_data

In [None]:
# Final preview before saving.
data_cond.head(n=5)

In [62]:
# Write the updated `data_cond` back over the file it was loaded from.
# NOTE(review): the input is overwritten, so a later re-run of this notebook starts
# from the already-modified frame rather than the original one.
cond_output_path = os.path.join(DATA_DIR, r"data_cond_neutron_photonsum_neutron_1_3360.pkl")
data_cond.to_pickle(cond_output_path)