In [None]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this is where the `gerumo` package lives — confirm.
sys.path.append('..')

In [None]:
import time

import numpy as np
import seaborn as sns; sns.set()

from gerumo.data.dataset import describe_dataset, load_dataset
from gerumo.data.generators import build_generator
from gerumo.utils.engine import (
    setup_cfg, setup_environment, setup_experiment, setup_model,
    build_dataset, build_callbacks, build_metrics, build_optimizer, build_loss)

from gerumo.data.constants import REGRESSION_TARGET_UNITS, REGRESSION_TARGETS

class dotdict(dict):
    """Dictionary whose items can also be read, written and deleted as attributes.

    Reading an attribute that is not a key yields ``None`` instead of raising;
    deleting an attribute that is not a key raises ``KeyError``.
    """

    def __getattr__(self, name, default=None):
        # Only reached when normal attribute lookup fails; mirrors dict.get,
        # including its optional default.
        return self.get(name, default)

    def __setattr__(self, name, value):
        # Attribute assignment stores a dictionary item.
        self[name] = value

    def __delattr__(self, name):
        # Attribute deletion removes the dictionary item.
        del self[name]


# Stand-in for parsed command-line arguments; filled in by the next cell.
args = dotdict()

In [None]:
# Experiment configuration to load, plus an empty list of option overrides.
# NOTE(review): the path is absolute and machine-specific — consider deriving
# it from the repository root so the notebook runs on other machines.
args.update(
    config_file='/home/asuka/projects/gerumo2/config/feature_experiments/cnn_regression_cf.yml',
    opts=[],
)

In [None]:
# Build the experiment configuration from `args` (config_file / opts set in
# the previous cell), then initialise the environment with it.
# NOTE(review): setup_environment is assumed to return a logger — confirm in
# gerumo.utils.engine.
cfg = setup_cfg(args)
logger = setup_environment(cfg)

In [None]:
# Show the OUTPUT section of the loaded configuration.
print(cfg.OUTPUT)

In [None]:
# Show the DATASETS section of the loaded configuration.
print(cfg.DATASETS)

## Target units

In [None]:
# Print one "<target> [<unit>]" line per regression target.
# Only the first three targets are paired with REGRESSION_TARGET_UNITS.
for t, u in zip(REGRESSION_TARGETS[:3], REGRESSION_TARGET_UNITS):
    print('{} [{}]'.format(t, u))

## Build dataset

In [None]:
# Dataset split to inspect; the selection cell below accepts
# 'train', 'validation' or 'test'.
subset = 'train'

### Load dataset subset

In [None]:
# Map each supported subset name to the matching node under cfg.DATASETS.
subset_nodes = {
    'train': 'TRAIN',
    'validation': 'VALIDATION',
    'test': 'TEST',
}
if subset not in subset_nodes:
    raise ValueError('Invalid subset', subset)

# Each subset node is read for the same three entries.
subset_cfg = getattr(cfg.DATASETS, subset_nodes[subset])
events_path = subset_cfg.EVENTS
telescopes_path = subset_cfg.TELESCOPES
replace_folder = subset_cfg.FOLDER

In [None]:
# Load the selected subset, then summarise its true energy, azimuth and
# altitude columns.
dataset = load_dataset(events_path, telescopes_path, replace_folder)
dataset.loc[:, ['true_energy', 'true_az', 'true_alt']].describe()

In [None]:
# Histogram of the true energy with a logarithmic count axis.
dataset.hist(column=['true_energy'], log=True);

In [None]:
# Side-by-side histograms of the true azimuth and altitude.
dataset.hist(column=['true_az', 'true_alt'], figsize=(12, 6));

### Aggregate

In [None]:
# Pull the aggregation switches out of the configuration.
aggregation_cfg = cfg.DATASETS.AGGREGATION
center_az = aggregation_cfg.CENTER_AZ
log10_mc_energy = aggregation_cfg.LOG10_ENERGY
hdf5_file = aggregation_cfg.HDF5_FILEPATH
remove_nan = aggregation_cfg.REMOVE_NAN
ignore_particle_types = aggregation_cfg.IGNORE_PARTICLE_TYPES

# Optional per-target domains as {target: domain}; None when filtering by
# domain is disabled. The regression settings are only read when enabled.
domains = (
    dict(zip(cfg.OUTPUT.REGRESSION.TARGETS, cfg.OUTPUT.REGRESSION.TARGETS_DOMAINS))
    if aggregation_cfg.IGNORE_BY_DOMAINS
    else None
)

#### Center Azimuth

In [None]:
# Wrap the azimuth (in degrees) into [-180, 180] via arctan2(sin, cos).
if center_az:
    # Vectorised: the NumPy ufuncs act on the whole column at once instead of
    # calling a Python lambda for every row.
    az_rad = np.deg2rad(dataset['true_az'])
    dataset['true_az'] = np.rad2deg(np.arctan2(np.sin(az_rad), np.cos(az_rad)))
    # Clear the flag so that re-running this cell skips the step.
    center_az = False
dataset[['true_az']].hist();

#### Convert energy into log scale

In [None]:
# Add a log10-scaled copy of the true energy as 'true_log10_energy'.
if log10_mc_energy:
    # np.log10 is a ufunc, so it is applied to the whole column directly
    # rather than through a per-row lambda.
    dataset['true_log10_energy'] = np.log10(dataset['true_energy'])
    # Clear the flag so that re-running this cell skips the step.
    log10_mc_energy = False
# Plot only when the column exists: with the conversion disabled this cell
# never creates it, and indexing it unconditionally would raise KeyError.
if 'true_log10_energy' in dataset.columns:
    dataset['true_log10_energy'].hist(log=False);

In [None]:
# When configured, discard every row that contains at least one missing
# value, then summarise the true energy, azimuth and altitude columns.
if remove_nan:
    dataset.dropna(axis='index', how='any', inplace=True)
    # Clear the flag so that re-running this cell skips the step.
    remove_nan = False
dataset.loc[:, ['true_energy', 'true_az', 'true_alt']].describe()

In [None]:
# Keep only the rows whose value for each configured target lies inside the
# closed interval [domain[0], domain[1]].
if domains is not None:
    for target, domain in domains.items():
        in_domain = dataset[target].between(domain[0], domain[1])
        dataset = dataset[in_domain]
dataset.loc[:, ['true_energy', 'true_az', 'true_alt']].describe()

In [None]:
# Azimuth and altitude distributions after the aggregation steps above.
dataset.hist(column=['true_az', 'true_alt'], figsize=(12, 6));