# I. Set Up

In [2]:
# PYTHON Imports
import glob
import json
import math
import os
import pickle
import statistics
import sys
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
import ipywidgets as widgets
from IPython.display import clear_output
from matplotlib.colors import LogNorm
# ASTROPY Imports
import astropy 
from astropy.table import Table
from astropy.io import fits
from sherpa.astro import ui
# CIAO Imports
import ciao_contrib.runtool
from ciao_contrib.runtool import *
# CUSTOM Imports
from data_extraction_functions import *
from data_exploration_functions import *
from data_representation_functions import *

# Root folder that holds one sub-folder per dataset (later cells read
# f'{global_path}/{set_id}/...'). The original machine-specific location is
# kept as the default; set the CFA_DATASETS_PATH environment variable to run
# the notebook on another machine without editing this cell.
global_path = os.environ.get(
    'CFA_DATASETS_PATH',
    '/Users/steven/Library/Mobile Documents/com~apple~CloudDocs/0-CfA/4-Data/Datasets',
)
# Folder listing under the root; used as the options of the dataset dropdown
global_folders = list_folders_fun(global_path)

# Define a custom encoder that knows how to handle NumPy arrays and scalars
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also serialises NumPy arrays and NumPy scalars.

    Use it via ``json.dump(obj, f, cls=NumpyEncoder)``. Arrays become nested
    lists; NumPy integer, floating and boolean scalars become the matching
    built-in Python values. Anything else is handed to the base class, which
    raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        Called by the encoder only for objects it cannot serialise natively.

        Raises:
            TypeError: if ``obj`` is not a supported NumPy type (raised by
                ``json.JSONEncoder.default``).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()  # nested lists of built-in Python scalars
        # Explicit built-in constructors guarantee a plain Python value, which
        # the encoder can then serialise without calling default() again.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        return super().default(obj)

# Dataset selector: every discovered folder is offered as an option and the
# second entry (index 1) is preselected.
set_widget = widgets.Dropdown(
    options=global_folders[:],
    value=global_folders[1],
    description='Set :',
    disabled=False,
)
# Last expression of the cell, so the widget is rendered as the cell output
set_widget

Dropdown(description='Set :', index=1, options=('All', 'Bona'), value='Bona')

# II. Load Data

In [3]:
# Dataset currently chosen in the dropdown
set_id = set_widget.value

# Read the event table for that dataset and group its rows by 'obsreg_id',
# so that each group corresponds to one event file.
eventfiles_csv = f'{global_path}/{set_id}/eventfiles-input-{set_id}.csv'
df_eventfiles_input = pd.read_csv(eventfiles_csv)
df_eventfiles_group = df_eventfiles_input.groupby('obsreg_id')

n_eventfiles = df_eventfiles_group.ngroups
print("Number of Eventfiles: ", n_eventfiles)


Number of Eventfiles:  82283


# III. Create features

2D E-t Histogram

In [3]:
# Binning settings for the 2D E-t histogram
nbins_E = 20
nbins_t = 30
normalised = 'none'

# Parallel lists: group ids and the feature computed for each group.
# Both are kept as module-level names because later cells may reuse them.
feature_list = []
id_list = []
# Progress counter; pre-set so it is defined even if there are no groups
count = 0
count_limit = df_eventfiles_group.ngroups

for count, (id_name, dfi) in enumerate(df_eventfiles_group, start=1):
    id_list.append(id_name)
    feature_list.append(
        hist2D_Et(dfi, id_name, nbins_E, nbins_t, norm=normalised, plot=False)
    )
    # wait=True postpones the clear until new output arrives, so the counter
    # line is replaced in place instead of flickering.
    clear_output(wait=True)
    print(f'Counter: {count} of {count_limit}')
print('DONE!!!')

# Save as {obsreg_id: feature}. Binning and normalisation are encoded in the
# file name so that different settings do not overwrite each other.
# (An earlier variant wrote JSON via NumpyEncoder; pickle is the format in use.)
hist_dict = dict(zip(id_list, feature_list))
hist2d_file = f'{global_path}/{set_id}/histEt-{set_id}-nE{nbins_E}-nt{nbins_t}-norm{normalised}.pkl'
with open(hist2d_file, 'wb') as f:
    pickle.dump(hist_dict, f)


Counter: 82283 of 82283
DONE!!!


3D Histogram

In [4]:
# Binning settings for the 3D histogram
# (trailing comments keep the alternative values noted in the original cell)
nbins_E = 17   # alternative noted: 20
nbins_t = 25   # alternative noted: 30
nbins_dt = 26  # alternative noted: 35

# Parallel lists: group ids and the feature computed for each group.
# The re-save cell further down reads both names, so they must stay defined.
feature_list = []
id_list = []
# Progress counter; pre-set so it is defined even if there are no groups
count = 0
count_limit = df_eventfiles_group.ngroups

for count, (id_name, dfi) in enumerate(df_eventfiles_group, start=1):
    id_list.append(id_name)
    # wait=True postpones the clear until the next output is produced
    clear_output(wait=True)
    feature_list.append(
        hist3D(dfi, id_name, nbins_E, nbins_t, nbins_dt, plot=False)
    )
    print(f'Counter: {count} of {count_limit}')
print('DONE!!!')

# Save as {obsreg_id: feature}; the bin counts are encoded in the file name
hist_dict = dict(zip(id_list, feature_list))
hist3d_file = f'{global_path}/{set_id}/hist3D-{set_id}-nE{nbins_E}-nt{nbins_t}-ndt{nbins_dt}.pkl'
with open(hist3d_file, 'wb') as f:
    pickle.dump(hist_dict, f)

Counter: 82283 of 82283
DONE!!!


In [None]:
# Rebuild the {id: feature} dictionary from the lists currently in memory and
# write it to the 3D-histogram pickle for the current bin settings.
# Needs id_list, feature_list and the nbins_* values from the 3D histogram cell.
hist3d_file = f'{global_path}/{set_id}/hist3D-{set_id}-nE{nbins_E}-nt{nbins_t}-ndt{nbins_dt}.pkl'
hist_dict = dict(zip(id_list, feature_list))
with open(hist3d_file, 'wb') as f:
    pickle.dump(hist_dict, f)

In [None]:
# Reload the 3D-histogram dictionary for the current bin settings.
# NOTE: pickle.load can execute arbitrary code, so only open files that this
# notebook (or another trusted source) produced.
hist3d_file = f'{global_path}/{set_id}/hist3D-{set_id}-nE{nbins_E}-nt{nbins_t}-ndt{nbins_dt}.pkl'
with open(hist3d_file, 'rb') as f:
    hist_dict = pickle.load(f)