# 1. Set Up Notebook and Load Data 

In [1]:
# PYTHON Imports 
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import math
import statistics
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from pathlib import Path
import glob
import os
import ipywidgets as widgets
from IPython.display import clear_output
import sys
import time
import json

import umap
import seaborn as sns
import fnmatch
# ML Imports
import tensorflow as tf
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
# ASTROPHY Imports
import astropy 
from astropy.table import Table
from astropy.io import fits
from sherpa.astro import ui
# CIAO Imports
import ciao_contrib.runtool
from ciao_contrib.runtool import *
# CUSTOM Imports
from data_extraction_functions import *
from data_exploration_functions import *
from data_representation_functions import *

# Specify global path
global_path = '/Users/steven/Library/Mobile Documents/com~apple~CloudDocs/0-CfA/4-Data/Datasets'
global_folders = list_folders_fun(global_path)

# Select dataset
set_widget = widgets.Dropdown(options=global_folders[:],value=global_folders[0],description='Set :',disabled=False); set_widget

2023-03-12 17:40:55.923678: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Dropdown(description='Set :', options=('All', 'Bona'), value='All')

In [2]:
# Set ID
set_id = set_widget.value
# Select Input
files = os.listdir(f'{global_path}/{set_id}/')
input_files = [f for f in files if fnmatch.fnmatch(f, 'hist2D*')]
input_widget = widgets.Dropdown(options=input_files[:],value=input_files[0],description='TSNE File :',disabled=False); input_widget

Dropdown(description='TSNE File :', options=('hist2D-All-nE20-nt30-normnone.pkl',), value='hist2D-All-nE20-nt3…

In [3]:
# Load the DataFrame from the CSV file
input_file = input_widget.value
# Load histogram dictionary
with open(f'{global_path}/{set_id}/{input_file}', 'rb') as f:
    hist_dict = pickle.load(f)
# Flatten histograms in the dictionary and get IDs
ids = hist_dict.keys()
histograms = hist_dict.values()
features = np.array([np.array(h) for h in histograms])
#features[np.isnan(features)] = 0.0
# Load properties
df_properties_input = pd.read_csv(f'{global_path}/{set_id}/properties-input-{set_id}.csv')
df_properties_input = df_properties_input[df_properties_input['obsreg_id'].isin(list(ids))]
df_properties = df_properties_input.drop_duplicates('obsreg_id', keep='first').reset_index(drop=True)
# Print eventfiles and properties number of IDs
print("Number of Features: ", len(features))
print("Number of Property Sets: ", len(df_properties))

df_properties.head()

Number of Features:  82283
Number of Property Sets:  82283


Unnamed: 0,cnts_aper_b,cnts_aperbkg_b,src_cnts_aper_b,flux_aper_b,hard_hm,hard_hs,hard_ms,var_prob_b,var_prob_h,var_prob_m,var_prob_s,obsreg_id
0,66,38,68.744294,5.470823e-13,0.22361,0.745784,0.618364,0.310792,0.117484,0.675801,,10003_2
1,53,20,55.350611,5.470823e-13,0.398501,0.488445,0.094941,0.52482,0.12701,0.676527,0.485168,10004_1
2,39,28,41.900915,4.311669e-13,0.873204,0.999375,0.999375,0.765525,0.888159,,,10018_1
3,144,1354,94.226698,5.097861e-14,0.058713,-0.103685,-0.162399,0.105254,0.252282,0.650681,0.283007,10025_72
4,202,2417,118.341424,7.185242e-14,0.339788,0.678326,0.413492,,0.65017,,,10025_74


# 2. Prepare Data

In [5]:
# Prepare Data
df_label = df_properties.copy()
mask_nonan = df_label.notna().all(axis=1)
index_nonan = list(df_label.notna().all(axis=1).index[df_label.notna().all(axis=1)])
df_label = df_label[mask_nonan]
ID = df_label['obsreg_id'].values
X = np.array([features[i] for i in index_nonan])
# X = X/np.max(X)
hr_hm = np.array(df_label['hard_hm'].values)
hr_hs = np.array(df_label['hard_hs'].values)
hr_ms = np.array(df_label['hard_ms'].values)
var_h = np.array(df_label['var_prob_h'].values)
var_m = np.array(df_label['var_prob_m'].values)
var_s = np.array(df_label['var_prob_s'].values)
var_b = np.array(df_label['var_prob_b'].values)
# Split into training and test data
X_train, X_test, hr_hm_train, hr_hm_test, hr_hs_train, hr_hs_test, hr_ms_train, hr_ms_test, var_h_train, var_h_test, var_m_train, var_m_test, var_s_train, var_s_test, var_b_train, var_b_test = train_test_split(X, hr_hm, hr_hs, hr_ms, var_h, var_m, var_s, var_b, test_size=0.2, random_state=42)
# Define Input Shape
in_shape = X[0].shape
in_shape_1 = in_shape[0]
in_shape_2 = in_shape[1]
# Reshape data to 4D tensor for use with CNN
X_train = X_train.reshape(X_train.shape[0], in_shape_1, in_shape_2, 1)
X_test = X_test.reshape(X_test.shape[0], in_shape_1, in_shape_2, 1)


In [6]:
from tensorflow.keras import layers, models
# Define your CNN architecture
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(in_shape_1,in_shape_2, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(7)  # one output for each label
])

# Compile your model
model.compile(optimizer='adam',
              loss=tf.keras.losses.MeanSquaredError(),
              metrics=['accuracy'])


2023-03-12 17:42:04.311150: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-12 17:42:04.313881: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-12 17:42:04.314259: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M2 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



In [7]:
# Train your model
model.fit(X_train, [hr_hm_train, hr_hs_train, hr_ms_train, var_h_train, var_m_train, var_s_train, var_b_train],
          epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10


AttributeError: in user code:

    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/training.py", line 1028, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/training.py", line 1122, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 577, in update_state
        self.build(y_pred, y_true)
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 483, in build
        self._metrics = tf.__internal__.nest.map_structure_up_to(
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 631, in _get_metric_objects
        return [self._get_metric_object(m, y_t, y_p) for m in metrics]
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 631, in <listcomp>
        return [self._get_metric_object(m, y_t, y_p) for m in metrics]
    File "/opt/anaconda3/envs/ciao-4.15/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 652, in _get_metric_object
        y_t_rank = len(y_t.shape.as_list())

    AttributeError: 'tuple' object has no attribute 'shape'
