# Feature Extraction
## Loading Data

In [1]:
from main import GWAnalyzer

import os
import numpy as np
from itertools import product
import matplotlib.pyplot as plt
from pathlib import Path
import h5py

In [2]:
# Where the HDF5 batch lives and what it is called.
DATA_DIR = "./data"
file_name = "batch.h5"

with h5py.File(f"{DATA_DIR}/{file_name}", "r") as f:
    # Show the top-level entries (groups and datasets) of the file.
    print("Keys:", list(f.keys()))
    # `[:]` reads each dataset in full, so the arrays remain usable once the file is closed.
    arr = f["X"][:]
    labels = f["y"][:]

# The shapes are reported after the file handle has been released.
print("X shape:", arr.shape)
print("y shape:", labels.shape)
    


Keys: ['X', 'y']
X shape: (384, 2, 3072)
y shape: (384, 1)


## Extract Topological Features and Save Them as .npy Files

In [3]:
# Index 0 and 1 along axis 1 of `arr` select the two channels; each result is 2-D.
detector1, detector2 = arr[:, 0, :], arr[:, 1, :]

In [None]:
# Run the project's analyzer on the first channel, compute its topological
# features, and save them to the current working directory under the
# "detector1" name.
# NOTE(review): the meaning of the two positional True flags is defined in
# main.GWAnalyzer and is not visible here — confirm before changing them.
gwana = GWAnalyzer(detector1)
gwana.obtain_topological_features(True, True)
gwana.save_features(os.getcwd(), "detector1")

In [None]:
# Same pipeline for the second channel; `gwana` is rebound to a new analyzer
# and the result is saved to the current working directory under "detector2".
# NOTE(review): the meaning of the two positional True flags is defined in
# main.GWAnalyzer and is not visible here — confirm before changing them.
gwana = GWAnalyzer(detector2)
gwana.obtain_topological_features(True, True)
gwana.save_features(os.getcwd(), "detector2")

## Classification

In [8]:
# NOTE(review): comet_ml is imported ahead of torch here; Comet's documentation
# asks for that order so its automatic logging can hook the framework — keep
# the order if these imports are ever moved. Confirm against the installed version.
from comet_ml import Experiment
import torch
# Bare last expression: the cell displays whether a CUDA device is usable.
torch.cuda.is_available()


  warn("The `IPython.html` package has been deprecated since IPython 4.0. "
  from .autonotebook import tqdm as notebook_tqdm


True

In [9]:
# The saved feature arrays are read back from the current working directory.
path = os.getcwd()
feat_detector1 = np.load(os.path.join(path, "detector1") + "_topofeatures.npy")
feat_detector2 = np.load(os.path.join(path, "detector2") + "_topofeatures.npy")

In [10]:
# Put the two detectors' feature columns side by side (rows stay aligned).
feat = np.column_stack((feat_detector1, feat_detector2))

In [11]:
feat

array([[1.35699677e+00, 1.23834451e+00, 6.34000000e+02, ...,
        2.86483332e-13, 4.75588372e+03, 2.57630650e+03],
       [1.34163579e+00, 1.22745017e+00, 5.86000000e+02, ...,
        1.49985396e-13, 4.75588372e+03, 2.65608284e+03],
       [1.36242717e+00, 1.25178951e+00, 6.22000000e+02, ...,
        3.06250004e-13, 4.75588372e+03, 2.49183743e+03],
       ...,
       [1.34462065e+00, 1.25264183e+00, 6.35000000e+02, ...,
        1.51315183e-13, 4.75588372e+03, 2.48245198e+03],
       [1.37408867e+00, 1.25510989e+00, 6.50000000e+02, ...,
        1.76925716e-13, 4.75588372e+03, 2.53720045e+03],
       [1.36615228e+00, 1.23406257e+00, 6.83000000e+02, ...,
        2.11721410e-13, 4.75588372e+03, 2.56379256e+03]])

In [12]:
# Drop the length-1 axis of the loaded labels so they form a flat vector.
labels = labels.squeeze()

In [13]:
# Number of feature columns per sample; passed to the classifier as its input width.
input_dim = np.shape(feat)[1]

In [14]:
from train_utils import dataset_split

In [15]:
# 60 / 20 / 20 partition of the stacked features and their labels.
# NOTE(review): whether dataset_split shuffles, and with which seed, is decided
# inside train_utils — confirm if reproducible splits are required.
train_dataset, val_dataset, test_dataset = dataset_split(
    feat,
    labels,
    train_ratio=0.6,
    val_ratio=0.2,
    test_ratio=0.2,
)


In [16]:
# Stage name -> dataset mapping handed to the data module.
# The "predict" stage deliberately reuses the test split.
file_dict = dict(
    train=train_dataset,
    val=val_dataset,
    test=test_dataset,
    predict=test_dataset,
)

In [17]:
from model import TabularDataModule, Classifier

In [18]:
# Wrap the stage -> dataset mapping in the project's data module; this object is
# what the learning-rate search, fit and predict calls below receive.
tabular_dm = TabularDataModule(file_dict)


In [31]:
# Positional arguments, as named in the logged Comet parameters:
# data type, backbone type, learning rate, then the model parameters
# [input width, output width, hidden layer sizes].
model = Classifier(
    "tabular",
    "MLP",
    1e-4,
    [input_dim, 1, [300, 300, 150, 50, 50, 20, 10, 5]],
)


In [32]:
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CometLogger

In [33]:
# The Comet credential is read from the environment instead of being written
# into the notebook: a key committed with the notebook is exposed to everyone
# who can read the file or its history.
# NOTE(review): the key that was previously hardcoded here should be treated as
# leaked and revoked in the Comet account settings.
comet_api_key = os.environ.get("COMET_API_KEY")
if not comet_api_key:
    # Fail early with an actionable message rather than starting an
    # unauthenticated experiment.
    raise RuntimeError(
        "COMET_API_KEY is not set; export it before creating the CometLogger."
    )

comet_logger = CometLogger(
    api_key=comet_api_key,
    project_name="topogw",
    workspace="sangeonpark",
)

CometLogger will be initialized in online mode


In [34]:
# Stop training once val_loss has gone 50 validation checks without improving;
# any decrease counts as an improvement (min_delta is zero).
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    patience=50,
    min_delta=0.0,
    verbose=False,
)


In [35]:
# Keep the three checkpoints with the lowest val_loss, written to the current
# working directory with the epoch and loss embedded in the file name.
checkpoint_callback = ModelCheckpoint(
    dirpath=os.getcwd(),
    filename="Test-{epoch:02d}-{val_loss:.2f}",
    monitor="val_loss",
    mode="min",
    save_top_k=3,
)

In [36]:
from pytorch_lightning.callbacks import Callback, TQDMProgressBar

class PrintCallbacks(Callback):
    """Callback that prints a short message at trainer-init and end-of-training hooks.

    NOTE(review): the recorded output of the Trainer construction cell shows
    neither init message, so the two ``on_init_*`` hooks appear not to be
    invoked by the installed Lightning version — confirm and drop them if so.
    """

    def on_init_start(self, trainer):
        """Print a message when trainer initialisation starts (if the hook is dispatched)."""
        print("Starting to init trainer!")

    def on_init_end(self, trainer):
        """Print a message when trainer initialisation finishes (if the hook is dispatched)."""
        print("Trainer is init now")

    def on_train_end(self, trainer, pl_module):
        """Print a message once training has ended."""
        print("Training ended")

In [37]:
import sys
class MyProgressBar(TQDMProgressBar):
    """TQDM progress bar whose validation, predict and test bars are disabled
    when stdout is not attached to a terminal.

    The bars themselves are still created by the parent class; only their
    ``disable`` flag is flipped. The training bar is not overridden here.
    """

    @staticmethod
    def _mute_without_tty(bar):
        """Set ``bar.disable`` when stdout is not a tty, then return the same bar."""
        if not sys.stdout.isatty():
            bar.disable = True
        return bar

    def init_validation_tqdm(self):
        """Return the parent's validation bar, muted outside a terminal."""
        return self._mute_without_tty(super().init_validation_tqdm())

    def init_predict_tqdm(self):
        """Return the parent's predict bar, muted outside a terminal."""
        return self._mute_without_tty(super().init_predict_tqdm())

    def init_test_tqdm(self):
        """Return the parent's test bar, muted outside a terminal."""
        return self._mute_without_tty(super().init_test_tqdm())

In [38]:
import os
import pprint

# Handle on the process environment; only the commented-out dump below reads it.
env_var = os.environ

# Uncomment the next two lines to print every environment variable, one per line.
#print("User's Environment variable:")
#pprint.pprint(dict(env_var), width = 1)

# ONLY IF YOU ARE IN SLURM ENVIRONMENT
# Overrides SLURM_NTASKS_PER_NODE for this process before the Trainer is built.
# NOTE(review): presumably needed so Lightning's SLURM handling agrees with the
# job layout — confirm that '4' matches the actual allocation.
os.environ['SLURM_NTASKS_PER_NODE'] = '4'

In [39]:
# Trainer with Comet logging plus the four callbacks defined above:
# status prints, the tty-aware progress bar, early stopping and checkpointing.
trainer = Trainer(
    logger=comet_logger,
    callbacks=[
        PrintCallbacks(),
        MyProgressBar(),
        early_stop_callback,
        checkpoint_callback,
    ],
)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [40]:
from pytorch_lightning.tuner import Tuner


In [41]:
# Tuner bound to the trainer built above; it is used for the learning-rate search.
tuner = Tuner(trainer)


In [30]:
# Learning-rate range test; the returned finder object is only displayed, not stored.
# NOTE(review): this cell's execution count (30) precedes the model-construction
# cell (31), so the model that is trained afterwards was re-created after this
# search ran — confirm whether the suggested learning rate was meant to be used.
tuner.lr_find(model, datamodule=tabular_dm)


/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
COMET INFO: Experiment is live on comet.ml https://www.comet.com/sangeonpark/topogw/9b5b49182855472dbb82881809dbfbf2

/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /home/sangeon/TopologicalAnalysisGravitationalWave exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_steps=100` reached.00 [00:24<00:00, 

<pytorch_lightning.tuner.lr_finder._LRFinder at 0x2000efed42b0>

In [42]:
# Train the classifier on the data module; early stopping and checkpointing on
# val_loss are driven by the callbacks attached to the trainer.
trainer.fit(model, datamodule=tabular_dm)

/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
COMET INFO: Experiment is live on comet.ml https://www.comet.com/sangeonpark/topogw/f3e3bfe955fb4041a95e9f74ccb938df

/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /home/sangeon/TopologicalAnalysisGravitationalWave exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type    | Params
---------------------------------------
0 | activation | Sigmoid | 0     
1 | loss       | BCELoss | 0     
2 | layers     | MLP     | 170 K 
---------------------------------------
170 K     Trainable params
0         Non-trainable params
170 K     Total params
0.682     Total estimated model params size (MB)
SLURM auto-requeueing enabled. 

                                                                           

/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 27.89it/s, v_num=38df, train_loss=0.717]
Epoch 1: 100%|██████████| 1/1 [00:00<00:00,  7.46it/s, v_num=38df, train_loss=0.760, val_loss_step=0.715, val_loss_epoch=0.715]
Epoch 2: 100%|██████████| 1/1 [00:00<00:00,  7.49it/s, v_num=38df, train_loss=0.717, val_loss_step=0.710, val_loss_epoch=0.710]
Epoch 3: 100%|██████████| 1/1 [00:00<00:00,  7.27it/s, v_num=38df, train_loss=0.720, val_loss_step=0.708, val_loss_epoch=0.708]
Epoch 4: 100%|██████████| 1/1 [00:00<00:00,  7.72it/s, v_num=38df, train_loss=0.700, val_loss_step=0.706, val_loss_epoch=0.706]
Epoch 5: 100%|██████████| 1/1 [00:00<00:00,  7.68it/s, v_num=38df, train_loss=0.712, val_loss_step=0.704, val_loss_epoch=0.704]
Epoch 6: 100%|██████████| 1/1 [00:00<00:00,  7.71it/s, v_num=38df, train_loss=0.732, val_loss_step=0.703, val_loss_epoch=0.703]
Epoch 7: 100%|██████████| 1/1 [00:00<00:00,  7.67it/s, v_num=38df, train_loss=0.727, val_loss_step=0.702, val_loss_epoch=0.702]
Epoch 8: 100%|█████

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------





COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/sangeonpark/topogw/f3e3bfe955fb4041a95e9f74ccb938df
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     loss [8]            : (0.641897976398468, 0.7169286012649536)
COMET INFO:     train_loss          : 0.645523726940155
COMET INFO:     val_loss_epoch [77] : (0.6759973764419556, 0.7149235606193542)
COMET INFO:     val_loss_step [77]  : (0.6759973764419556, 0.7149235606193542)
COMET INFO:   Others:
COMET INFO:     Created from : pytorch-lightning
COMET INFO:   Parameters:
COMET INFO:     backbone_type : MLP
COMET INFO:     data_type     : tabular
COMET INFO:     learning_rate : 0.0001
COMET INFO:     modelparams   : [72, 1, [300, 300, 150, 50, 50, 20, 10, 5]]
COMET INFO:   Uploads:
COMET INFO:     environment details      : 1
COMET INFO:     filename                 : 1
COMET INFO:     git metadata             : 1
COMET INFO:     git-patch (uncompressed) : 1

In [43]:
# ckpt_path='best' makes the trainer restore the best checkpoint recorded by the
# checkpoint callback before predicting. The data module's "predict" stage was
# mapped to the test split in file_dict, so these are test-set predictions.
predicted_list = trainer.predict(model, tabular_dm, ckpt_path='best')

COMET INFO: Experiment is live on comet.ml https://www.comet.com/sangeonpark/topogw/f3e3bfe955fb4041a95e9f74ccb938df

Restoring states from the checkpoint path at /home/sangeon/TopologicalAnalysisGravitationalWave/Test-epoch=26-val_loss=0.68.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/sangeon/TopologicalAnalysisGravitationalWave/Test-epoch=26-val_loss=0.68.ckpt
SLURM auto-requeueing enabled. Setting signal handlers.
/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

COMET INFO: -----------------------------------
COMET INFO: Comet.ml ExistingExperiment Summary
COMET INFO: -----------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/sangeonpark/topogw/f3e3bfe955fb4041a95e9f74ccb938df
COMET INFO:   Others:
COMET INFO:     Created from : pytorch-lightning
COMET INFO:   Parameters:
COMET INFO:     backbone_type : MLP
COMET INFO:     data_type     : tabular
COMET INFO:     learning_rate : 0.0001
COMET INFO:     modelparams   : [72, 1, [300, 300, 150, 50, 50, 20, 10, 5]]
COMET INFO:   Uploads:
COMET INFO:     model graph : 1
COMET INFO: -----------------------------------
COMET INFO: Uploading 21 metrics, params and output messages


In [44]:
# Only the first prediction batch is unpacked: item 0 is used as the model
# output and item 1 as the matching labels.
preds, label = predicted_list[0][0], predicted_list[0][1]

In [45]:
# Turn the model outputs of the first prediction batch into hard 0/1 labels and
# score them against the ground truth.
# reshape(-1) is used instead of a bare squeeze(): squeeze() turns a
# single-sample batch into a 0-d tensor (len() then raises), and comparing an
# (N,) tensor with an (N, 1) tensor would broadcast to an N x N matrix.
# NOTE(review): the model summary lists a Sigmoid activation; confirm that the
# predict step returns pre-activation scores, otherwise sigmoid is applied twice here.
rounded_preds = torch.round(torch.sigmoid(preds)).reshape(-1)
correct = (rounded_preds == label.reshape(-1)).float()
# Calculate accuracy as the fraction of matching entries.
accuracy = correct.mean()

In [46]:
rounded_preds

tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 1.])

In [47]:
label

tensor([0., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
        1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0., 0., 0.])

In [48]:
rounded_preds == label

tensor([ True, False,  True, False,  True, False,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True, False, False,  True,  True, False,
         True,  True, False,  True,  True, False, False,  True,  True,  True,
         True, False, False, False,  True,  True, False, False, False, False,
        False,  True,  True, False,  True, False,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True, False,  True, False])

In [49]:
accuracy

tensor(0.7051)