# Feature Extraction
## Loading Data

In [1]:
from main import GWAnalyzer

import os
import numpy as np
from itertools import product
import matplotlib.pyplot as plt
from pathlib import Path
import h5py

In [2]:
DATA_DIR = "./data"; file_name = "batch.h5"

with h5py.File(f"{DATA_DIR}/{file_name}", "r") as f:

    # Print the keys (groups and datasets) in the file
    print("Keys:", list(f.keys()))
    arr = f['X'][:]
    labels = f['y'][:]
    
    
    print("X shape:", arr.shape)
    print("y shape:", labels.shape)
    


Keys: ['X', 'y']
X shape: (384, 2, 3072)
y shape: (384, 1)


## Extract Topological Features and Save them as npy 

In [3]:
detector1 = arr[:,0,:]
detector2 = arr[:,1,:]

In [4]:
gwana = GWAnalyzer(detector1)
gwana.obtain_topological_features(True, True)
gwana.save_features(os.getcwd(), "detector1")

start processing spectrograms


0it [00:00, ?it/s]

Processing chunk of shape: (384, 1267, 3)
Chunk Elapsed Time: 1.8660902976989746
start processing point cloud features


0it [00:00, ?it/s]

Processing chunk of shape: (384, 3072)
Chunk Elapsed Time: 99.34615445137024
Shape of the final features is (384, 30)


In [5]:
gwana = GWAnalyzer(detector2)
gwana.obtain_topological_features(True, True)
gwana.save_features(os.getcwd(), "detector2")

start processing spectrograms


0it [00:00, ?it/s]

Processing chunk of shape: (384, 1395, 3)
Chunk Elapsed Time: 2.1024577617645264
start processing point cloud features


0it [00:00, ?it/s]

Processing chunk of shape: (384, 3072)
Chunk Elapsed Time: 101.51838612556458
Shape of the final features is (384, 30)


## Classification

In [3]:
from comet_ml import Experiment
import torch
torch.cuda.is_available()


  warn("The `IPython.html` package has been deprecated since IPython 4.0. "
  from .autonotebook import tqdm as notebook_tqdm


True

In [4]:
path = os.getcwd()
feat_detector1 = np.load(f'{os.path.join(path, "detector1")}_topofeatures.npy')
feat_detector2 = np.load(f'{os.path.join(path, "detector2")}_topofeatures.npy')

In [5]:
feat= np.column_stack([feat_detector1, feat_detector2])

In [6]:
labels = np.squeeze(labels)

In [7]:
feat.shape, labels.shape

((384, 60), (384,))

In [8]:
from train_utils import dataset_split

In [9]:
train_dataset, val_dataset, test_dataset = dataset_split(feat, labels, train_ratio = 0.6, val_ratio = 0.2, test_ratio = 0.2)


In [10]:
file_dict = {'train':train_dataset,
             'val':val_dataset,
             'test':test_dataset,
             'predict':test_dataset}

In [70]:
from model import TabularDataModule, Classifier

In [71]:
tabular_dm = TabularDataModule(file_dict)


In [72]:
model = Classifier("tabular","MLP", 1e-4, [60, 1, [200,100,50,50,50,20,10,5]])


In [73]:
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CometLogger

In [74]:
comet_logger = CometLogger(
  api_key="CkkrVkSk6Vr2WKlbXIzlkhNlE",
  project_name="topogw",
  workspace="sangeonpark"
)

CometLogger will be initialized in online mode


In [85]:
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=50, verbose=False)


In [86]:
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath=os.getcwd(),
    filename="Test-{epoch:02d}-{val_loss:.2f}",
    save_top_k=3,
    mode="min",
)

In [87]:
from pytorch_lightning.callbacks import Callback, TQDMProgressBar

class PrintCallbacks(Callback):
    def on_init_start(self, trainer):
        print("Starting to init trainer!")

    def on_init_end(self, trainer):
        print("Trainer is init now")

    def on_train_end(self, trainer, pl_module):
        print("Training ended")

In [88]:
import sys
class MyProgressBar(TQDMProgressBar):
    def init_validation_tqdm(self):
        bar = super().init_validation_tqdm()
        if not sys.stdout.isatty():
            bar.disable = True
        return bar

    def init_predict_tqdm(self):
        bar = super().init_predict_tqdm()
        if not sys.stdout.isatty():
            bar.disable = True
        return bar

    def init_test_tqdm(self):
        bar = super().init_test_tqdm()
        if not sys.stdout.isatty():
            bar.disable = True
        return bar

In [89]:
#import os 
#import pprint 
  
# Get the list of user's 
#env_var = os.environ 
  
# Print the list of user's 
#print("User's Environment variable:") 
#pprint.pprint(dict(env_var), width = 1) 

# ONLY IF YOU ARE IN SLURM ENVIRONMENT
#os.environ['SLURM_NTASKS_PER_NODE'] = '4'

In [90]:
trainer = Trainer(callbacks=[PrintCallbacks(),MyProgressBar(),early_stop_callback,checkpoint_callback],logger=comet_logger)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [91]:
from pytorch_lightning.tuner import Tuner


In [92]:
tuner = Tuner(trainer)


In [93]:
tuner.lr_find(model, datamodule=tabular_dm)


/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
COMET INFO: Experiment is live on comet.ml https://www.comet.com/sangeonpark/topogw/975a1e44ad934a96a99d702c5c60c557

/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /home/sangeon/TopologicalAnalysisGravitationalWave exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


[A[Ag best initial lr:   0%|          | 0/100 [00:00<?, ?it/

<pytorch_lightning.tuner.lr_finder._LRFinder at 0x2000ec6a88e0>

In [94]:
trainer.fit(model, datamodule=tabular_dm)

COMET INFO: Experiment is live on comet.ml https://www.comet.com/sangeonpark/topogw/975a1e44ad934a96a99d702c5c60c557

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type    | Params
---------------------------------------
0 | activation | Sigmoid | 0     
1 | loss       | BCELoss | 0     
2 | layers     | MLP     | 44.7 K
---------------------------------------
44.7 K    Trainable params
0         Non-trainable params
44.7 K    Total params
0.179     Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 28.79it/s, v_num=c557, train_loss=0.581]
Epoch 1: 100%|██████████| 1/1 [00:00<00:00,  7.21it/s, v_num=c557, train_loss=0.590, val_loss_step=0.658, val_loss_epoch=0.658]
Epoch 2: 100%|██████████| 1/1 [00:00<00:00,  7.18it/s, v_num=c557, train_loss=0.585, val_loss_step=0.658, val_loss_epoch=0.658]
Epoch 3: 100%|██████████| 1/1 [00:00<00:00,  7.48it/s, v_num=c557, train_loss=0.607, val_loss_step=0.659, val_loss_epoch=0.659]
Epoch 4: 100%|██████████| 1/1 [00:00<00:00,  7.52it/s, v_num=c557, train_loss=0.588, val_loss_step=0.659, val_loss_epoch=0.659]
Epoch 5: 100%|██████████| 1/1 [00:00<00:00,  7.44it/s, v_num=c557, train_loss=0.594, val_loss_step=0.659, val_loss_epoch=0.659]
Epoch 6: 100%|██████████| 1/1 [00:00<00:00,  7.12it/s, v_num=c557, train_loss=0.610, val_loss_step=0.658, val_loss_epoch=0.658]
Epoch 7: 100%|██████████| 1/1 [00:00<00:00,  7.33it/s, v_num=c557, train_loss=0.584, val_loss_step=0.658, val_loss_epoch=0.658]
Epoch 8: 100%|█████

COMET INFO: -----------------------------------
COMET INFO: Comet.ml ExistingExperiment Summary





COMET INFO: -----------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/sangeonpark/topogw/975a1e44ad934a96a99d702c5c60c557
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     loss [7]            : (0.5811454057693481, 0.5984839200973511)
COMET INFO:     train_loss          : 0.5873777866363525
COMET INFO:     val_loss_epoch [70] : (0.6580713987350464, 1.0218634605407715)
COMET INFO:     val_loss_step [70]  : (0.6580713987350464, 1.0218634605407715)
COMET INFO:   Others:
COMET INFO:     Created from : pytorch-lightning
COMET INFO:   Parameters:
COMET INFO:     backbone_type : MLP
COMET INFO:     data_type     : tabular
COMET INFO:     learning_rate : 0.0001
COMET INFO:     modelparams   : [60, 1, [200, 100, 50, 50, 50, 20, 10, 5]]
COMET INFO:   Uploads:
COMET INFO:     model graph : 1
COMET INFO: -----------------------------------
COMET INFO: Uploading 1 metrics, params and output

In [95]:
predicted_list = trainer.predict(model, tabular_dm, ckpt_path='best')

COMET INFO: Experiment is live on comet.ml https://www.comet.com/sangeonpark/topogw/975a1e44ad934a96a99d702c5c60c557

Restoring states from the checkpoint path at /home/sangeon/TopologicalAnalysisGravitationalWave/Test-epoch=19-val_loss=0.66.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/sangeon/TopologicalAnalysisGravitationalWave/Test-epoch=19-val_loss=0.66.ckpt
SLURM auto-requeueing enabled. Setting signal handlers.
/nobackup/users/sangeon/condas/anaconda3/envs/studies/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

COMET INFO: -----------------------------------
COMET INFO: Comet.ml ExistingExperiment Summary
COMET INFO: -----------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/sangeonpark/topogw/975a1e44ad934a96a99d702c5c60c557
COMET INFO:   Others:
COMET INFO:     Created from : pytorch-lightning
COMET INFO:   Parameters:
COMET INFO:     backbone_type : MLP
COMET INFO:     data_type     : tabular
COMET INFO:     learning_rate : 0.0001
COMET INFO:     modelparams   : [60, 1, [200, 100, 50, 50, 50, 20, 10, 5]]
COMET INFO:   Uploads:
COMET INFO:     model graph : 1
COMET INFO: -----------------------------------
COMET INFO: Uploading 21 metrics, params and output messages


In [96]:
preds = predicted_list[0][0]
label = predicted_list[0][1]

In [103]:
rounded_preds = torch.round(torch.sigmoid(preds)).squeeze()
correct = (rounded_preds == label).float() 
    # Calculate accuracy
accuracy = correct.sum() / len(rounded_preds) 

In [106]:
rounded_preds == label

tensor([False, False,  True,  True, False,  True,  True,  True,  True,  True,
         True,  True,  True, False,  True, False,  True,  True, False, False,
        False, False,  True, False,  True,  True,  True,  True, False,  True,
         True,  True,  True, False, False, False,  True,  True,  True,  True,
         True, False, False,  True,  True, False,  True,  True,  True,  True,
        False,  True,  True,  True, False, False,  True, False, False,  True,
        False,  True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True, False,  True,  True, False,  True,  True])

In [104]:
accuracy

tensor(0.6667)

In [99]:
label

tensor([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0.,
        1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 1.,
        0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
        1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        1., 0., 0., 1., 0., 0.])