In [1]:
import os

os.chdir("../")

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
import timm
import numpy as np

import argparse
import torch_optimizer as optimizer
import wandb
from pathlib import Path
from config import settings

import models.spinalnet_resnet as spinalnet_resnet
import models.effnet as effnet
import models.densenet as densenet
import models.spinalnet_vgg as spinalnet_vgg
import models.vitL16 as vitL16
import models.alexnet_vgg as alexnet_vgg
import models.resnet18 as resnet18

import  data
# import data.segmentation as segmentation
# import metrics.metrics as metrics
from data import DataPart
from train import Trainer
import metrics


all_models = [
    ('ResNet18', resnet18),
    ('EfficientNet', effnet),
    # ('DenseNet', densenet),
    # ('SpinalNet_ResNet', spinalnet_resnet),
    # ('SpinalNet_VGG', spinalnet_vgg),
    # ('ViTL16', vitL16),
    # ('AlexNet_VGG', alexnet_vgg)
]

all_optimizers = [
    ('SGD', optim.SGD),
    ('Rprop', optim.Rprop),
    ('Adam', optim.Adam),
    ('NAdam', optim.NAdam),
    ('RAdam', optim.RAdam),
    ('AdamW', optim.AdamW),
    #('Adagrad', optim.Adagrad),
    ('RMSprop', optim.RMSprop),
    #('Adadelta', optim.Adadelta),
    ('DiffGrad', optimizer.DiffGrad),
    # ('LBFGS', optim.LBFGS)
]

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
datasets, dataloaders = data.create_dataloaders()

train_loader = dataloaders[DataPart.TRAIN]
val_loader = dataloaders[DataPart.VALIDATE]
test_loader = dataloaders[DataPart.TEST_DR5]


  return bound(*args, **kwds)


INFO: Query finished. [astroquery.utils.tap.core]
252
84
84
244
85


In [3]:

# parser = argparse.ArgumentParser(description='Model training')
# parser.add_argument('--models', nargs='+', default=['ResNet18', 'EfficientNet', 'DenseNet', 'SpinalNet_ResNet', 'SpinalNet_VGG', 'ViTL16', 'AlexNet_VGG'],
#                     help='List of models to train (default: all)')
# parser.add_argument('--epochs', type=int, default=5, help='Number of epochs to train (default: 5)')
# parser.add_argument('--lr', type=float, default=0.0001, help='Learning rate for optimizer (default: 0.0001)')
# parser.add_argument('--mm', type=float, default=0.9, help='Momentum for optimizer (default: 0.9)')
# parser.add_argument('--optimizer', choices=[name for name, _ in all_optimizers], default='Adam', help='Optimizer to use (default: Adam)')

# args = parser.parse_args()

# selected_models = [(model_name, model) for model_name, model in models if model_name in args.models]

# num_epochs = args.epochs
# lr = args.lr
# momentum = args.mm
# optimizer_name = args.optimizer


In [4]:
selected_models = all_models[:2]

num_epochs = 1
lr = 0.0001
momentum = 0.9
optimizer_name = "Adam"




In [5]:
if settings.wandb_api_token:
    wandb.login(key=settings.wandb_api_token)
    wandb.init(project='cluster-search', config={}, reinit=True)
else:
    wandb.init(project='cluster-search', config={}, reinit=True)


wandb.config.models = [name for name, _ in selected_models]
wandb.config.num_epochs = num_epochs
wandb.config.lr = lr
wandb.config.momentum = momentum
wandb.config.optimizer = optimizer_name

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mzehov1[0m ([33mmzekhov[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/mszekhov/.netrc


In [6]:

# criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()

results = {}
val_results = {}

classes = ('random', 'clusters')


In [7]:
for model_name, model in selected_models:

     model = model.load_model()

     optimizer_class = dict(all_optimizers)[optimizer_name]

     if optimizer_name in ['SGD', 'RMSprop']:
          optimizer = optimizer_class(model.parameters(), lr=lr, momentum=momentum) 
     else:
          optimizer = optimizer_class(model.parameters(), lr=lr)
         
     trainer = Trainer(
          model=model,
          criterion=criterion,
          optimizer=optimizer,
          train_dataloader=train_loader,
          val_dataloader=val_loader,

     )

     trainer.train(num_epochs)

     for step in range(trainer.global_step):
          wandb.log(
               {
                    f'{model_name}_{optimizer_name}_train_loss': trainer.history['train_loss'][step], 
                    f'{model_name}_{optimizer_name}_train_accuracy':trainer.history['train_acc'][step], 
                    'global_step': step + 1})
          
     for epoch in range(num_epochs):
          wandb.log(
               {
                    f'{model_name}_{optimizer_name}_val_loss': trainer.history['val_loss'][epoch], 
                    f'{model_name}_{optimizer_name}_val_accuracy': trainer.history['val_acc'][epoch], 
                    'epoch': epoch})

     
     train_table = wandb.Table(
          data=[
               [
                    step, 
                    trainer.history['train_loss'][step], 
                    trainer.history['train_acc'][step]
               ] for step in range(trainer.global_step)],
          columns=["Epoch", "Loss", "Accuracy"])

     val_table = wandb.Table(
          data=[
               [
                    epoch, 
                    trainer.history['val_loss'][epoch], 
                    trainer.history['val_acc'][epoch]
               ] for epoch in range(num_epochs)],
          columns=["Epoch", "Loss", "Accuracy"])

     wandb.log({"Train Metrics": train_table, "Validation Metrics": val_table})

     predictions, *_ = trainer.test(test_loader)

     metrics.modelPerformance(model_name, optimizer_name, predictions, classes)


metrics.combine_metrics(selected_models, optimizer_name)

100%|██████████| 4/4 [00:37<00:00,  9.30s/batch]      | 0/1 [00:00<?, ?epoch/s]
100%|██████████| 4/4 [00:14<00:00,  3.51s/it]
100%|██████████| 4/4 [00:14<00:00,  3.65s/it]                                          
  recall_per_bin_type = red_shift_predictions.groupby(['bucket', 'red_shift_type']).apply(lambda x: recall_score(x['y_true'], x['y_pred'])).unstack()


RuntimeError: No active exception to reraise

In [None]:
wandb.finish()

wandb_run = wandb.run
if wandb_run:
    logged_metrics = wandb_run.history()
    print("Logged Metrics:")
    for key, value in logged_metrics.items():
        print(key, ":", value)
else:
    print("No wandb run found.")

VBox(children=(Label(value='0.004 MB of 0.009 MB uploaded\r'), FloatProgress(value=0.4286352967475131, max=1.0…

0,1
ResNet18_Adam_train_accuracy,▆▆▅▆▆▆▇▇▇█▇▇▇▇█▁
ResNet18_Adam_train_loss,█▇█▇▇▇▅▅▅▄▆▅▃▂▁▇
ResNet18_Adam_val_accuracy,▁
ResNet18_Adam_val_loss,▁
epoch,▁
global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██

0,1
ResNet18_Adam_train_accuracy,0.0
ResNet18_Adam_train_loss,0.6936
ResNet18_Adam_val_accuracy,0.72852
ResNet18_Adam_val_loss,0.62251
epoch,0.0
global_step,16.0


No wandb run found.


In [5]:
import segmentation
import train
from importlib import reload 
reload(data)
reload(train)
reload(segmentation)

model_name, model = selected_models[1]
segmentation.create_segmentation_plots(
    model,
    model_name,
    optimizer_name=optimizer_name
)

  loaded_model = torch.load(weights_path, map_location=device)
100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


23
23
23
23
23
23
23
23
23
23


100%|██████████| 7/7 [02:16<00:00, 19.50s/it]
100%|██████████| 7/7 [02:41<00:00, 23.11s/it]
100%|██████████| 7/7 [17:55<00:00, 153.67s/it]
100%|██████████| 7/7 [02:37<00:00, 22.51s/it]
100%|██████████| 7/7 [02:32<00:00, 21.83s/it]
100%|██████████| 7/7 [19:21<00:00, 165.93s/it]
100%|██████████| 7/7 [18:52<00:00, 161.76s/it]   
100%|██████████| 7/7 [02:02<00:00, 17.53s/it]
100%|██████████| 7/7 [01:41<00:00, 14.52s/it]
100%|██████████| 7/7 [01:50<00:00, 15.80s/it]
100%|██████████| 1/1 [00:00<00:00,  2.59it/s]


23
23
23
23
23


100%|██████████| 7/7 [01:55<00:00, 16.52s/it]
100%|██████████| 7/7 [01:40<00:00, 14.33s/it]
100%|██████████| 7/7 [01:34<00:00, 13.51s/it]
100%|██████████| 7/7 [01:43<00:00, 14.78s/it]
100%|██████████| 7/7 [02:04<00:00, 17.77s/it]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s]


23
23
23
23
23


100%|██████████| 7/7 [02:16<00:00, 19.45s/it]
100%|██████████| 7/7 [02:06<00:00, 18.01s/it]
100%|██████████| 7/7 [02:57<00:00, 25.37s/it]
100%|██████████| 7/7 [02:01<00:00, 17.35s/it]
100%|██████████| 7/7 [01:39<00:00, 14.18s/it]
100%|██████████| 1/1 [00:00<00:00,  4.21it/s]


49


100%|██████████| 16/16 [03:55<00:00, 14.72s/it]
100%|██████████| 1/1 [00:00<00:00,  7.54it/s]


49


100%|██████████| 16/16 [04:38<00:00, 17.41s/it]


In [21]:
model = model.load_model()

In [23]:
model.eval()

EfficientNet(
  (conv_stem): Conv2d(2, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2