# Imports

In [1]:
from __future__ import annotations

%matplotlib inline
import git
import copy
import glob
import os
import random
import warnings
from argparse import ArgumentParser
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchvision import models
from tqdm import tqdm_notebook as tqdm

warnings.filterwarnings('ignore')
from collections import Counter
from copy import copy
from pathlib import Path
from contextlib import redirect_stdout
import io

import matplotlib.pyplot as plt
import neptune.new as neptune
import numpy as np
import pytorch_lightning as pl
# torch and lightning imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from neptune.new.types import File
from PIL import Image, ImageEnhance
from pytorch_lightning.callbacks.finetuning import BaseFinetuning
from pytorch_lightning.loggers import NeptuneLogger
from pytorch_lightning.utilities.rank_zero import rank_zero_info
from torch.optim.lr_scheduler import MultiStepLR
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchmetrics import Accuracy, ConfusionMatrix
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from torchvision.utils import _log_api_usage_once


# Settings

In [2]:

# Ensure that all operations are deterministic for reproducibility
seed = 42
pl.seed_everything(seed)
torch.backends.cudnn.determinstic = True
torch.backends.cudnn.benchmark = False

device = (
    torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
)

Global seed set to 42


# Class and method definitions

### General-purpose

In [3]:
def pad(spectrogram: np.ndarray, pad_length: int) -> np.ndarray:
    """
    Centre pads an RGB spectrogram to a given length.

    Args:
        spectrogram (np.ndarray): Spectrogram to pad.
        pad_length (int): Full length of padded spectrogram

    Returns:
        np.ndarray: Padded spectrogram
    """
    spec_shape = np.shape(spectrogram)
    excess_needed = pad_length - spec_shape[1]
    pad_left = int(np.floor(float(excess_needed) / 2))
    pad_right = int(np.ceil(float(excess_needed) / 2))
    padded_spec = np.full((spec_shape[0], pad_length, 3), np.min(spectrogram))
    padded_spec[:, pad_left : pad_length - pad_right, :] = spectrogram
    return padded_spec


class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

### Data augmentation classes

### Custom transformations

In [4]:
class TimeCrop(torch.nn.Module):
    """
    Crops the given image at a random point in the time domain
    greater than its height and smaller than its maximum
    length minus its height.

    Note:
        Does not work with tensors.
    """

    def __init__(self):
        super().__init__()
        _log_api_usage_once(self)

    def forward(self, img):
        """
        Args:
            img (PIL Image): Image to be cropped.

        Returns:
            PIL Image: Cropped image.
        """
        img = np.asarray(img)
        H, W = img.shape[:2]
        if W < H:
            # Pads adding some extra width
            # so that the img is not always
            # in the same position
            img = pad(img, int(H + H * 0.25))

        H, W = img.shape[:2]
        r_idx = random.randint(0, W - H)
        img = img[:, r_idx : r_idx + H]
        return Image.fromarray(img)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"


class ChangeBrightness(torch.nn.Module):
    """
    Changes the brightness by a factor drawn from a uniform
    distribution between provided numbers.

    Args:
        factor (tuple): A tuple containing a range of
            brightness (e.g. 0.5 means 50% brightness).
        p (float): Probability with which the
            transformation will be applied.

    Warning:
        Does not work with tensors.

    """

    def __init__(self, factor: tuple = (0.9, 1.6), p: float = 0.5):
        super().__init__()
        _log_api_usage_once(self)
        self.factor = factor
        self.p = p

    def forward(self, img):
        """
        Args:
            img (PIL Image): Image to be modified.

        Returns:
            PIL Image: Modified image.
        """
        if self.p < torch.rand(1):
            return img
        f = random.uniform(self.factor[0], self.factor[1])
        enhancer = ImageEnhance.Brightness(img)
        img = enhancer.enhance(f)
        return img

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(factor={self.factor}, p={self.p})"


### Train/val/test transformations

In [5]:
class ImgTransform:
    def __init__(
        self,
        img_size=224,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ):
        self.stage = {
            "train": T.Compose(
                [
                    TimeCrop(),
                    T.RandomRotation(degrees=(-2, 2)),
                    T.RandomAdjustSharpness(sharpness_factor=6, p=0.2),
                    T.GaussianBlur(kernel_size=(3, 3), sigma=(0.005, 4)),
                    ChangeBrightness(factor=(0.8, 1.6), p=0.5),
                    T.ToTensor(),
                    # T.Normalize(mean, std),
                    T.RandomErasing(
                        p=0.2, scale=(0.02, 0.05), ratio=(0.3, 3.3)
                    ),
                ]
            ),
            "validate": T.Compose(
                [
                    TimeCrop(),
                    T.ToTensor(),
                    # T.Normalize(mean, std),
                ]
            ),
            "test": T.Compose(
                [
                    TimeCrop(),
                    T.ToTensor(),
                    # T.Normalize(mean, std),
                ]
            ),
        }


### Define data module

In [6]:
class GreatTitDataModule(pl.LightningDataModule):
    def __init__(self, train_path, test_path, batch_size=16, seed=42):
        super().__init__()
        self.batch_size = batch_size
        self.train_path = train_path
        self.test_path = test_path
        self.seed = seed

    def setup(self, stage=None):

        # Load and split training set
        d = ImageFolder(self.train_path)

        # Prepare weighted sampler for training data (oversample)
        class_count = Counter(d.targets)
        class_weights = torch.Tensor(
            [
                len(d.targets) / c
                for c in pd.Series(class_count).sort_index().values
            ]
        )

        sample_weights = [0] * len(d)
        for idx, (image, label) in enumerate(d):
            class_weight = class_weights[label]
            sample_weights[idx] = class_weight

        self.train_sampler = WeightedRandomSampler(
            weights=sample_weights, num_samples=len(d), replacement=True
        )

        # Stratified split for validation
        train_idx, valid_idx = train_test_split(
            np.arange(len(d.targets)),
            test_size=0.2,
            shuffle=True,
            random_state=self.seed,
            stratify=d.targets,
        )

        # Prepare train/validation/test datasets
        self.train, self.validate = copy(d), copy(d)
        self.train.imgs = np.array(d.imgs)[train_idx].tolist()
        self.train.targets = np.array(d.targets)[train_idx].tolist()
        self.validate.imgs = np.array(d.imgs)[valid_idx].tolist()
        self.validate.targets = np.array(d.targets)[valid_idx].tolist()
        self.test = ImageFolder(self.test_path)

        # Transforms
        self.train.transform = ImgTransform().stage["train"]
        self.validate.transform = ImgTransform().stage["validate"]
        self.test.transform = ImgTransform().stage["test"]

    def train_dataloader(self):
        return DataLoader(
            self.train,
            batch_size=self.batch_size,
            num_workers=8,
            pin_memory=True,
            sampler=self.train_sampler,
        )

    def val_dataloader(self):
        return DataLoader(
            self.validate,
            batch_size=100,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
        )

    def test_dataloader(self):
        return DataLoader(
            self.test,
            batch_size=100,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
        )

### Fine tuning module

In [7]:
# See [1] for more details on the training regime:

class MilestonesFinetuning(BaseFinetuning):
    def __init__(self, milestones: tuple = (5, 10), train_bn: bool = False):
        super().__init__()
        self.milestones = milestones
        self.train_bn = train_bn

    def freeze_before_training(self, pl_module: pl.LightningModule):
        self.freeze(modules=pl_module.feature_extractor, train_bn=self.train_bn)

    def finetune_function(
        self,
        pl_module: pl.LightningModule,
        epoch: int,
        optimizer: Optimizer,
        opt_idx: int,
    ):
        if epoch == self.milestones[0]:
            # unfreeze 5 last layers
            self.unfreeze_and_add_param_group(
                modules=pl_module.feature_extractor[-5:],
                optimizer=optimizer,
                train_bn=self.train_bn,
            )

        elif epoch == self.milestones[1]:
            # unfreeze remaining layers
            self.unfreeze_and_add_param_group(
                modules=pl_module.feature_extractor[:-5],
                optimizer=optimizer,
                train_bn=self.train_bn,
            )

### Main model module

In [8]:
logg_params = {
    "on_step": True,
    "on_epoch": True,
    "prog_bar": True,
    "logger": True,
}

class ResNetClassifier(pl.LightningModule):
    def __init__(
        self,
        num_classes: int | None = None,
        backbone: str = "resnet50",
        train_bn: bool = False,
        batch_size: int = 16,
        transfer=True,
        milestones: tuple = (2, 4),
        lr: float = 1e-3,
        lr_scheduler_gamma: float = 1e-1,
    ):
        super().__init__()

        self.__dict__.update(locals())
        self.num_classes = num_classes
        self.backbone = backbone
        self.transfer = transfer = (True,)
        self.lr = lr
        self.milestones = milestones
        self.lr_scheduler_gamma = lr_scheduler_gamma
        self.batch_size = batch_size

        self.__build_model()

        self.train_acc = Accuracy()
        self.valid_acc = Accuracy()
        self.test_acc = Accuracy()
        self.save_hyperparameters()

        self.val_confusion = ConfusionMatrix(num_classes=self.num_classes)
        self.test_confusion = ConfusionMatrix(num_classes=self.num_classes)

    def __build_model(self):
        """Define model layers & loss."""

        # 1. Load pre-trained network:
        model_func = getattr(models, self.backbone)
        self.backbone = model_func(pretrained=self.transfer)

        _layers = list(self.backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*_layers)
        linear_size = list(self.backbone.children())[-1].in_features

        # 2. Classifier:
        # _fc_layers = [nn.Linear(2048, 1000), nn.ReLU(), nn.Linear(1000, self.num_classes)]
        # self.backbone.fc = nn.Sequential(*_fc_layers)
        self.backbone.fc = nn.Linear(linear_size, self.num_classes)

        # 3. Loss:
        self.loss_func = (
            nn.BCEWithLogitsLoss()
            if self.num_classes == 2
            else nn.CrossEntropyLoss()
        )

    def forward(self, x):
        return self.backbone(x)

    def configure_optimizers(self):
        parameters = list(self.parameters())
        trainab_params = list(filter(lambda p: p.requires_grad, parameters))
        rank_zero_info(
            f"The model will start training with only {len(trainab_params)} "
            f"trainable parameters out of {len(parameters)}."
        )
        optimizer = optim.Adam(trainab_params, lr=self.lr)
        scheduler = MultiStepLR(
            optimizer, milestones=self.milestones, gamma=self.lr_scheduler_gamma
        )
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        x, y = batch
        preds = self(x)

        loss = self.loss_func(preds, y)
        self.train_acc(preds, y)
        self.log("train/loss", loss, **logg_params)
        self.log("train/acc", self.train_acc, **logg_params)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        preds = self(x)

        loss = self.loss_func(preds, y)
        self.train_acc(preds, y)
        self.log("val/loss", loss, **logg_params)
        self.log("val/acc", self.train_acc, **logg_params)
        self.val_confusion.update(preds, batch[1])

    def test_step(self, batch, batch_idx):
        x, y = batch
        preds = self(x)

        loss = self.loss_func(preds, y)
        self.test_acc(preds, y)
        self.log("test/loss", loss, **logg_params)
        self.log("test/acc", self.test_acc, **logg_params)
        self.test_confusion.update(preds, batch[1])

        # Save wrong prediction images
        y_true = y.cpu().detach().numpy()
        y_pred = preds.argmax(axis=1).cpu().detach().numpy()

        for j in np.where(np.not_equal(y_true, y_pred))[0]:
            img = np.squeeze(x[j].cpu().detach().numpy())
            img[img < 0] = 0
            img = img / np.amax(img)
            labs = list(
                self.trainer.datamodule.val_dataloader().dataset.class_to_idx.keys()
            )
            neptune_logger.experiment["test/misclassified_images"].log(
                neptune.types.File.as_image(img.transpose((1, 2, 0))),
                description=f"y_pred = {labs[y_pred[j]]}, y_true = {labs[y_true[j]]}",
            )

    # Output graphs and extra metrics
    def plot_conf_matrix(self, conf_mat):
        labs = (
            self.trainer.datamodule.val_dataloader().dataset.class_to_idx.keys()
        )
        df_cm = pd.DataFrame(conf_mat, index=labs, columns=labs)
        plt.figure(figsize=(13, 10))
        fig_ = sns.heatmap(
            df_cm, annot=True, cmap="magma", fmt="g"
        ).get_figure()
        plt.close(fig_)
        return fig_

    def validation_epoch_end(self, outputs):
        conf_mat = (
            self.val_confusion.compute().detach().cpu().numpy().astype(np.int)
        )
        fig_ = self.plot_conf_matrix(conf_mat)
        self.logger.experiment["train/confusion_matrix"].log(
            File.as_image(fig_)
        )

    def test_epoch_end(self, outputs):
        conf_mat = (
            self.test_confusion.compute().detach().cpu().numpy().astype(np.int)
        )
        fig_ = self.plot_conf_matrix(conf_mat)
        self.logger.experiment["test/confusion_matrix"].log(File.as_image(fig_))


# Data ingest

In [9]:

# Project settings/directories
DATASET =  "pykanto-example"

PROJECT_ROOT = Path(
    git.Repo(".", search_parent_directories=True).working_tree_dir
)

data_path = PROJECT_ROOT / "data" / "datasets" / DATASET / "ML"
train_path, test_path = data_path / "train", data_path / "test"
n_classes = sum([1 for i in test_path.glob("**/")]) - 1


hparams = AttrDict(
    {
        "batch_size": 64,
        "num_classes": n_classes,
        "lr": 0.001,
        "lr_scheduler_gamma": 0.1,
        "milestones": (10, 15),
        "transfer": True,
        "train_bn": False,
    }
)

dm = GreatTitDataModule(
    train_path=train_path,
    test_path=test_path,
    batch_size=hparams.batch_size,
)

dm.setup()


# Training

### Start logger and checkpoints

In [52]:
# To run this you will need neptune if you want to monitor model training,
# see [2] for more details.

# IMPORTANT: Load existing model? [None/model ID]
use_existing = "GRET-72"


def _init_neptune():
    run = neptune.init(
        project="nilomr/greti",
        flush_period=5,
        with_id=use_existing,
        mode="sync",  # async won't work in Oxford HPC
    )
    return run

def init_neptune_logger():
    try:
        return _init_neptune()
    except:
        try:
            return _init_neptune()
        except Exception as e:
            raise e

neptune_logger = NeptuneLogger(run=init_neptune_logger())

CHECKPOINTS_DIR = Path("checkpoints") / neptune_logger._run_short_id
model_checkpoint = pl.callbacks.ModelCheckpoint(
    dirpath=CHECKPOINTS_DIR,
    monitor="val/acc_epoch",
    mode="max",
    save_top_k=2,
    save_weights_only=True,
)

https://app.neptune.ai/nilomr/greti/e/GRET-72
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


## Instantiate model and trainer

In [20]:
model = ResNetClassifier(**hparams)

trainer = pl.Trainer(
    max_epochs=30,
    logger=neptune_logger,
    callbacks=[
        model_checkpoint,
        EarlyStopping(monitor="val/loss_epoch", mode="min", patience=4),
        MilestonesFinetuning(milestones=(5, 10), train_bn=hparams.train_bn),
    ],
    log_every_n_steps=1,
    accelerator="gpu",
    devices=1,
    precision=16, # If supported use 16-bit precision
    num_sanity_val_steps=0,  #FIXME #BUG @nilomr Validation gets stuck with full dataset
)


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


## Train model

Skip this if you have already trained a model

In [30]:
trainer.fit(model, dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
The model will start training with only 2 trainable parameters out of 161.

  | Name              | Type             | Params
-------------------------------------------------------
0 | backbone          | ResNet           | 23.7 M
1 | feature_extractor | Sequential       | 23.5 M
2 | loss_func         | CrossEntropyLoss | 0     
3 | train_acc         | Accuracy         | 0     
4 | valid_acc         | Accuracy         | 0     
5 | test_acc          | Accuracy         | 0     
6 | val_confusion     | ConfusionMatrix  | 0     
7 | test_confusion    | ConfusionMatrix  | 0     
-------------------------------------------------------
174 K     Trainable params
23.5 M    Non-trainable params
23.7 M    Total params
47.364    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Exception in thread NeptuneReporting:
Traceback (most recent call last):
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/threading/daemon.py", line 51, in run
    self.work()
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/hardware/hardware_metric_reporting_job.py", line 125, in work
    attr.log(value=metric_value.value, timestamp=metric_value.timestamp)
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/handler.py", line 66, in inner_fun
    return fun(self, *args, **kwargs)
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/handler.py", line 276, in log
    attr.log(value, step=step, timestamp=timestamp, wait=wait, **kwargs)
  File "/data/zool-s

Validation: 0it [00:00, ?it/s]

    self._container._op_processor.enqueue_operation(operation, wait)
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/operation_processors/sync_operation_processor.py", line 36, in enqueue_operation
    raise errors[0]
neptune.new.exceptions.MetadataInconsistency: Timestamp must be non-decreasing for series attribute: monitoring/memory. Invalid point: 2022-10-19T15:37:19.028Z


Validation: 0it [00:00, ?it/s]

Exception in thread NeptuneReporting:
Traceback (most recent call last):
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/threading/daemon.py", line 51, in run
    self.work()
Exception in thread NeptuneReporting  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/hardware/hardware_metric_reporting_job.py", line 125, in work
:
    Traceback (most recent call last):
attr.log(value=metric_value.value, timestamp=metric_value.timestamp)  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/threading.py", line 932, in _bootstrap_inner

      File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/handler.py", line 66, in inner_fun
self.run()    
return fun(self, *args, **kwargs)  File "/data/zoo

Validation: 0it [00:00, ?it/s]

      File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/operation_processors/sync_operation_processor.py", line 36, in enqueue_operation
attr.log(value, step=step, timestamp=timestamp, wait=wait, **kwargs)    
raise errors[0]  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/attributes/series/series.py", line 102, in log

    neptune.new.exceptionsself._enqueue_operation(op, wait).
MetadataInconsistency  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/attributes/attribute.py", line 41, in _enqueue_operation
:     Timestamp must be non-decreasing for series attribute: monitoring/memory. Invalid point: 2022-10-19T15:38:24.016Zself._container._op_processor.enqueue_operation(operation, wait)

  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/operation_processors/sync_operation_processor

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

# Test best model in held out (test) dataset

## Retrieve model

In [62]:
# best_model = ResNetClassifier.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
# If from fresh session:

lckpt = list(
    (
        Path(neptune_logger.save_dir).parent
        / "checkpoints"
        / neptune_logger._run_short_id
    ).glob("*ckpt")
)[-1]
best_model = ResNetClassifier.load_from_checkpoint(lckpt)

## Get model predictions for test set

In [63]:
trainer.test(best_model, datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test/acc_epoch         0.9215686321258545
     test/loss_epoch        0.25644221901893616
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test/loss_epoch': 0.25644221901893616,
  'test/acc_epoch': 0.9215686321258545}]

Exception in thread NeptuneReporting:
Traceback (most recent call last):
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/threading/daemon.py", line 51, in run
    self.work()
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/internal/hardware/hardware_metric_reporting_job.py", line 125, in work
    attr.log(value=metric_value.value, timestamp=metric_value.timestamp)
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/handler.py", line 66, in inner_fun
    return fun(self, *args, **kwargs)
  File "/data/zool-songbird/shil5293/envs/pykanto-example/lib/python3.8/site-packages/neptune/new/handler.py", line 276, in log
    attr.log(value, step=step, timestamp=timestamp, wait=wait, **kwargs)
  File "/data/zool-s

In [65]:
trainer.callback_metrics

{'test/loss': tensor(0.2564),
 'test/loss_epoch': tensor(0.2564),
 'test/acc': tensor(0.9216),
 'test/acc_epoch': tensor(0.9216)}

# Extract feature vectors from the entire dataset

Here we just use the training set for simplicity

In [None]:
imgpaths = [i for i in train_path.glob("*/*.jpg")]
test_imgs = [i for i in test_path.glob("*/*.jpg")]
best_model.eval()
best_model.to(device)
vectors = {}

# Test transformation includes random crop in the 'time' domain:
# Running multiple times and averaging may increase robustness:
niters = 5

for i in range(niters):
    vectors[str(i)] = {}
    for path in tqdm(imgpaths, total=len(imgpaths)):
        img = Image.open(path)
        rgb_img = TimeCrop()(img)
        tens_img = T.ToTensor()(rgb_img).unsqueeze_(0)
        vectors[str(i)][f"{path.stem}"] = (
            best_model.feature_extractor(tens_img.to(device))
            .cpu()
            .detach()
            .numpy()[0, :, 0, 0]
        )

  0%|          | 0/595 [00:00<?, ?it/s]

  0%|          | 0/595 [00:00<?, ?it/s]

  0%|          | 0/595 [00:00<?, ?it/s]

  0%|          | 0/595 [00:00<?, ?it/s]

  0%|          | 0/595 [00:00<?, ?it/s]

## Export feature vectors

In [None]:
alldfs = pd.concat(
    [pd.DataFrame.from_dict(d, orient='index') for d in vectors.values()])
vocmean = alldfs.groupby(alldfs.index).mean()

vector_dir = (PROJECT_ROOT / "data" / "datasets" / DATASET / 'ML' / 
       'output' / 'feat_vectors.csv')
vector_dir.parent.mkdir(parents=True, exist_ok=True)
vocmean.to_csv(vector_dir)

# References
[1] https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pl_examples/domain_templates/computer_vision_fine_tuning.py

[2]
https://docs.neptune.ai/integrations/lightning/ <br>
https://docs.neptune.ai/usage/best_practices/#configuring-your-credentials