In [1]:
# Sanity check: display the path of the Python interpreter backing this kernel.
import sys
sys.executable

'/home/ksmehrab/miniconda/envs/data_env/bin/python'

In [5]:
# Setup identification dataset

import torch
from torch import nn
from pathlib import Path
import pandas as pd 
import json
import matplotlib.pyplot as plt 
import numpy as np
import os
import torch.nn.functional as F
from tqdm import tqdm

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class FishAirDatasetProcessed(Dataset):
    """Multi-label trait dataset backed by a CSV index and an image directory.

    Every CSV row must provide a ``filename`` column (image path relative to
    ``img_dir``) plus one column per entry of ``traits_to_detect`` whose value
    is convertible with ``float()``.
    """

    # Immutable default so instances never share (and accidentally mutate) one list.
    DEFAULT_TRAITS = ('adipose_fin', 'pelvic_fin', 'barbel', 'multiple_dorsal_fin')

    def __init__(
        self,
        data_file:Path,
        img_dir:Path,
        transform,
        traits_to_detect = None
    ):
        """
        Args:
            data_file: Path to the CSV index file.
            img_dir: Directory holding the images referenced by ``filename``.
            transform: Callable applied to each loaded RGB PIL image.
            traits_to_detect: Column names used as targets; ``None`` selects
                ``DEFAULT_TRAITS``.

        Raises:
            ValueError: If ``data_file`` does not end with ``csv``.
        """
        self.data_file = data_file
        self.transform = transform
        # Accept plain strings as well as Path objects; __getitem__ joins with "/".
        self.img_dir = Path(img_dir)
        if not str(self.data_file).endswith('csv'):
            # Fail here instead of with an AttributeError on self.df further down.
            raise ValueError(f"Unsupported data file (expected a csv file): {data_file}")
        self.df = pd.read_csv(data_file)
        if traits_to_detect is None:
            traits_to_detect = self.DEFAULT_TRAITS
        self.traits_to_detect = list(traits_to_detect)
        self.num_classes = len(self.traits_to_detect)
        # One dict per CSV row, keyed by column name.
        self.samples = self.df.to_dict('records')

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.samples)

    def _make_label(self, sample):
        """Build the multi-hot target vector for one row.

        float32 is used so the targets match the dtype of the model logits
        consumed by ``BCEWithLogitsLoss``.
        """
        return np.array(
            [float(sample[trait]) for trait in self.traits_to_detect],
            dtype=np.float32,
        )

    def __getitem__(self, idx):
        """Load the transformed image and its target vector for row ``idx``.

        The row dict must contain ``filename`` and every trait column listed in
        ``self.traits_to_detect``.

        Returns:
            (img, label): ``img`` is whatever ``self.transform`` returns for the
            RGB image; ``label`` is a float32 array of length ``num_classes``.
        """
        sample = self.samples[idx]
        img_path = self.img_dir / sample['filename']
        img = Image.open(img_path).convert('RGB')
        img = self.transform(img) # torchvision transform
        return img, self._make_label(sample)

    def get_img_filenames(self, indices):
        """Return the full row dicts (not only the filenames) for ``indices``."""
        return [self.samples[i] for i in indices]

In [None]:
"""
This file uses torchvision pretrained models, but modifies the final fc layer to the number of classes
"""
import torch
import torch.nn as nn 

from torchvision.models import resnet18, ResNet18_Weights
from torchvision.models import resnet34, ResNet34_Weights
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models import vit_b_32, ViT_B_32_Weights
from torchvision.models import vgg19_bn, VGG19_BN_Weights
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torchvision.models import swin_b, Swin_B_Weights
from torchvision.models import inception_v3, Inception_V3_Weights


def get_custom_model(
    model_name:str,
    num_classes:int,
    pretrained:bool=True
):
    """Build a torchvision backbone whose final classifier emits ``num_classes`` logits.

    Args:
        model_name: One of 'resnet18', 'resnet34', 'resnet50', 'vit_b_32',
            'vit_b_16', 'vgg19' (batch-norm variant), 'swin_b', 'inception_v3'.
        num_classes: Number of output units of the replacement linear head.
        pretrained: Load the ``DEFAULT`` torchvision weights when True,
            otherwise start from random initialisation.

    Returns:
        The model with its classification head replaced by a fresh ``nn.Linear``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    def _weights(weights_enum):
        # ``weights=None`` is what the torchvision builders use when called bare.
        return weights_enum.DEFAULT if pretrained else None

    def _head(in_features):
        return nn.Linear(in_features=in_features, out_features=num_classes, bias=True)

    if model_name == 'resnet18':
        model = resnet18(weights=_weights(ResNet18_Weights))
        model.fc = _head(512)
    elif model_name == 'resnet34':
        model = resnet34(weights=_weights(ResNet34_Weights))
        model.fc = _head(512)
    elif model_name == 'resnet50':
        model = resnet50(weights=_weights(ResNet50_Weights))
        model.fc = _head(2048)
    elif model_name == 'vit_b_32':
        model = vit_b_32(weights=_weights(ViT_B_32_Weights))
        model.heads = _head(768)
    elif model_name == 'vit_b_16':
        # The non-pretrained path used to build vit_b_32 by mistake.
        model = vit_b_16(weights=_weights(ViT_B_16_Weights))
        model.heads = _head(768)
    elif model_name == 'vgg19':
        model = vgg19_bn(weights=_weights(VGG19_BN_Weights))
        # Only the last layer of the VGG classifier stack is swapped out.
        model.classifier[6] = _head(4096)
    elif model_name == 'swin_b':
        model = swin_b(weights=_weights(Swin_B_Weights))
        model.head = _head(1024)
    elif model_name == 'inception_v3':
        model = inception_v3(weights=_weights(Inception_V3_Weights))
        # NOTE: only the main classifier is replaced; the auxiliary head keeps
        # its original output size, so callers should ignore the aux logits.
        model.fc = _head(2048)
    else:
        # Previously fell through to ``return model`` and raised UnboundLocalError.
        raise ValueError(f"Unsupported model_name: {model_name!r}")

    return model

In [8]:
# Setup config.py
import argparse

import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
import torch.optim.lr_scheduler as lr_scheduler

import json
import os
import sys
import wandb

from pathlib import Path
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score

from data_setup import get_transform, get_dataset_and_dataloader

# Let cuDNN autotune convolution algorithms for the input shapes it sees.
cudnn.benchmark = True

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Number of visible CUDA devices (0 on CPU-only machines).
N_GPUS = torch.cuda.device_count() if torch.cuda.is_available() else 0


def parse_args(argv=None):
    """Parse the command-line options of the trait identification pipeline.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) keeps
            the previous behaviour of reading ``sys.argv[1:]``. Passing an
            explicit list lets the parser be used from a notebook, where
            ``sys.argv`` holds the kernel launcher's own arguments.

    Returns:
        argparse.Namespace with the parsed options.
    """
    parser = argparse.ArgumentParser(description='Trait Identification Pipeline')
    parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
    # 'vit_b_16' is listed because the model factory in this notebook supports it.
    parser.add_argument('--model', default='resnet18', type=str, choices=['resnet34', 'resnet18', 'resnet50', 'vit_b_32', 'vit_b_16', 'vgg19', 'swin_b', 'inception_v3'], help='model type (default: ResNet18)')
    parser.add_argument('--batch-size', default=128, type=int, help='batch size')
    parser.add_argument('--epoch', default=200, type=int,
                        help='total epochs to run')
    parser.add_argument('--output_path', default=None, type=str,
                        help='path to save all outputs')
    parser.add_argument('--seed', default=None, type=int, help='random seed')
    parser.add_argument('--dataset', required=True,
                        choices=['fishair130-bal-50', 'fishair130-imb-low50', 'fishair130-overs-500', 'fishair_processed'], help='Dataset')
    parser.add_argument('--decay', default=2e-4, type=float, help='weight decay')
    # store_false: augmentation stays enabled unless --no-augment is given.
    parser.add_argument('--no-augment', dest='augment', action='store_false',
                        help='use standard augmentation (default: True)')

    parser.add_argument('--name', default='0', type=str, help='name of experiment or run')
    parser.add_argument('--resume', '-r', action='store_true',
                        help='resume from checkpoint')

    parser.add_argument('--checkpoint_path', default=None, type=str,
                        help='checkpoint path of network for train')

    parser.add_argument('--focal_gamma', default=1.0, type=float, help='Hyper-parameter for Focal Loss')

    parser.add_argument('--loss_type', default='BCE', type=str,
                        choices=['BCE', 'WBCE', 'Focal'],
                        help='Type of loss for imbalance')

    parser.add_argument('--wandb', action='store_true', help='wandb logging')
    parser.add_argument('--cosine_annealing', action='store_true', help='Use cosine annealing')
    parser.add_argument('--num_workers', default=1, type=int, help='Number of dataloader workers')

    return parser.parse_args(argv)

ARGS = parse_args()

# Seed every RNG in use; draw a random seed when none was supplied so that it
# can still be recorded in BASE_FILENAME.
if ARGS.seed is not None:
    SEED = ARGS.seed
else:
    SEED = np.random.randint(10000)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

DATASET = ARGS.dataset
BATCH_SIZE = ARGS.batch_size
MODEL = ARGS.model
WANDB = ARGS.wandb

LR = ARGS.lr
EPOCH = ARGS.epoch
NUM_WORKERS = ARGS.num_workers
START_EPOCH = 0

# Setup logging
if ARGS.output_path is None:
    # os.path.exists(None) would otherwise fail with an opaque TypeError.
    raise ValueError("--output_path must be provided")

if os.path.exists(ARGS.output_path):
    print(f"Output path: {ARGS.output_path} exists. Continuing will overwrite results. Continue? [Y/n]")
    c = input()
    if c == 'n':
        print('Quitting execution')
        sys.exit()
    elif c != 'Y':
        print('Invalid selection. Quitting execution')
        sys.exit()
else:
    # Was os.mkdir(path, exist_ok=True): `path` is undefined and os.mkdir has
    # no exist_ok parameter. makedirs also creates missing parent directories.
    os.makedirs(ARGS.output_path, exist_ok=True)

BASE_FILENAME = f"S{SEED}_{ARGS.name}_" \
    f"{DATASET}_{MODEL}"

if WANDB:
    os.environ['WANDB_DIR'] = str(BASE_FILENAME)
    wandb.init(project=BASE_FILENAME)
    config = {
        "model_name": ARGS.name,
        "learning_rate": LR,
        "batch_size": BATCH_SIZE,
        "epochs": EPOCH,
    }
    wandb.config.update(config)


# Data
print('==> Preparing data: %s' % DATASET)
if DATASET == 'fishair_processed':
    # Per-channel normalisation statistics passed to get_transform.
    mean = torch.tensor([0.9353, 0.9175, 0.8923])
    std = torch.tensor([0.1535, 0.1933, 0.2464])
    transform = get_transform(224, mean, std, 'squarepad_augment_normalize')
    test_transform = get_transform(224, mean, std, 'squarepad_no_augment_normalize')
    train_file = Path('/data/DatasetTrackFinalData/Identification/trait_identification_train.csv')
    val_file = Path('/data/DatasetTrackFinalData/Identification/trait_identification_val.csv')
    test_file = Path('/data/DatasetTrackFinalData/Identification/trait_identification_test_filtered.csv')
    lv_sp_normal_test_file = Path('/data/DatasetTrackFinalData/Identification/trait_identification_test_leave_out_filtered.csv')
    lv_sp_difficult_test_file = Path('/data/DatasetTrackFinalData/Identification/trait_identification_test_leave_out_difficult.csv')
    img_dir = Path('/data/BGRemovedCropped/all')
else:
    raise NotImplementedError('Dataset not implemented')

# The training split uses the augmenting transform; all other splits use the
# non-augmenting one.
train_dataset, train_loader = get_dataset_and_dataloader(
    data_file=train_file,
    img_dir=img_dir,
    transform=transform,
    batch_size=BATCH_SIZE,
    num_workers=ARGS.num_workers
)

val_dataset, val_loader = get_dataset_and_dataloader(
    data_file=val_file,
    img_dir=img_dir,
    transform=test_transform,
    batch_size=BATCH_SIZE,
    num_workers=ARGS.num_workers
)

test_dataset, test_loader = get_dataset_and_dataloader(
    data_file=test_file,
    img_dir=img_dir,
    transform=test_transform,
    batch_size=BATCH_SIZE,
    num_workers=ARGS.num_workers
)

# Optional extra test splits, loaded only when their file path is set.
if lv_sp_normal_test_file:
    lv_sp_normal_dataset, lv_sp_normal_loader = get_dataset_and_dataloader(
        data_file=lv_sp_normal_test_file,
        img_dir=img_dir,
        transform=test_transform,
        batch_size=BATCH_SIZE,
        num_workers=ARGS.num_workers
    )

if lv_sp_difficult_test_file:
    lv_sp_dif_dataset, lv_sp_dif_loader = get_dataset_and_dataloader(
        data_file=lv_sp_difficult_test_file,
        img_dir=img_dir,
        transform=test_transform,
        batch_size=BATCH_SIZE,
        num_workers=ARGS.num_workers
    )

TRAITS_TO_DETECT = train_dataset.traits_to_detect

def adjust_learning_rate(optimizer, lr_init, epoch, scheduler):
    """Apply linear warmup for the first epochs, then defer to the scheduler.

    For ``epoch < 5`` the learning rate of every parameter group is set to
    ``(epoch + 1) * lr_init / 5``. From epoch 5 on, ``scheduler.step()`` is
    called when ``ARGS.cosine_annealing`` is set; otherwise the learning rate
    is left unchanged (no step decay is implemented here).

    Args:
        optimizer: Object exposing ``param_groups`` (list of dicts with 'lr').
        lr_init: Target learning rate reached at the end of warmup.
        epoch: Zero-based epoch index.
        scheduler: LR scheduler used after warmup; may be None when cosine
            annealing is disabled.
    """
    warmup_epochs = 5
    if epoch < warmup_epochs:
        warmup_lr = (epoch + 1) * lr_init / warmup_epochs
        for param_group in optimizer.param_groups:
            param_group['lr'] = warmup_lr
        return

    if ARGS.cosine_annealing:
        assert scheduler is not None, "Scheduler cannot be None if cosine annealing is set"
        scheduler.step()

def evaluate(net, dataloader, epoch, type):
    """Evaluate ``net`` on ``dataloader`` and report multi-label metrics.

    Args:
        net: Model producing one logit per trait.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        epoch: Current epoch index, used only in the log message.
        type: Label of the split (e.g. 'Val', 'Test') used in the log message.

    Returns:
        dict with keys 'loss' (mean BCE-with-logits loss per sample), 'aps'
        (per-trait average precision), 'map' (mean of 'aps'), and per-trait
        lists 'f1_score', 'precision', 'recall' (macro-averaged, computed on
        probabilities thresholded at 0.5).
    """
    eps = 0.000001  # guards the loss average against an empty loader
    was_training = net.training
    net.eval()
    criterion = nn.BCEWithLogitsLoss(reduction='mean')

    total_loss = 0.0
    total = 0.0

    all_predicted = []
    all_targets = []

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            batch_size = inputs.size(0)
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = net(inputs)
            # Align the target dtype with the logits for the loss computation.
            loss = criterion(outputs, targets.type_as(outputs))
            total_loss += loss.item() * batch_size

            predicted = torch.sigmoid(outputs) # Get probabilities

            all_predicted.append(predicted.detach().cpu().numpy())
            all_targets.append(targets.cpu().numpy())

            total += batch_size

    all_predicted = np.vstack(all_predicted)
    all_targets = np.vstack(all_targets)

    average_precisions = []
    for i in range(all_targets.shape[1]):
        ap = average_precision_score(all_targets[:, i], all_predicted[:, i])
        average_precisions.append(ap)
        print(f'{TRAITS_TO_DETECT[i]} - AP: {ap}')

    mean_ap = np.mean(average_precisions)
    print(f'Mean Average Precision: {mean_ap}')

    all_preds_threshold = (all_predicted >= 0.50).astype(int)
    all_targets_binary = all_targets.astype(int)

    f1s = []
    precisions = []
    recalls = []
    for i in range(all_targets.shape[1]):
        # Score each trait against its own target column (previously the whole
        # 2-D target matrix was passed together with a single prediction column).
        trait_targets = all_targets_binary[:, i]
        trait_preds = all_preds_threshold[:, i]
        f1s.append(f1_score(trait_targets, trait_preds, average='macro'))
        precisions.append(precision_score(trait_targets, trait_preds, average='macro'))
        recalls.append(recall_score(trait_targets, trait_preds, average='macro'))

    mean_loss = total_loss / (total + eps)
    results = {
        'loss': mean_loss,
        'aps': average_precisions,
        'map': mean_ap,
        'f1_score': f1s,
        'precision': precisions,
        'recall': recalls
    }

    # Report mean_ap; the old message interpolated the builtin ``map``.
    msg = f"{type} | Epoch: {epoch}/{EPOCH} | Loss: {mean_loss} | AP: {average_precisions} | MAP: {mean_ap} | f1: {f1s} | Precs: {precisions} | Recs: {recalls}"

    print(msg)

    # Restore whichever mode the model was in on entry.
    net.train(was_training)
    return results

usage: ipykernel_launcher.py [-h] [--lr LR]
                             [--model {resnet34,resnet18,resnet50,vit_b_32,vgg19,swin_b,inception_v3}]
                             [--batch-size BATCH_SIZE] [--epoch EPOCH]
                             [--seed SEED] --dataset
                             {fishair130-bal-50,fishair130-imb-low50,fishair130-overs-500,fishair_processed}
                             [--decay DECAY] [--no-augment] [--name NAME]
                             [--resume] [--net_t NET_T]
                             [--focal_gamma FOCAL_GAMMA]
                             [--loss_type {CE,Focal}] [--wandb]
                             [--cosine_annealing] [--num_workers NUM_WORKERS]
ipykernel_launcher.py: error: the following arguments are required: --dataset


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Setup train.py
from __future__ import print_function

import csv
import os

import numpy as np
import torch
from torch.autograd import Variable, grad
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import wandb
import json

from config import *

from model import get_custom_model

def train_epoch(model, criterion, optimizer, data_loader):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: Network to optimise; put into training mode here.
        criterion: Loss taking ``(logits, targets)``.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        (mean_loss, mean_ap): the sample-weighted mean training loss and the
        mean of the per-trait average precision over the epoch.
    """
    model.train()

    eps = 0.000001
    train_loss = 0
    total = 0  # was never initialised, so the first += raised UnboundLocalError
    all_predicted = []
    all_targets = []
    for inputs, targets in tqdm(data_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = inputs.size(0)

        if 'inception' in MODEL:
            # In training mode the inception model returns (logits, aux_logits).
            outputs, _ = model(inputs)
        else:
            outputs = model(inputs)
        # Align the target dtype with the logits for the loss computation.
        loss = criterion(outputs, targets.type_as(outputs))

        train_loss += loss.item() * batch_size
        total += batch_size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        predicted = torch.sigmoid(outputs) # Get probabilities
        all_predicted.append(predicted.detach().cpu().numpy())
        all_targets.append(targets.cpu().numpy())

    all_predicted = np.vstack(all_predicted)
    all_targets = np.vstack(all_targets)

    average_precisions = []
    for i in range(all_targets.shape[1]):
        ap = average_precision_score(all_targets[:, i], all_predicted[:, i])
        average_precisions.append(ap)
        print(f'{TRAITS_TO_DETECT[i]} - AP: {ap}')

    mean_ap = np.mean(average_precisions)
    mean_loss = train_loss / (total + eps)

    # Log and return mean_ap; the builtin ``map`` was used here by mistake.
    msg = f"Training Loss: {mean_loss} | Training AP: {average_precisions} | Training  MAP: {mean_ap}"
    print(msg)

    return mean_loss, mean_ap

def save_checkpoint(map, model, optim, epoch, index=False, checkpoint_stats=None):
    """Serialise model/optimizer state plus bookkeeping into ``ARGS.output_path``.

    Args:
        map: Metric value stored under the 'map' key.
        model: Module whose ``state_dict()`` is stored under 'net'.
        optim: Optimizer whose ``state_dict()`` is stored under 'optimizer'.
        epoch: Epoch index stored under 'epoch'.
        index: When True the file name embeds the epoch; otherwise it embeds
            the seed and ``BASE_FILENAME``.
        checkpoint_stats: Optional mapping merged into the saved state.
    """
    print('Saving..')

    state = dict(
        net=model.state_dict(),
        optimizer=optim.state_dict(),
        map=map,
        epoch=epoch,
        rng_state=torch.get_rng_state(),
    )

    # Extra entries may overwrite the standard keys above.
    if checkpoint_stats:
        state.update(checkpoint_stats.items())

    ckpt_name = (
        f'ckpt_epoch{epoch}_{SEED}.t7'
        if index
        else f'ckpt_{SEED}_{BASE_FILENAME}.t7'
    )
    torch.save(state, os.path.join(ARGS.output_path, ckpt_name))

def save_checkpoint_stats_json(checkpoint_stats, epoch, index=True, type='test'):
    """Write ``checkpoint_stats`` as JSON into ``ARGS.output_path``.

    The passed dict is modified in place: an 'epoch' entry is added before it
    is dumped. With ``index`` set, the file name ends with the epoch number;
    otherwise that suffix is left empty.
    """
    checkpoint_stats['epoch'] = epoch
    epoch_suffix = str(epoch) if index else ''
    ckpt_stat_name = ''.join(
        ('ckpt_stats_', type, "_", BASE_FILENAME, "_", epoch_suffix, '.json')
    )
    target_path = os.path.join(ARGS.output_path, ckpt_stat_name)
    with open(target_path, 'w') as out_file:
        json.dump(checkpoint_stats, out_file)

#######################################################################################


BEST_VAL = 0  # best validation mAP seen so far

print(f"==> Building model from custom model: {MODEL}")

# One output logit per trait. N_CLASSES was referenced without being defined.
N_CLASSES = len(TRAITS_TO_DETECT)

# Setup Model
model = get_custom_model(
    model_name=MODEL,
    num_classes=N_CLASSES,
    pretrained=True
)

model = model.to(device)

# Setup optimizer and scheduler
# (`net` was undefined here; the model built above is the one being optimised.)
optimizer = optim.SGD(model.parameters(), lr=ARGS.lr, momentum=0.9, weight_decay=ARGS.decay)
if ARGS.cosine_annealing:
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCH)
else:
    scheduler = None

# Check if we need to start from checkpoint
if ARGS.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')

    if ARGS.checkpoint_path is not None:
        # map_location lets a checkpoint written on a GPU be restored on
        # whichever device this run uses.
        ckpt_t = torch.load(ARGS.checkpoint_path, map_location=device)
        model.load_state_dict(ckpt_t['net'])
        optimizer.load_state_dict(ckpt_t['optimizer'])
        # Continue with the epoch after the one stored in the checkpoint.
        START_EPOCH = ckpt_t['epoch'] + 1
    else:
        # Previously this case was skipped silently.
        print('WARNING: --resume was set without --checkpoint_path; training starts from scratch.')

# Set loss function
# `reduce=True` is a deprecated argument; with reduction='mean' it only
# triggered a warning, so dropping it keeps the same mean-reduced loss.
if ARGS.loss_type == 'BCE':
    criterion = nn.BCEWithLogitsLoss(reduction='mean').to(device)
elif ARGS.loss_type == 'WBCE':
    # NOTE(review): get_pos_weight is not defined or imported in the visible
    # cells - confirm it is provided (e.g. by config/data_setup) before using
    # --loss_type WBCE.
    pos_weight = get_pos_weight(train_file)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight, reduction='mean').to(device)
else:
    # 'Focal' is accepted by the argument parser but has no implementation here.
    raise NotImplementedError("Loss not implemented")
    
# Training loop
# Keys of the evaluate() result that are persisted with each checkpoint.
CHECKPOINT_STATS_KEYS = ['loss', 'aps', 'map', 'f1_score', 'precision', 'recall']


def _convert_scalar(x):
    """Turn numpy/torch scalars (also inside lists/tuples) into plain Python values."""
    if isinstance(x, (list, tuple)):
        return [_convert_scalar(v) for v in x]
    if hasattr(x, 'item'):
        x = x.item()
    return x


def _select_checkpoint_stats(stats):
    """Keep only the persisted keys of an evaluate() result, as JSON-friendly values."""
    return {k: _convert_scalar(v) for k, v in stats.items() if k in CHECKPOINT_STATS_KEYS}


# Training loop
for epoch in range(START_EPOCH, EPOCH):
    # `logger` and `LOGDIR` were never defined; report the output directory instead.
    print(' * Epoch %d: %s' % (epoch, ARGS.output_path))

    # Adjust learning rate:
    #   epoch < 5: linear warmup of learning rate
    #   epoch >= 5 and cosine_annealing: cosine annealing scheduler
    #   epoch >= 5 and no cosine_annealing: learning rate stays at its warmed-up value
    adjust_learning_rate(optimizer, LR, epoch, scheduler)

    train_loss, train_map = train_epoch(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        data_loader=train_loader
    )
    train_stats = {'train_loss': train_loss, 'train_map': train_map}

    # Validate
    val_eval = evaluate(
        net=model,
        dataloader=val_loader,
        type='Val',
        epoch=epoch
    )

    # Run the test splits and checkpoint only when validation mAP does not get worse.
    val_map = val_eval['map']
    if val_map >= BEST_VAL:
        BEST_VAL = val_map

        test_stats = _select_checkpoint_stats(evaluate(
            net=model,
            dataloader=test_loader,
            type='Test',
            epoch=epoch
        ))

        all_test_stats = {
            'normal_test_stats': test_stats
        }

        if lv_sp_normal_test_file:
            all_test_stats['leave_sp_normal_test_stats'] = _select_checkpoint_stats(evaluate(
                net=model,
                dataloader=lv_sp_normal_loader,
                type='Test Lv Sp Normal',
                epoch=epoch
            ))

        if lv_sp_difficult_test_file:
            all_test_stats['lv_sp_dif_test_stats'] = _select_checkpoint_stats(evaluate(
                net=model,
                dataloader=lv_sp_dif_loader,
                type='Test Lv Sp Difficult',
                epoch=epoch
            ))

        save_checkpoint(test_stats['map'], model, optimizer, epoch, False, all_test_stats)

        # Save checkpoint stats in json format for ease of viewing
        save_checkpoint_stats_json(all_test_stats, epoch, index=False)

    if WANDB:
        wandb.log({'train_loss': train_stats['train_loss'], 'val_loss': val_eval['loss']}, step=epoch)
