In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import copy


import torch
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as ds
from torchvision import models, transforms, utils, datasets
from torch.utils import data
from torchsummary import summary
from torch.optim import lr_scheduler

# Data Read-In and Loading

In [2]:
fish_df = pd.read_csv("data/fish_df.csv")

is_fish_df = pd.read_csv("data/is_fish.csv").iloc[:,1:]
is_fish_df["local_paths"] = is_fish_df["Species"].astype(str) + "/" + is_fish_df["Filename"]
path_set = set(is_fish_df["local_paths"])

In [3]:
try:
    acc_df = pd.read_csv("data/accuracies.csv")
except:
    acc_df = pd.DataFrame()

In [4]:
species_count = 92 #len(is_fish_df["Species"].unique())

In [5]:
# We normalize to imagenet mean for the data (https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])])}

In [6]:
data_dirs = ["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]
diff_image_datasets = {dd:{x: datasets.ImageFolder(os.path.join(dd, x),
                                          data_transforms[x])
                  for x in ['train', 'val', 'test']} for dd in data_dirs}

In [7]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/"]

In [8]:
batch_size = 64#8#32#64
#epoch_samples = 2560# len(samples_weight)

In [9]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

# Model Training and Evaluation Methods

In [10]:
def train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs=25, verbose = True, plateau = False, early_stop = 15):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epoch_accs = []
    
    curr_val_acc = 0
    stop_count = 0
    

    for epoch in range(num_epochs):
        epa = {"Epoch": epoch}
        
        if verbose:
            print(f'Epoch {epoch + 1}/{num_epochs}')
            print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            if not plateau and phase == 'train':
                scheduler.step()
            elif plateau and phase == 'val':
                scheduler.step(epoch_loss)

            
            epa[f'{phase} Loss'] = float(epoch_loss)
            epa[f'{phase} Accuracy'] = float(epoch_acc)
            if verbose:
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
                
            if phase == 'val':
                if curr_val_acc < epoch_acc:
                    curr_val_acc = epoch_acc
                    stop_count = 0
                else:
                    stop_count += 1
                

        if verbose:
            print()
            
        epoch_accs.append(epa)
        
        if stop_count >= early_stop:
            print(f"No Validation Accuracy decrease over the last {early_stop} epochs. Stopping training...")
            break

    time_elapsed = time.time() - since
    if verbose:
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    epoch_accs = pd.DataFrame(epoch_accs)
    return model, best_acc, epoch_accs


In [11]:
def test_model(model, criterion, phase = "test", verbose = True):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    
    for inputs, labels in dataloaders[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / dataset_sizes[phase]
    epoch_acc = running_corrects.double() / dataset_sizes[phase]

    if verbose:
        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
        
    return epoch_acc

In [12]:
def train_test_save_model(save_path, dataloaders, model, criterion, optimizer, scheduler, num_epochs = 25, verbose = True, plateau = False, test = False, early_stop = 10):
    
    trained_model, val_acc, epoch_accs = train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs, verbose, plateau, early_stop = early_stop) 
    torch.save(model.state_dict(), save_path)
    if test:
        test_acc = test_model(trained_model, criterion)
    else:
        test_acc = -1
    if verbose:
        print(f"Val Accuracy: {val_acc}")
        print(f"Test Accuracy: {test_acc}")        

    return trained_model, val_acc, test_acc, epoch_accs

In [13]:
def build_from_path(model_ft, path, image_datasets=image_datasets, weighted_samplers = weighted_samplers, decay = False, train_batch=64, test_batch=64, epochs = 30, early_stop = 10): #criterion = criterion, optimizer_ft = optimizer_ft, exp_lr_scheduler = exp_lr_scheduler, 
    if os.path.exists(path):
        dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=train_batch, num_workers=4) for x in ['train', 'val', 'test']}
        model_ft.load_state_dict(torch.load(path))
        test_model(model_ft, criterion)
    else:
        dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=test_batch, num_workers=4) for x in ['train', 'val', 'test']}
        if not decay:
            model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, epochs, True, early_stop = early_stop)
        else:
            model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, epochs, True, True, early_stop = early_stop)            

# Determining the Best Transfer Model

In [14]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

NVIDIA GeForce RTX 3060 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


## No Frozen Layers
### ResNet18 

In [16]:
resnet18_path = "models/92_classifier/resnet18.pt"

model_ft = models.resnet18(pretrained=True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)

In [17]:
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [14]:
if os.path.exists(resnet18_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=64, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(resnet18_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=128, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(resnet18_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

test Loss: 1.8165 Acc: 0.6646


Decaying Learning Rate

In [15]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
resnet18_path = "models/92_classifier/resnet18_decay.pt"

if os.path.exists(resnet18_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=64, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(resnet18_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=128, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(resnet18_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True, True)

test Loss: 2.0868 Acc: 0.6691


### Efficient Net B0
Batch size for train 32 and batch size for eval 8

In [15]:
efficientnetb0_path = "models/92_classifier/efficient_netb0.pt"

model_ft =  models.efficientnet_b0(pretrained = True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.classifier[1].in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)

In [18]:
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

Epoch 1/30
----------


KeyboardInterrupt: 

Decaying Learning Rate

In [17]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
efficientnetb0_path = "models/92_classifier/efficient_netb0_decay.pt"
if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True, True)

test Loss: 2.0512 Acc: 0.6808


### Convnet Tiny
Batch size for train 32 and for test 8

In [17]:
conv_path = "models/92_classifier/conv_tiny.pt"

model_ft =  models.convnext_tiny(pretrained = True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.classifier[2].in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [16]:

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)

if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

test Loss: 1.3758 Acc: 0.7081


Decaying Learning Rate

In [18]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)
conv_path = "models/92_classifier/conv_tiny_decay.pt"
if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True, True)

test Loss: 1.7669 Acc: 0.7359


# All But Last Layer Frozen
### ResNet18

In [31]:
resnet18_path = "models/92_classifier/rn18_frozen.pt"

model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)

### Efficient Net B0

https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088/3

In [32]:
efficientnetb0_path = "models/92_classifier/en_netb0_frozen_decay.pt"

model_ft =  models.efficientnet_b0(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False

    
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 10)

if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

Epoch 1/30
----------


TypeError: step() missing 1 required positional argument: 'metrics'

### Convnet Tiny

In [16]:
conv_path = "models/92_classifier/conv_tiny_frozen.pt"

model_ft =  models.convnext_tiny(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False
num_ftrs = model_ft.classifier[2].in_features

# Greate Last Layer
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)

if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

KeyboardInterrupt: 

# Several Layers UnFrozen
### Resnet18

In [41]:
resnet18_path = "models/92_classifier/rn18_frozen_partial.pt"

model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.layer4.parameters():
    param.requires_grad = True
    
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)

In [42]:
if os.path.exists(resnet18_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=64, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(resnet18_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=128, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(resnet18_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory

### Efficient Net

In [34]:
efficientnetb0_path = "models/92_classifier/en_netb0_frozen_partial.pt"

model_ft =  models.efficientnet_b0(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
# Create Last Layer
    
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 10)

In [35]:
build_from_path(model_ft, efficientnetb0_path, train_batch=8, test_batch=32)

test Loss: 1.9392 Acc: 0.6396


### Convnet Tiny

In [27]:
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs


In [19]:
conv_path = "models/92_classifier/conv_tiny_frozen_partial.pt"
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32)

test Loss: 1.2114 Acc: 0.7242


In [28]:
conv_path = "models/92_classifier/conv_tiny_frozen_partial_decay.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.1975 Acc: 0.7387


# Freezing Variations with Convnet Tiny

In [23]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

model_ft.load_state_dict(torch.load("models/92_classifier/conv_tiny_frozen_partial_decay.pt"))

<All keys matched successfully>

### 6th Block+ Unfrozen

In [31]:
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True

In [34]:
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

conv_path = "models/92_classifier/conv_tiny_frozen_partial2_decay.pt"

exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.1063 Acc: 0.7632


### 5th Block+ Unfrozen

In [15]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
for param in model_ft.features[6].parameters():
    param.requires_grad = True
for param in model_ft.features[5].parameters():
    param.requires_grad = True
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

model_ft.load_state_dict(torch.load("models/92_classifier/conv_tiny_frozen_partial2_decay.pt"))

<All keys matched successfully>

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

conv_path = "models/92_classifier/conv_tiny_frozen_partial3_decay.pt"

exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.1083 Acc: 0.7493


### 6th Block+ Unfrozen from the Outset

In [18]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [31]:
conv_path = "models/92_classifier/conv_tiny_frozen_partial2a_decay.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9501 Acc: 0.7894


# Combatting Overfitting
### Ridge Regression
https://stackoverflow.com/questions/42704283/adding-l1-l2-regularization-in-pytorch

In [20]:
conv_path = "models/92_classifier/ctf_p2ad_dropout.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
for wd in [0.0002, 0.00007]:#[0.0005, 0.0003, 0.0001, 0.00005, 0.00001]:
    torch.cuda.empty_cache()
    print(f"WD = {wd}")
    print(wd)
    try:
        conv_path_s = "models/92_classifier/ctf_p2ad_dropout_" + str(str(wd).split(".")[1] )+ ".pt"
    except:
         conv_path_s = "models/92_classifier/ctf_p2ad_dropout_" + str(wd)+ ".pt"
    
    # Load already trained model
    model_ft =  models.convnext_tiny(pretrained = True)
    for param in model_ft.parameters():
        param.requires_grad = False
    for param in model_ft.classifier.parameters():
        param.requires_grad = True
    for param in model_ft.avgpool.parameters():
        param.requires_grad = True
    for param in model_ft.features[7].parameters():
        param.requires_grad = True
    #Unfreeze the next convolutional block
    for param in model_ft.features[6].parameters():
        param.requires_grad = True



    # Create Last Layer
    num_ftrs = model_ft.classifier[2].in_features
    model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9, weight_decay=wd)
    
    build_from_path(model_ft, conv_path_s, train_batch=8, test_batch=32, decay = True)
    build_from_path(model_ft, conv_path_s, train_batch=8, test_batch=32, decay = True)


WD = 0.0002
0.0002
Epoch 1/30
----------
train Loss: 2.1078 Acc: 0.4803
val Loss: 1.2150 Acc: 0.6619

Epoch 2/30
----------
train Loss: 1.1754 Acc: 0.6995
val Loss: 1.0206 Acc: 0.7039

Epoch 3/30
----------
train Loss: 0.8713 Acc: 0.7732
val Loss: 0.9808 Acc: 0.7425

Epoch 4/30
----------
train Loss: 0.6974 Acc: 0.8164
val Loss: 1.0513 Acc: 0.7209

Epoch 5/30
----------
train Loss: 0.5519 Acc: 0.8538
val Loss: 0.9750 Acc: 0.7493

Epoch 6/30
----------
train Loss: 0.4534 Acc: 0.8785
val Loss: 0.9472 Acc: 0.7589

Epoch 7/30
----------
train Loss: 0.3714 Acc: 0.8992
val Loss: 0.9437 Acc: 0.7657

Epoch 8/30
----------
train Loss: 0.3205 Acc: 0.9144
val Loss: 0.9066 Acc: 0.7771

Epoch 9/30
----------
train Loss: 0.2759 Acc: 0.9253
val Loss: 0.9239 Acc: 0.7896

Epoch 10/30
----------
train Loss: 0.2558 Acc: 0.9320
val Loss: 0.8371 Acc: 0.7839

Epoch 11/30
----------
train Loss: 0.2270 Acc: 0.9397
val Loss: 1.0880 Acc: 0.7357

Epoch 12/30
----------
train Loss: 0.2017 Acc: 0.9468
val Loss: 1.

# Different Training Sets
### Add Random Crops
- https://journalofbigdata.springeropen.com/articles/10.1186/s40537-019-0197-0 
- https://journalofbigdata.springeropen.com/articles/10.1186/s40537-019-0197-0/tables/1

In [15]:
# We normalize to imagenet mean for the data (https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.RandomResizedCrop(size=(224, 224)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])])}

In [16]:
data_dirs = ["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]
diff_image_datasets = {dd:{x: datasets.ImageFolder(os.path.join(dd, x),
                                          data_transforms[x])
                  for x in ['train', 'val', 'test']} for dd in data_dirs}

In [28]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/"]

In [30]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = 2560#len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [31]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [37]:
conv_path = "models/92_classifier/ct_fp2ad_random_crop.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9154 Acc: 0.8045


### Common

In [38]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/"]

In [39]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [40]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [41]:
conv_path = "models/92_classifier/ct_fp2ad_common.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

Epoch 1/30
----------
train Loss: 2.0278 Acc: 0.4943
val Loss: 1.2706 Acc: 0.6500

Epoch 2/30
----------
train Loss: 1.1480 Acc: 0.7038
val Loss: 1.0384 Acc: 0.7198

Epoch 3/30
----------
train Loss: 0.8629 Acc: 0.7733
val Loss: 0.9458 Acc: 0.7374

Epoch 4/30
----------
train Loss: 0.6925 Acc: 0.8175
val Loss: 1.0147 Acc: 0.7408

Epoch 5/30
----------
train Loss: 0.5393 Acc: 0.8558
val Loss: 0.9460 Acc: 0.7572

Epoch 6/30
----------
train Loss: 0.4410 Acc: 0.8807
val Loss: 0.9844 Acc: 0.7561

Epoch 7/30
----------
train Loss: 0.3489 Acc: 0.9050
val Loss: 1.0599 Acc: 0.7805

Epoch 8/30
----------
train Loss: 0.2982 Acc: 0.9199
val Loss: 1.1262 Acc: 0.7499

Epoch 9/30
----------
train Loss: 0.2612 Acc: 0.9294
val Loss: 0.9631 Acc: 0.7794

Epoch 10/30
----------
train Loss: 0.1683 Acc: 0.9537
val Loss: 0.9246 Acc: 0.7867

Epoch 11/30
----------
train Loss: 0.1389 Acc: 0.9605
val Loss: 0.9687 Acc: 0.7771

Epoch 12/30
----------
train Loss: 0.1198 Acc: 0.9653
val Loss: 0.9808 Acc: 0.7737

E

In [42]:
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9042 Acc: 0.8050


### Mixed

In [17]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]

In [18]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [19]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [20]:
conv_path = "models/92_classifier/ct_fp2ad_mixed.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 10)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True, epochs = 200, early_stop = 30)

Epoch 1/200
----------
train Loss: 1.0990 Acc: 0.2720
val Loss: 1.1849 Acc: 0.6988

Epoch 2/200
----------
train Loss: 0.6199 Acc: 0.3863
val Loss: 1.0847 Acc: 0.7050

Epoch 3/200
----------
train Loss: 0.4900 Acc: 0.4177
val Loss: 0.9415 Acc: 0.7425

Epoch 4/200
----------
train Loss: 0.3692 Acc: 0.4473
val Loss: 0.9718 Acc: 0.7629

Epoch 5/200
----------
train Loss: 0.2837 Acc: 0.4692
val Loss: 1.0345 Acc: 0.7436

Epoch 6/200
----------
train Loss: 0.2442 Acc: 0.4799
val Loss: 1.0146 Acc: 0.7533

Epoch 7/200
----------
train Loss: 0.1993 Acc: 0.4926
val Loss: 0.9447 Acc: 0.7805

Epoch 8/200
----------
train Loss: 0.1711 Acc: 0.4997
val Loss: 0.9685 Acc: 0.7652

Epoch 9/200
----------
train Loss: 0.1437 Acc: 0.5072
val Loss: 0.9601 Acc: 0.7788

Epoch 10/200
----------
train Loss: 0.1230 Acc: 0.5128
val Loss: 1.0572 Acc: 0.7731

Epoch 11/200
----------
train Loss: 0.1154 Acc: 0.5150
val Loss: 0.9641 Acc: 0.7760

Epoch 12/200
----------
train Loss: 0.1012 Acc: 0.5181
val Loss: 1.0127 Ac

In [21]:
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.0090 Acc: 0.7916


## Exploring Mixed in Greater Depth

In [24]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
for param in model_ft.features[5].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [25]:
conv_path = "models/92_classifier/ct_fpd_mixed_.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True, epochs = 30, early_stop = 15)

Epoch 1/30
----------


KeyboardInterrupt: 

In [None]:
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)