In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import copy


import torch
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as ds
from torchvision import models, transforms, utils, datasets
from torch.utils import data
from torchsummary import summary
from torch.optim import lr_scheduler

# Data Read-In and Loading

In [2]:
fish_df = pd.read_csv("data/fish_df.csv")

is_fish_df = pd.read_csv("data/is_fish.csv").iloc[:,1:]
is_fish_df["local_paths"] = is_fish_df["Species"].astype(str) + "/" + is_fish_df["Filename"]
path_set = set(is_fish_df["local_paths"])

In [3]:
try:
    acc_df = pd.read_csv("data/accuracies.csv")
except:
    acc_df = pd.DataFrame()

In [4]:
species_count = 92 #len(is_fish_df["Species"].unique())

In [5]:
# We normalize to imagenet mean for the data (https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])])}

In [6]:
data_dirs = ["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]
diff_image_datasets = {dd:{x: datasets.ImageFolder(os.path.join(dd, x),
                                          data_transforms[x])
                  for x in ['train', 'val', 'test']} for dd in data_dirs}

In [7]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/"]

In [8]:
batch_size = 64#8#32#64
#epoch_samples = 2560# len(samples_weight)

In [9]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if subset != "test":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)

        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.SequentialSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

# Model Training and Evaluation Methods

In [10]:
def train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs=25, verbose = True, plateau = False, early_stop = 15):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epoch_accs = []
    
    curr_val_acc = 0
    stop_count = 0
    

    for epoch in range(num_epochs):
        epa = {"Epoch": epoch}
        
        if verbose:
            print(f'Epoch {epoch + 1}/{num_epochs}')
            print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            if not plateau and phase == 'train':
                scheduler.step()
            elif plateau and phase == 'val':
                scheduler.step(epoch_loss)

            
            epa[f'{phase} Loss'] = float(epoch_loss)
            epa[f'{phase} Accuracy'] = float(epoch_acc)
            if verbose:
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
                
            if phase == 'val':
                if curr_val_acc < epoch_acc:
                    curr_val_acc = epoch_acc
                    stop_count = 0
                else:
                    stop_count += 1
                

        if verbose:
            print()
            
        epoch_accs.append(epa)
        
        if stop_count >= early_stop:
            print(f"No Validation Accuracy decrease over the last {early_stop} epochs. Stopping training...")
            break

    time_elapsed = time.time() - since
    if verbose:
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    epoch_accs = pd.DataFrame(epoch_accs)
    return model, best_acc, epoch_accs


In [11]:
def test_model(model, criterion, phase = "test", verbose = True):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    ts = 0
    
    for inputs, labels in dataloaders[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        ts += len(preds)

    epoch_loss = running_loss / ts
    epoch_acc = running_corrects.double() / ts

    if verbose:
        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
        
    return epoch_acc

In [12]:
def train_test_save_model(save_path, dataloaders, model, criterion, optimizer, scheduler, num_epochs = 25, verbose = True, plateau = False, test = False, early_stop = 10):
    
    trained_model, val_acc, epoch_accs = train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs, verbose, plateau, early_stop = early_stop) 
    torch.save(model.state_dict(), save_path)
    if test:
        test_acc = test_model(trained_model, criterion)
    else:
        test_acc = -1
    if verbose:
        print(f"Val Accuracy: {val_acc}")
        print(f"Test Accuracy: {test_acc}")        

    return trained_model, val_acc, test_acc, epoch_accs

In [13]:
def build_from_path(model_ft, path, image_datasets=image_datasets, weighted_samplers = weighted_samplers, decay = False, train_batch=64, test_batch=64, epochs = 30, early_stop = 10): #criterion = criterion, optimizer_ft = optimizer_ft, exp_lr_scheduler = exp_lr_scheduler, 
    if os.path.exists(path):
        dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=test_batch, num_workers=4) for x in ['train', 'val', 'test']}
        model_ft.load_state_dict(torch.load(path))
        test_model(model_ft, criterion)
    else:
        dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=train_batch, num_workers=4) for x in ['train', 'val', 'test']}
        if not decay:
            model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, epochs, True, early_stop = early_stop)
        else:
            model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, epochs, True, True, early_stop = early_stop)            

In [14]:
def test_model_k(model, criterion, phase = "test", verbose = True, k = 5):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    ts = 0
    if k > 1:
        print(f"Evaluating Top {k} Accuracy...")
    
    for inputs, labels in dataloaders[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        item_ind = 0
        
        for ind_preds in outputs:
            item_class_rankings = sorted(range(92), key=lambda k: ind_preds[k], reverse = True)
            correct_label = labels.data[item_ind]
            rank = item_class_rankings.index(correct_label)
            item_ind += 1
            if rank < k:
                running_corrects += 1
            
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        
        running_loss += loss.item() * inputs.size(0)
        #running_corrects += torch.sum(preds == labels.data)
        ts += len(preds)

    epoch_loss = running_loss / ts
    epoch_acc = running_corrects / ts

    if verbose:
        print(f'{k} Acc: {epoch_acc:.4f}')
        
    return epoch_acc

In [15]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

NVIDIA GeForce RTX 3060 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


# Determining the Best Transfer Model

## No Frozen Layers
### ResNet18 

In [31]:
resnet18_path = "models/92_classifier/resnet18.pt"

model_ft = models.resnet18(pretrained=True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)

In [33]:
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
build_from_path(model_ft, resnet18_path, train_batch=128, test_batch=64)

test Loss: 2.0053 Acc: 0.6368


Decaying Learning Rate

In [34]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
resnet18_path = "models/92_classifier/resnet18_decay.pt"
build_from_path(model_ft, resnet18_path, train_batch=128, test_batch=64)

test Loss: 2.2803 Acc: 0.6418


### Efficient Net B0
Batch size for train 32 and batch size for eval 8

In [17]:
efficientnetb0_path = "models/92_classifier/efficient_netb0.pt"

model_ft =  models.efficientnet_b0(pretrained = True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.classifier[1].in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)

In [18]:
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

test Loss: 1.6630 Acc: 0.6657


Decaying Learning Rate

In [22]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
efficientnetb0_path = "models/92_classifier/efficient_netb0_decay.pt"
if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True, True)

test Loss: 2.0763 Acc: 0.6568


### Convnet Tiny

In [18]:
conv_path = "models/92_classifier/conv_tiny.pt"

model_ft =  models.convnext_tiny(pretrained = True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.classifier[2].in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [19]:
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [20]:
if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True, True)

test Loss: 1.5263 Acc: 0.6886


Decaying Learning Rate

In [25]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)
conv_path = "models/92_classifier/conv_tiny_decay.pt"

In [17]:
if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True, True)

test Loss: 1.5263 Acc: 0.6886


# All But Last Layer Frozen
### ResNet18

In [23]:
resnet18_path = "models/92_classifier/rn18_frozen.pt"

model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [24]:
build_from_path(model_ft, resnet18_path, train_batch=32, test_batch=8)

test Loss: 15.0800 Acc: 0.4139


### Efficient Net B0

In [21]:
efficientnetb0_path = "models/92_classifier/en_netb0_frozen_decay.pt"

model_ft =  models.efficientnet_b0(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False

    
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 10)

In [22]:
build_from_path(model_ft, efficientnetb0_path, train_batch=32, test_batch=8)

test Loss: 2.7616 Acc: 0.4886


### Convnet Tiny

In [19]:
conv_path = "models/92_classifier/conv_tiny_frozen.pt"

model_ft =  models.convnext_tiny(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False
num_ftrs = model_ft.classifier[2].in_features

# Greate Last Layer
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)

In [20]:
build_from_path(model_ft, conv_path, train_batch=32, test_batch=8)

test Loss: 1.6143 Acc: 0.5877


# Several Layers UnFrozen
### Resnet18

In [16]:
resnet18_path = "models/92_classifier/rn18_frozen_partial.pt"

model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.layer4.parameters():
    param.requires_grad = True
    
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [18]:
build_from_path(model_ft, resnet18_path, train_batch=128, test_batch=64)

test Loss: 2.1516 Acc: 0.6390


### Efficient Net

In [31]:
efficientnetb0_path = "models/92_classifier/en_netb0_frozen_partial.pt"

model_ft =  models.efficientnet_b0(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [32]:
build_from_path(model_ft, efficientnetb0_path, train_batch=8, test_batch=32)

test Loss: 2.0592 Acc: 0.6039


### Convnet Tiny

In [33]:
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [34]:
conv_path = "models/92_classifier/conv_tiny_frozen_partial.pt"
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32)

test Loss: 1.2974 Acc: 0.7086


In [35]:
conv_path = "models/92_classifier/conv_tiny_frozen_partial_decay.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.2989 Acc: 0.7231


# Freezing Variations with Convnet Tiny

In [36]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

model_ft.load_state_dict(torch.load("models/92_classifier/conv_tiny_frozen_partial_decay.pt"))

<All keys matched successfully>

### 6th Block+ Unfrozen

In [37]:
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True

In [38]:
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

conv_path = "models/92_classifier/conv_tiny_frozen_partial2_decay.pt"

exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.2989 Acc: 0.7231


### 5th Block+ Unfrozen

In [16]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
for param in model_ft.features[6].parameters():
    param.requires_grad = True
for param in model_ft.features[5].parameters():
    param.requires_grad = True
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

model_ft.load_state_dict(torch.load("models/92_classifier/conv_tiny_frozen_partial2_decay.pt"))

<All keys matched successfully>

In [17]:
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

conv_path = "models/92_classifier/conv_tiny_frozen_partial3_decay.pt"

exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.3248 Acc: 0.7231


### 6th Block+ Unfrozen from the Outset

In [18]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [19]:
conv_path = "models/92_classifier/conv_tiny_frozen_partial2a_decay.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.0332 Acc: 0.7738


# Combatting Overfitting
### Ridge Regression
https://stackoverflow.com/questions/42704283/adding-l1-l2-regularization-in-pytorch

In [20]:
conv_path = "models/92_classifier/ctf_p2ad_dropout.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
for wd in [0.0002, 0.00007]:#[0.0005, 0.0003, 0.0001, 0.00005, 0.00001]:
    torch.cuda.empty_cache()
    print(f"WD = {wd}")
    try:
        conv_path_s = "models/92_classifier/ctf_p2ad_dropout_" + str(str(wd).split(".")[1] )+ ".pt"
    except:
         conv_path_s = "models/92_classifier/ctf_p2ad_dropout_" + str(wd)+ ".pt"
    
    # Load already trained model
    model_ft =  models.convnext_tiny(pretrained = True)
    for param in model_ft.parameters():
        param.requires_grad = False
    for param in model_ft.classifier.parameters():
        param.requires_grad = True
    for param in model_ft.avgpool.parameters():
        param.requires_grad = True
    for param in model_ft.features[7].parameters():
        param.requires_grad = True
    #Unfreeze the next convolutional block
    for param in model_ft.features[6].parameters():
        param.requires_grad = True



    # Create Last Layer
    num_ftrs = model_ft.classifier[2].in_features
    model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9, weight_decay=wd)
    
    build_from_path(model_ft, conv_path_s, train_batch=8, test_batch=32, decay = True)


WD = 0.0002
test Loss: 0.9992 Acc: 0.7666
WD = 7e-05
test Loss: 1.1088 Acc: 0.7604


# Different Training Sets
### Add Random Crops
- https://journalofbigdata.springeropen.com/articles/10.1186/s40537-019-0197-0 
- https://journalofbigdata.springeropen.com/articles/10.1186/s40537-019-0197-0/tables/1

In [26]:
# We normalize to imagenet mean for the data (https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.RandomResizedCrop(size=(224, 224)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])])}

In [27]:
data_dirs = ["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]
diff_image_datasets = {dd:{x: datasets.ImageFolder(os.path.join(dd, x),
                                          data_transforms[x])
                  for x in ['train', 'val', 'test']} for dd in data_dirs}

In [28]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/"]

In [29]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        samples = 2048#len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight,samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [30]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [31]:
conv_path = "models/92_classifier/ct_fp2ad_random_crop.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9069 Acc: 0.7939


### Common

In [32]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/"]

In [33]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [34]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [35]:
conv_path = "models/92_classifier/ct_fp2ad_common.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9146 Acc: 0.8033


### Mixed

In [36]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]

In [37]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [38]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

In [39]:
conv_path = "models/92_classifier/ct_fp2ad_mixed.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True, epochs = 30, early_stop = 10)

test Loss: 0.8923 Acc: 0.8078


### Exploring Mixed in Greater Depth

In [40]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]

In [41]:
for samples in [2048, 15643]:#, 31285, 46928, 62571]:
    print(f"Samples: {samples}")
    # Build dataloaders
    weighted_samplers = {}
    for subset in ["train", "val", "test"]:
        target = image_datasets[subset].targets

        if True:#subset == "train":
            class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
            weight = 1. / class_sample_count
            samples_weight = np.array([weight[t] for t in target])
            samples_weight = torch.from_numpy(samples_weight)

            sampler = data.WeightedRandomSampler(samples_weight,samples)
            weighted_samplers[subset] = sampler
        else:
            sampler = data.RandomSampler(image_datasets[subset])
            weighted_samplers[subset] = sampler

    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                                  batch_size=batch_size, num_workers=4)
                  for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
    class_names = image_datasets['train'].classes
    
    # Load already trained model
    model_ft =  models.convnext_tiny(pretrained = True)
    for param in model_ft.parameters():
        param.requires_grad = False
    for param in model_ft.classifier.parameters():
        param.requires_grad = True
    for param in model_ft.avgpool.parameters():
        param.requires_grad = True
    for param in model_ft.features[7].parameters():
        param.requires_grad = True
    #Unfreeze the next convolutional block
    for param in model_ft.features[6].parameters():
        param.requires_grad = True

    # Create Last Layer
    num_ftrs = model_ft.classifier[2].in_features
    model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
    
    conv_path = f'models/92_classifier/ct_fp2ad_mixed_{samples}.pt'
    exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
    build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True, epochs = 30, early_stop = 15)
    #build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)


Samples: 2048
test Loss: 0.8919 Acc: 0.7969
Samples: 15643
test Loss: 1.0265 Acc: 0.7841


# Fine Tuning Models with Additional Data
### Scientific Fine Tuned on Common

In [42]:
# We normalize to imagenet mean for the data (https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.RandomResizedCrop(size=(224, 224)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])])}

In [43]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/"]

In [22]:
# Load already trained model
model_ft =  models.convnext_tiny(pretrained = True)
for param in model_ft.parameters():
    param.requires_grad = False
for param in model_ft.classifier.parameters():
    param.requires_grad = True
for param in model_ft.avgpool.parameters():
    param.requires_grad = True
for param in model_ft.features[7].parameters():
    param.requires_grad = True
#Unfreeze the next convolutional block
for param in model_ft.features[6].parameters():
    param.requires_grad = True
    
# Create Last Layer
num_ftrs = model_ft.classifier[2].in_features
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

In [45]:
source_path = "models/92_classifier/ct_fp2ad_random_crop.pt"
model_ft.load_state_dict(torch.load(source_path))

conv_path = "models/92_classifier/ct_fp2adrc_sci_common.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9645 Acc: 0.7881


### Scientific Fine Tuned on Mixed

In [46]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]
model_ft.load_state_dict(torch.load(source_path))

conv_path = "models/92_classifier/ct_fp2adrc_sci_mixed.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9956 Acc: 0.7880


Testing with Smaller Initial Learning Rate

In [47]:
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
model_ft.load_state_dict(torch.load(source_path))

conv_path = "models/92_classifier/ct_fp2adrc_sci_mixed_0001.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 1.0051 Acc: 0.7825


### Common on Mixed

In [48]:
source_path = "models/92_classifier/ct_fp2ad_common.pt"
model_ft.load_state_dict(torch.load(source_path))

conv_path = "models/92_classifier/ct_fp2adrc_common_mixed.pt"
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 5)
build_from_path(model_ft, conv_path, train_batch=8, test_batch=32, decay = True)

test Loss: 0.9684 Acc: 0.7928


# Top N Accuracy of Best Model 
### (Partially Unfrozen ConvNet Tiny on Mixed Data, with Reduce LR on Plateau, and Random Crops)

In [23]:
best_path = "models/92_classifier/ct_fp2ad_mixed.pt"
model_ft.load_state_dict(torch.load(best_path))

<All keys matched successfully>

In [24]:
criterion = nn.CrossEntropyLoss()
test_model_k(model_ft, criterion, k=3)
test_model_k(model_ft, criterion)

Evaluating Top 3 Accuracy...
3 Acc: 0.9003
Evaluating Top 5 Accuracy...
5 Acc: 0.9287


0.9286908077994429