In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import copy


import torch
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as ds
from torchvision import models, transforms, utils, datasets
from torch.utils import data
from torchsummary import summary
from torch.optim import lr_scheduler

# Data Read-In and Loading

In [2]:
fish_df = pd.read_csv("data/fish_df.csv")

In [3]:
is_fish_df = pd.read_csv("data/is_fish.csv").iloc[:,1:]
is_fish_df["local_paths"] = is_fish_df["Species"].astype(str) + "/" + is_fish_df["Filename"]
path_set = set(is_fish_df["local_paths"])

In [4]:
species_count = 92 #len(is_fish_df["Species"].unique())

In [5]:
# We normalize to imagenet mean for the data (https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize([224, 224])])}

In [6]:
data_dirs = ["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_common/", "/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_mixed/"]
diff_image_datasets = {dd:{x: datasets.ImageFolder(os.path.join(dd, x),
                                          data_transforms[x])
                  for x in ['train', 'val', 'test']} for dd in data_dirs}

In [7]:
image_datasets = diff_image_datasets["/home/shivaram/DS/Projects/FishID/data/model_data/is_fish_scientific/"]

In [8]:
batch_size = 64#8#32#64
#epoch_samples = 2560# len(samples_weight)

In [9]:
weighted_samplers = {}
for subset in ["train", "val", "test"]:
    target = image_datasets[subset].targets
    
    if True:#subset == "train":
        class_sample_count =np.array([ len(np.where(target == t)[0]) for t in np.unique(target)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in target])
        samples_weight = torch.from_numpy(samples_weight)
        epoch_samples = len(samples_weight)
        sampler = data.WeightedRandomSampler(samples_weight, epoch_samples)
        weighted_samplers[subset] = sampler
    else:
        sampler = data.RandomSampler(image_datasets[subset])
        weighted_samplers[subset] = sampler
    


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], 
                                              batch_size=batch_size, num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

# Model Training and Evaluation Methods

In [10]:
def train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs=25, verbose = True):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epoch_accs = []
    

    for epoch in range(num_epochs):
        epa = {"Epoch": epoch}
        
        if verbose:
            print(f'Epoch {epoch + 1}/{num_epochs}')
            print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            if phase == 'train':
                scheduler.step(epoch_loss)

            
            epa[f'{phase} Loss'] = float(epoch_loss)
            epa[f'{phase} Accuracy'] = float(epoch_acc)
            if verbose:
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        if verbose:
            print()
            
        epoch_accs.append(epa)

    time_elapsed = time.time() - since
    if verbose:
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    epoch_accs = pd.DataFrame(epoch_accs)
    return model, best_acc, epoch_accs


In [11]:
def test_model(model, criterion, phase = "test", verbose = True):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    
    for inputs, labels in dataloaders[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / dataset_sizes[phase]
    epoch_acc = running_corrects.double() / dataset_sizes[phase]

    if verbose:
        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
        
    return epoch_acc

In [12]:
def train_test_save_model(save_path, dataloaders, model, criterion, optimizer, scheduler, num_epochs = 25, verbose = True):
    
    trained_model, val_acc, epoch_accs = train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs, verbose) 
    torch.save(model.state_dict(), save_path)
    test_acc = test_model(trained_model, criterion)
    
    if verbose:
        print(f"Val Accuracy: {val_acc}")
        print(f"Test Accuracy: {test_acc}")
        

    return trained_model, val_acc, test_acc, epoch_accs

# Determining the Best Transfer Model

In [13]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

NVIDIA GeForce RTX 3060 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


## No Frozen Layers
### ResNet18 

In [14]:
resnet18_path = "models/92_classifier/resnet18.pt"

model_ft = models.resnet18(pretrained=True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)


if os.path.exists(resnet18_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=64, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(resnet18_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=128, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(resnet18_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

test Loss: 1.8165 Acc: 0.6646


Decaying Learning Rate

In [None]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)
resnet18_path = "models/92_classifier/resnet18_decay.pt"
if os.path.exists(resnet18_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=64, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(resnet18_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=128, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(resnet18_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

Epoch 1/30
----------
train Loss: 0.7934 Acc: 0.7780
val Loss: 2.1038 Acc: 0.5547

Epoch 2/30
----------
train Loss: 0.7267 Acc: 0.7999
val Loss: 2.1710 Acc: 0.5752

Epoch 3/30
----------
train Loss: 0.6402 Acc: 0.8217
val Loss: 1.8922 Acc: 0.5916

Epoch 4/30
----------


### Efficient Net B0
Batch size for train 32 and batch size for eval 8

In [2]:
efficientnetb0_path = "models/92_classifier/efficient_netb0.pt"

model_ft =  models.efficientnet_b0(pretrained = True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.classifier[1].in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)

if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

NameError: name 'models' is not defined

Decaying Learning Rate

In [None]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)
efficientnetb0_path = "models/92_classifier/efficient_netb0_decay.pt"
if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

### Convnet Tiny
Batch size for train 32 and for test 8

In [16]:
conv_path = "models/92_classifier/conv_tiny.pt"

model_ft =  models.convnext_tiny(pretrained = True)
#for param in model_ft.parameters():
#    param.requires_grad = False
num_ftrs = model_ft.classifier[2].in_features

model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)

if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

test Loss: 1.3758 Acc: 0.7081


Decaying Learning Rate

In [None]:
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)
conv_path = "models/92_classifier/conv_tiny_decay.pt"
if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

# Frozen Layers
### ResNet18

In [None]:
resnet18_path = "models/92_classifier/rn18_frozen.pt"

model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, species_count)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 7)


if os.path.exists(resnet18_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=64, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(resnet18_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=128, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(resnet18_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

### Efficient Net B0

https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088/3

In [24]:
efficientnetb0_path = "models/92_classifier/en_netb0_frozen_decay.pt"

model_ft =  models.efficientnet_b0(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False

    
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 10)

if os.path.exists(efficientnetb0_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(efficientnetb0_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(efficientnetb0_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

Epoch 1/30
----------


KeyboardInterrupt: 

### Convnet Tiny

In [18]:
conv_path = "models/92_classifier/conv_tiny_frozen.pt"

model_ft =  models.convnext_tiny(pretrained = True)

for param in model_ft.parameters():
    param.requires_grad = False
num_ftrs = model_ft.classifier[2].in_features

# Greate Last Layer
model_ft.classifier[2] = nn.Linear(num_ftrs, species_count)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
#exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor = .2, patience = 3)

if os.path.exists(conv_path):
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=8, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft.load_state_dict(torch.load(conv_path))
    test_model(model_ft, criterion)
else:
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], sampler = weighted_samplers[x], batch_size=32, num_workers=4) for x in ['train', 'val', 'test']}
    model_ft, val_acc, test_acc, epoch_accs = train_test_save_model(conv_path, dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, 30, True)

Epoch 1/30
----------
train Loss: 2.2187 Acc: 0.4948
val Loss: 1.9179 Acc: 0.5026

Epoch 2/30
----------
train Loss: 1.5427 Acc: 0.6336
val Loss: 1.7820 Acc: 0.5440

Epoch 3/30
----------
train Loss: 1.3817 Acc: 0.6674
val Loss: 1.6825 Acc: 0.5729

Epoch 4/30
----------
train Loss: 1.2805 Acc: 0.6897
val Loss: 1.6486 Acc: 0.5644

Epoch 5/30
----------
train Loss: 1.2121 Acc: 0.7040
val Loss: 1.5331 Acc: 0.5865

Epoch 6/30
----------
train Loss: 1.1622 Acc: 0.7164
val Loss: 1.5343 Acc: 0.6075

Epoch 7/30
----------
train Loss: 1.1288 Acc: 0.7243
val Loss: 1.4792 Acc: 0.6092

Epoch 8/30
----------
train Loss: 1.0999 Acc: 0.7331
val Loss: 1.5007 Acc: 0.6018

Epoch 9/30
----------
train Loss: 1.0741 Acc: 0.7420
val Loss: 1.6164 Acc: 0.5871

Epoch 10/30
----------
train Loss: 1.0781 Acc: 0.7377
val Loss: 1.4610 Acc: 0.6149

Epoch 11/30
----------
train Loss: 1.0600 Acc: 0.7458
val Loss: 1.5462 Acc: 0.6024

Epoch 12/30
----------
train Loss: 1.0568 Acc: 0.7443
val Loss: 1.4919 Acc: 0.6166

E

# Todo
- Train Standard Network
- Adjust sample weighting
- Train Siamese Network or GAN
- Refactor code
- [Don't weighted random sample for validation and test](https://discuss.pytorch.org/t/dataloader-using-subsetrandomsampler-and-weightedrandomsampler-at-the-same-time/29907/7)