In [1]:
import time
import os 
import tensorflow as tf
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
from torchmetrics.image.fid import FrechetInceptionDistance

import matplotlib.pyplot as plt
import numpy as np

from PIL import Image

random_seed = 9292
torch.manual_seed(random_seed)
LATENT_DIM = 100
BATCH_SIZE = 64
NUM_EPOCHS = 500
AVAIL_GPUS = min(1, torch.cuda.device_count())
NUM_WORKERS = int(os.cpu_count() / 2)
lr = 3e-4
latent_dims = 100
device = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 10

2023-09-10 01:02:02.326471: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-10 01:02:03.001579: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/yisan/anaconda3/envs/gpu_env/lib/
2023-09-10 01:02:03.001641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/yisan/anaconda3/envs/gpu_env/lib/


In [2]:
transform_funcs = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),
    
])

In [3]:
# Load the Data
train_directory = './microscopicdata'

dataset = 'microscopicdata'

# Batch size
batch_size = 128

train_data_size = 7961
val_data_size = 2654
test_data_size = 3539

# Load Data from folders
data = datasets.ImageFolder(root=train_directory, transform=transform_funcs)
train_val_set, test_set = torch.utils.data.random_split(data, [train_data_size+val_data_size, test_data_size])
train_set, val_set = torch.utils.data.random_split(train_val_set, [train_data_size, val_data_size])

train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True) 
val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True) 
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True) 

In [4]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)

Using cache found in /home/yisan/.cache/torch/hub/pytorch_vision_v0.10.0


In [5]:
for param in model.parameters():
    param.requires_grad = False

In [6]:
model.classifier[6] = nn.Linear(4096, 6)
model.classifier.add_module("7", nn.Softmax(dim=1))
model.to(device)
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [7]:
loss_func = nn.NLLLoss()
optimizer = optim.Adam(model.parameters())
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [8]:
def train_and_validate(model, loss_criterion, optimizer, epochs=25):
    '''
    Function to train and validate
    Parameters
        :param model: Model to train and validate
        :param loss_criterion: Loss Criterion to minimize
        :param optimizer: Optimizer for computing gradients
        :param epochs: Number of epochs (default=25)
  
    Returns
        model: Trained Model with best validation accuracy
        history: (dict object): Having training loss, accuracy and validation loss, accuracy
    '''
    
    start = time.time()
    history = []
    best_acc = 0.0

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))
        
        # Set to training mode
        model.train()
        
        # Loss and Accuracy within the epoch
        train_loss = 0.0
        train_acc = 0.0
        
        valid_loss = 0.0
        valid_acc = 0.0
        
        for i, (inputs, labels) in enumerate(train_dataloader):

            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Clean existing gradients
            optimizer.zero_grad()
            
            # Forward pass - compute outputs on input data using the model
            outputs = model(inputs)
            
            # Compute loss
            loss = loss_criterion(outputs, labels)
            
            # Backpropagate the gradients
            loss.backward()
            
            # Update the parameters
            optimizer.step()
            
            # Compute the total loss for the batch and add it to train_loss
            train_loss += loss.item() * inputs.size(0)
            
            # Compute the accuracy
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))
            
            # Convert correct_counts to float and then compute the mean
            acc = torch.mean(correct_counts.type(torch.FloatTensor))
            
            # Compute total accuracy in the whole batch and add to train_acc
            train_acc += acc.item() * inputs.size(0)
            
            #print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item()))

            
        # Validation - No gradient tracking needed
        with torch.no_grad():

            # Set to evaluation mode
            model.eval()

            # Validation loop
            for j, (inputs, labels) in enumerate(val_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass - compute outputs on input data using the model
                outputs = model(inputs)

                # Compute loss
                loss = loss_criterion(outputs, labels)

                # Compute the total loss for the batch and add it to valid_loss
                valid_loss += loss.item() * inputs.size(0)

                # Calculate validation accuracy
                ret, predictions = torch.max(outputs.data, 1)
                correct_counts = predictions.eq(labels.data.view_as(predictions))

                # Convert correct_counts to float and then compute the mean
                acc = torch.mean(correct_counts.type(torch.FloatTensor))

                # Compute total accuracy in the whole batch and add to valid_acc
                valid_acc += acc.item() * inputs.size(0)

                #print("Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item()))
            
        # Find average training loss and training accuracy
        avg_train_loss = train_loss/train_data_size 
        avg_train_acc = train_acc/train_data_size

        # Find average training loss and training accuracy
        avg_valid_loss = valid_loss/val_data_size 
        avg_valid_acc = valid_acc/val_data_size

        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
                
        epoch_end = time.time()
    
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch+1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))
        
        # Save if the model has best accuracy till now
        #torch.save(model, dataset+'_model_'+str(epoch)+'.pt')
            
    return model, history


In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

num_epochs = 5
trained_model, history = train_and_validate(model, loss_func, optimizer, num_epochs)

torch.save(model.state_dict(), 'microscopicdata'+'_history.pth')

Epoch: 1/5
Epoch : 001, Training: Loss: -0.8779, Accuracy: 91.4458%, 
		Validation : Loss : -0.9780, Accuracy: 98.8320%, Time: 279.4610s
Epoch: 2/5
Epoch : 002, Training: Loss: -0.9839, Accuracy: 99.0956%, 
		Validation : Loss : -0.9873, Accuracy: 99.2464%, Time: 281.7360s
Epoch: 3/5
Epoch : 003, Training: Loss: -0.9902, Accuracy: 99.6608%, 
		Validation : Loss : -0.9900, Accuracy: 99.3218%, Time: 280.3175s
Epoch: 4/5
Epoch : 004, Training: Loss: -0.9931, Accuracy: 99.7111%, 
		Validation : Loss : -0.9916, Accuracy: 99.3971%, Time: 279.7494s
Epoch: 5/5
Epoch : 005, Training: Loss: -0.9934, Accuracy: 99.6734%, 
		Validation : Loss : -0.9925, Accuracy: 99.4348%, Time: 282.9321s


In [10]:
idx_to_class = {
    1: 'Crystal',
    2: 'Diatom',
    3: 'Fungi',
    4: 'Mineral',
    5: 'Stomata',
    6: 'Wood'
}

In [11]:
def predict(model):
    with torch.no_grad():
        total_acc = []

        # Set to evaluation mode
        model.eval()

        # Validation loop
        for j, (inputs, labels) in enumerate(test_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass - compute outputs on input data using the model
            outputs = model(inputs)

            # Calculate validation accuracy
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))

            # Convert correct_counts to float and then compute the mean
            acc = torch.mean(correct_counts.type(torch.FloatTensor))

            # Compute total accuracy in the whole batch and add to valid_acc
            # valid_acc += acc.item() * inputs.size(0)
            total_acc.append(acc.item())
            # print(acc.item())
            print(np.mean(total_acc))

In [13]:
predict(model)

1.0
0.98828125
0.9921875
0.9921875
0.99375
0.9947916666666666
0.9944196428571429
0.994140625
0.9939236111111112
0.99453125
0.9950284090909091
0.9947916666666666
0.9951923076923077
0.9949776785714286
0.9953125
0.99560546875
0.9958639705882353
0.99609375
0.9962993421052632
0.996484375
0.9966517857142857
0.9968039772727273
0.9969429347826086
0.9970703125
0.9971875
0.9972956730769231
0.9973958333333334
0.9970585469688688


In [None]:
def make_model():
    model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.classifier[6] = nn.Linear(4096, 6)
    model.classifier.add_module("7", nn.Softmax(dim=1))
    model.to(device)
    model.load_state_dict(torch.load('./microscopicdata_history.pth'))

    return model

In [14]:
model.load_state_dict(torch.load('./microscopicdata_history.pth'))

<All keys matched successfully>