In [1]:
# Import Python Standard Library dependencies
from copy import copy
import datetime
from glob import glob
import json
import math
import multiprocessing
import os
from pathlib import Path
import random

# Import utility functions
from cjm_pandas_utils.core import markdown_to_pandas
from cjm_pil_utils.core import resize_img
from cjm_pytorch_utils.core import set_seed, pil_to_tensor, tensor_to_pil, get_torch_device, denorm_img_tensor

# Import HuggingFace Datasets dependencies
from datasets import load_dataset

# Import matplotlib for creating plots
import matplotlib.pyplot as plt

# Import numpy
import numpy as np

# Import pandas module for data manipulation
import pandas as pd

# Set options for Pandas DataFrame display
pd.set_option('max_colwidth', None)  # Do not truncate the contents of cells in the DataFrame
pd.set_option('display.max_rows', None)  # Display all rows in the DataFrame
pd.set_option('display.max_columns', None)  # Display all columns in the DataFrame

# Import PIL for image manipulation
from PIL import Image

# Import timm library
import timm

# Import PyTorch dependencies
import torch
import torch.nn as nn
from torch.amp import autocast
from torch.cuda.amp import GradScaler
from torchvision import transforms
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset, DataLoader
from torcheval.tools import get_module_summary
from torcheval.metrics import MulticlassAccuracy

# Import tqdm for progress bar
from tqdm.auto import tqdm

In [2]:
# Set the seed for generating random numbers in PyTorch, NumPy, and Python's random module.
seed = 1234
set_seed(seed)

In [3]:
device = get_torch_device()
dtype = torch.float32
device, dtype

('cuda', torch.float32)

In [4]:
# The name for the project
project_name = f"pytorch-timm-image-classifier"

# The path for the project folder
project_dir = Path(f"./{project_name}/")

# Create the project directory if it does not already exist
project_dir.mkdir(parents=True, exist_ok=True)
print(project_dir)

# Define path parent directory to store datasets
dataset_dir = Path("./dataset")
# Create the dataset parent directory if it does not exist
dataset_dir.mkdir(parents=True, exist_ok=True)
print(f"Dataset Directory: {dataset_dir}")

pytorch-timm-image-classifier
Dataset Directory: dataset


In [5]:
# import the necessary libraries
from torchvision.transforms import RandomHorizontalFlip
from torch.utils.data import WeightedRandomSampler
from sklearn.metrics import classification_report
from torchvision.transforms import RandomCrop
from torchvision.transforms import RandomRotation
from torchvision.transforms import Grayscale,Resize
from torchvision.transforms import ToTensor
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from spike import config as cfg
from spike import EarlyStopping
from spike import LRScheduler
from spike import EmotionNet
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime
from torch.optim import SGD, Adam
import torch.nn as nn
import pandas as pd
import argparse
import torch
import math
import warnings

In [6]:
# TRAIN_DIRECTORY = "./dataset/train"
# TEST_DIRECTORY = "./dataset/test"
# train_transform = transforms.Compose([
#     Grayscale(num_output_channels=1),
#     RandomRotation(25),
#     RandomHorizontalFlip(),
#     RandomCrop((48, 48)),
#     ToTensor()
# ])

# test_transform = transforms.Compose([
#     Grayscale(num_output_channels=1),
#     ToTensor()
# ])

TRAIN_DIRECTORY = "./dataset/train"
TEST_DIRECTORY = "./dataset/test"
train_transform = transforms.Compose([
    Grayscale(num_output_channels=1),
    RandomRotation(30),
    RandomHorizontalFlip(),
    Resize((224, 224)),
    ToTensor()
])

test_transform = transforms.Compose([
    Grayscale(num_output_channels=1),
    Resize((224, 224)),
    ToTensor()
])

In [7]:
# Instantiate the datasets using the defined transformations
# train_dataset = ImageDataset(dataset=train_split, classes=class_names, tfms=train_tfms)
# valid_dataset = ImageDataset(dataset=val_split, classes=class_names, tfms=valid_tfms)
train_dataset = datasets.ImageFolder(TRAIN_DIRECTORY, transform=train_transform)
test_dataset = datasets.ImageFolder(TEST_DIRECTORY, transform=test_transform)
TRAIN_SIZE = .9
VAL_SIZE = .1

classes = train_dataset.classes
num_of_classes = len(classes)
print(f"[INFO] Class labels: {classes}")

# use train samples to generate train/validation set
num_train_samples = len(train_dataset)
train_size = math.floor(num_train_samples * TRAIN_SIZE)
val_size = math.ceil(num_train_samples * VAL_SIZE)
print(f"[INFO] Train samples: {train_size} ...\t Validation samples: {val_size}...")

# randomly split the training dataset into train and validation set
train_datasetset, valid_dataset = random_split(train_dataset, [train_size, val_size])

# modify the data transform applied towards the validation set
valid_dataset.dataset.transforms = test_transform
# Print the number of samples in the training and validation datasets
print(f'Training dataset size: {len(train_dataset)}')
print(f'Validation dataset size: {len(valid_dataset)}')

[INFO] Class labels: ['angry', 'fear', 'happy', 'neutral', 'sad', 'surprise']
[INFO] Train samples: 25838 ...	 Validation samples: 2871...
Training dataset size: 28709
Validation dataset size: 2871


In [8]:
bs = 32

In [9]:
train_classes = [label for _, label in train_datasetset]

# count each labels within each classes
class_count = Counter(train_classes)
print(f"[INFO] Total sample: {class_count}")

# compute and determine the weights to be applied on each category
# depending on the number of samples available
class_weight = torch.Tensor([len(train_classes) / c
                             for c in pd.Series(class_count).sort_index().values])

# initialize a placeholder for each target image, and iterate via the train dataset,
# get the weights for each class and modify the default sample weight to its
# corresponding class weight already computed
sample_weight = [0] * len(train_datasetset)
for idx, (image, label) in enumerate(train_datasetset):
    weight = class_weight[label]
    sample_weight[idx] = weight

# define a sampler which randomly sample labels from the train dataset
sampler = WeightedRandomSampler(weights=sample_weight, num_samples=len(train_datasetset),
                                replacement=True)

[INFO] Total sample: Counter({2: 6473, 3: 4458, 4: 4342, 0: 3987, 1: 3716, 5: 2862})


In [10]:
# Set the number of worker processes for loading data. This should be the number of CPUs available.
num_workers = multiprocessing.cpu_count()

# Define parameters for DataLoader
data_loader_params = {
    'batch_size': bs,  # Batch size for data loading
    'num_workers': num_workers,  # Number of subprocesses to use for data loading
    'persistent_workers': True,  # If True, the data loader will not shutdown the worker processes after a dataset has been consumed once. This allows to maintain the worker dataset instances alive.
    'pin_memory': True,  # If True, the data loader will copy Tensors into CUDA pinned memory before returning them. Useful when using GPU.
    'pin_memory_device': device,  # Specifies the device where the data should be loaded. Commonly set to use the GPU.
}

# Create DataLoader for training data. Data is shuffled for every epoch.
train_dataloader = DataLoader(train_dataset, **data_loader_params, shuffle=True)

# Create DataLoader for validation data. Shuffling is not necessary for validation data.
valid_dataloader = DataLoader(valid_dataset, **data_loader_params)
test_dataloader = DataLoader(test_dataset, **data_loader_params)
# Print the number of batches in the training and validation DataLoaders
print(f'Number of batches in train DataLoader: {len(train_dataloader)}')
print(f'Number of batches in validation DataLoader: {len(valid_dataloader)}')

Number of batches in train DataLoader: 898
Number of batches in validation DataLoader: 90


In [11]:
timm.list_models('vit_tiny*', pretrained=True)

['vit_tiny_patch16_224',
 'vit_tiny_patch16_224_in21k',
 'vit_tiny_patch16_384',
 'vit_tiny_r_s16_p8_224',
 'vit_tiny_r_s16_p8_224_in21k',
 'vit_tiny_r_s16_p8_384']

In [12]:
# pd.DataFrame(timm.list_models('resnet18*', pretrained=True))
# pd.DataFrame(timm.list_models('efficientnet*', pretrained=True))
pd.DataFrame(timm.list_models('vit_tiny*', pretrained=True))

Unnamed: 0,0
0,vit_tiny_patch16_224
1,vit_tiny_patch16_224_in21k
2,vit_tiny_patch16_384
3,vit_tiny_r_s16_p8_224
4,vit_tiny_r_s16_p8_224_in21k
5,vit_tiny_r_s16_p8_384


In [14]:
# Import the resnet module
from timm.models import resnet
from timm.models import efficientnet
from timm.models import vision_transformer

# Define the ResNet model variant to use
# resnet_model = 'resnet18'
# eff_model = 'efficientnet_b2'
vit_model = 'vit_tiny_patch16_224'

# Get the default configuration of the chosen model
# model_cfg = resnet.default_cfgs[resnet_model]
model_cfg = vision_transformer.default_cfgs[vit_model]

# Show the default configuration values
pd.DataFrame.from_dict(model_cfg, orient='index')

Unnamed: 0,0
url,https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz
num_classes,1000
input_size,"(3, 224, 224)"
pool_size,
crop_pct,0.9
interpolation,bicubic
fixed_input_size,True
mean,"(0.5, 0.5, 0.5)"
std,"(0.5, 0.5, 0.5)"
first_conv,patch_embed.proj


In [15]:
# Retrieve normalization statistics (mean and std) specific to the pretrained model
mean, std = model_cfg['mean'], model_cfg['std']
norm_stats = (mean, std)
norm_stats

((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

In [16]:
# Create a pretrained ResNet model with the number of output classes equal to the number of class names
# 'timm.create_model' function automatically downloads and initializes the pretrained weights

# resnet18 = timm.create_model(resnet_model, pretrained=True, num_classes=len(class_names))
# model = timm.create_model(eff_model, pretrained=True, num_classes=len(classes),in_chans=1)
# model = timm.create_model(eff_model, pretrained=True, num_classes=len(classes))
# model = timm.create_model(vit_model, pretrained=True, num_classes=len(classes))
model = timm.create_model(vit_model, pretrained=True, num_classes=len(classes),in_chans=1)

# Set the device and data type for the model
model = model.to(device=device, dtype=dtype)

# Add attributes to store the device and model name for later reference
model.device = device
# model.name = eff_model
model.name = vit_model

In [17]:
# Define the input to the model
test_inp = torch.randn(1, 1, 224, 224).to(device)

# Get a summary of the model as a Pandas DataFrame
summary_df = markdown_to_pandas(f"{get_module_summary(model, [test_inp])}")

# Filter the summary to only contain Conv2d layers and the model
# summary_df = summary_df[(summary_df.index == 0) | (summary_df['Type'] == 'Conv2d')]

# Remove the column "Contains Uninitialized Parameters?"
summary_df.drop('Contains Uninitialized Parameters?', axis=1)



Unnamed: 0,Type,# Parameters,# Trainable Parameters,Size (bytes),In size,Out size
0,VisionTransformer,5.4 M,5.4 M,21.7 M,"[1, 1, 224, 224]","[1, 6]"
1,PatchEmbed,49.3 K,49.3 K,197 K,"[1, 1, 224, 224]","[1, 196, 192]"
2,Conv2d,49.3 K,49.3 K,197 K,"[1, 1, 224, 224]","[1, 192, 14, 14]"
3,Identity,0,0,0,"[1, 196, 192]","[1, 196, 192]"
4,Dropout,0,0,0,"[1, 197, 192]","[1, 197, 192]"
5,Identity,0,0,0,"[1, 197, 192]","[1, 197, 192]"
6,Sequential,5.3 M,5.3 M,21.4 M,"[1, 197, 192]","[1, 197, 192]"
7,Block,444 K,444 K,1.8 M,"[1, 197, 192]","[1, 197, 192]"
8,LayerNorm,384,384,1.5 K,"[1, 197, 192]","[1, 197, 192]"
9,Attention,148 K,148 K,592 K,"[1, 197, 192]","[1, 197, 192]"


In [18]:
# Function to run a single training/validation epoch
def run_epoch(model, dataloader, optimizer, metric, lr_scheduler, device, scaler, is_training):
    # Set model to training mode if 'is_training' is True, else set to evaluation mode
    model.train() if is_training else model.eval()

    # Reset the performance metric
    metric.reset()
    # Initialize the average loss for the current epoch
    epoch_loss = 0
    # Initialize progress bar with total number of batches in the dataloader
    progress_bar = tqdm(total=len(dataloader), desc="Train" if is_training else "Eval")

    # Iterate over data batches
    for batch_id, (inputs, targets) in enumerate(dataloader):
        # Move inputs and targets to the specified device (e.g., GPU)
        inputs, targets = inputs.to(device), targets.to(device)

        # Enables gradient calculation if 'is_training' is True
        with torch.set_grad_enabled(is_training):
            # Automatic Mixed Precision (AMP) context manager for improved performance
            with autocast(device):
                outputs = model(inputs) # Forward pass
                loss = torch.nn.functional.cross_entropy(outputs, targets) # Compute loss

        # Update the performance metric
        metric.update(outputs.detach().cpu(), targets.detach().cpu())

        # If in training mode
        if is_training:
            if scaler is not None: # If using AMP
                # Scale the loss and backward propagation
                scaler.scale(loss).backward()
                scaler.step(optimizer) # Make an optimizer step
                scaler.update() # Update the scaler
            else:
                loss.backward() # Backward propagation
                optimizer.step() # Make an optimizer step

            optimizer.zero_grad() # Clear the gradients
            lr_scheduler.step() # Update learning rate

        loss_item = loss.item()
        epoch_loss += loss_item
        # Update progress bar
        progress_bar.set_postfix(accuracy=metric.compute().item(),
                                 loss=loss_item,
                                 avg_loss=epoch_loss/(batch_id+1),
                                 lr=lr_scheduler.get_last_lr()[0] if is_training else "")
        progress_bar.update()

        # If loss is NaN or infinity, stop training
        if math.isnan(loss_item) or math.isinf(loss_item):
            print(f"Loss is NaN or infinite at epoch {epoch}, batch {batch_id}. Stopping training.")
            break

    progress_bar.close()
    return epoch_loss / (batch_id + 1)

In [19]:
# Main training loop
def train_loop(model, train_dataloader, valid_dataloader, optimizer, metric, lr_scheduler, device, epochs, use_amp, checkpoint_path):
    # Initialize GradScaler for Automatic Mixed Precision (AMP) if 'use_amp' is True
    scaler = GradScaler() if use_amp else None
    best_loss = float('inf')

    # Iterate over each epoch
    for epoch in tqdm(range(epochs), desc="Epochs"):
        # Run training epoch and compute training loss
        train_loss = run_epoch(model, train_dataloader, optimizer, metric, lr_scheduler, device, scaler, is_training=True)

        with torch.no_grad():
            # Run validation epoch and compute validation loss
            valid_loss = run_epoch(model, valid_dataloader, None, metric, None, device, scaler, is_training=False)

        # If current validation loss is lower than the best one so far, save model and update best loss
        if valid_loss < best_loss:
            best_loss = valid_loss
            metric_value = metric.compute().item()
            torch.save(model.state_dict(), checkpoint_path)

            training_metadata = {
                'epoch': epoch,
                'train_loss': train_loss,
                'valid_loss': valid_loss,
                'metric_value': metric_value,
                'learning_rate': lr_scheduler.get_last_lr()[0],
                'model_architecture': model.name
            }

            # Save best_loss and metric_value in a JSON file
            with open(Path(checkpoint_path.parent/'training_metadata.json'), 'w') as f:
                json.dump(training_metadata, f)

        # If loss is NaN or infinity, stop training
        if any(math.isnan(loss) or math.isinf(loss) for loss in [train_loss, valid_loss]):
            print(f"Loss is NaN or infinite at epoch {epoch}. Stopping training.")
            break

    # If using AMP, clean up the unused memory in GPU
    if use_amp:
        torch.cuda.empty_cache()

In [20]:
# Generate timestamp for the training session (Year-Month-Day_Hour_Minute_Second)
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Create a directory to store the checkpoints if it does not already exist
checkpoint_dir = Path(project_dir/f"{timestamp}")

# Create the checkpoint directory if it does not already exist
checkpoint_dir.mkdir(parents=True, exist_ok=True)

# The model checkpoint path
checkpoint_path = checkpoint_dir/f"{model.name}.pth"

print(checkpoint_path)

pytorch-timm-image-classifier/2023-07-12_16-57-10/vit_tiny_patch16_224.pth


In [21]:
# Learning rate for the model
lr = 1e-5

# Number of training epochs
epochs = 15

# AdamW optimizer; includes weight decay for regularization
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, eps=1e-5)

# Learning rate scheduler; adjusts the learning rate during training
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,
                                                   max_lr=lr,
                                                   total_steps=epochs*len(train_dataloader))

# Performance metric: Multiclass Accuracy
metric = MulticlassAccuracy()

# Check for CUDA-capable GPU availability
use_amp = torch.cuda.is_available()

In [None]:
train_loop(model, train_dataloader, valid_dataloader, optimizer, metric, lr_scheduler, device, epochs, use_amp, checkpoint_path)

In [None]:
train_sz = (224,224)
# Set the minimum input dimension for inference
infer_sz = max(train_sz)

# Choose a random item from the dataset
item, l = random.choice(test_dataset.imgs)
# print(item)
import torchvision.transforms.functional as TF

img = Image.open(item)
img = TF.to_tensor(img)

img = torch.unsqueeze(img,dim=0)
# Open the image and resize it
# sample_img = item['image']
# inp_img = resize_img(sample_img.copy(), infer_sz)

# Convert the image to a normalized tensor and move it to the device
# img_tensor = pil_to_tensor(inp_img, *norm_stats).to(device=device)

# Make a prediction with the model
with torch.no_grad():
    pred = model(img.to("cuda"))

# Scale the model predictions to add up to 1
pred_scores = torch.softmax(pred, dim=1)

# Get the highest confidence score
confidence_score = pred_scores.max()

# Get the class index with the highest confidence score and convert it to the class name
pred_class = train_dataset.classes[torch.argmax(pred_scores)]
# print(pred_class)
# Store the prediction data in a Pandas Series for easy formatting
pred_data = pd.Series({
    "Target Class:": classes[l],
    "Predicted Class:": pred_class,
    "Confidence Score:": f"{confidence_score*100:.2f}%"
})

# Print the prediction data
print(pred_data.to_string(header=False))

# Display the image
# sample_img

In [87]:
with torch.set_grad_enabled(False):
    # set the evaluation mode
    model.eval()

    # initialize a list to keep track of our predictions
    predictions = []

    # iterate through the test set
    for (data, _) in test_dataloader:
        # move the data into the device used for testing
        data = data.to(device)

        # perform a forward pass and calculate the training loss
        output = model(data)
        output = output.argmax(axis=1).cpu().numpy()
        predictions.extend(output)

# evaluate the network
print("[INFO] evaluating network...")
actual = [label for _, label in test_dataset]
print(classification_report(actual, predictions, target_names=test_dataset.classes))

[INFO] evaluating network...
              precision    recall  f1-score   support

       angry       0.56      0.58      0.57      1069
        fear       0.47      0.35      0.40      1024
       happy       0.83      0.85      0.84      1774
     neutral       0.55      0.61      0.58      1233
         sad       0.47      0.49      0.48      1247
    surprise       0.74      0.75      0.74       831

    accuracy                           0.62      7178
   macro avg       0.60      0.60      0.60      7178
weighted avg       0.62      0.62      0.62      7178



In [None]:
#               precision    recall  f1-score   support

#        angry       0.56      0.58      0.57      1069
#         fear       0.47      0.35      0.40      1024
#        happy       0.83      0.85      0.84      1774
#      neutral       0.55      0.61      0.58      1233
#          sad       0.47      0.49      0.48      1247
#     surprise       0.74      0.75      0.74       831

#     accuracy                           0.62      7178
#    macro avg       0.60      0.60      0.60      7178
# weighted avg       0.62      0.62      0.62      7178

In [None]:
#               precision    recall  f1-score   support

#        angry       0.56      0.57      0.56      1069
#         fear       0.48      0.34      0.40      1024
#        happy       0.82      0.84      0.83      1774
#      neutral       0.55      0.58      0.57      1233
#          sad       0.48      0.52      0.49      1247
#     surprise       0.71      0.77      0.74       831

#     accuracy                           0.62      7178
#    macro avg       0.60      0.60      0.60      7178
# weighted avg       0.61      0.62      0.61      7178
#               precision    recall  f1-score   support

#        angry       0.55      0.54      0.55      1069
#         fear       0.44      0.34      0.38      1024
#        happy       0.79      0.84      0.81      1774
#      neutral       0.58      0.54      0.56      1233
#          sad       0.46      0.52      0.49      1247
#     surprise       0.71      0.75      0.73       831

#     accuracy                           0.61      7178
#    macro avg       0.59      0.59      0.59      7178
# weighted avg       0.60      0.61      0.60      7178
#               precision    recall  f1-score   support

#        angry       0.50      0.54      0.52      1069
#         fear       0.44      0.20      0.28      1024
#        happy       0.80      0.78      0.79      1774
#      neutral       0.53      0.57      0.55      1233
#          sad       0.43      0.53      0.48      1247
#     surprise       0.65      0.74      0.69       831

#     accuracy                           0.58      7178
#    macro avg       0.56      0.56      0.55      7178
# weighted avg       0.58      0.58      0.57      7178
#               precision    recall  f1-score   support

#        angry       0.35      0.34      0.35      1069
#         fear       0.31      0.17      0.22      1024
#        happy       0.61      0.73      0.67      1774
#      neutral       0.46      0.43      0.44      1233
#          sad       0.36      0.39      0.38      1247
#     surprise       0.60      0.67      0.63       831

#     accuracy                           0.48      7178
#    macro avg       0.45      0.46      0.45      7178
# weighted avg       0.46      0.48      0.46      7178


In [26]:
# import the necessary libraries
from torchvision.transforms import RandomHorizontalFlip
from torch.utils.data import WeightedRandomSampler
from sklearn.metrics import classification_report
from torchvision.transforms import RandomCrop
from torchvision.transforms import RandomRotation
from torchvision.transforms import Grayscale,Resize
from torchvision.transforms import ToTensor
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from spike import config as cfg
from spike import EarlyStopping
from spike import LRScheduler
from spike import EmotionNet
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime
from torch.optim import SGD, Adam
import torch.nn as nn
import pandas as pd
import argparse
import torch
import math
import warnings
import multiprocessing


In [31]:
# Import the resnet module
from timm.models import resnet
from timm.models import efficientnet
from cjm_pandas_utils.core import markdown_to_pandas
from cjm_pil_utils.core import resize_img
from cjm_pytorch_utils.core import set_seed, pil_to_tensor, tensor_to_pil, get_torch_device, denorm_img_tensor
import timm
device = get_torch_device()

# Define the ResNet model variant to use
# resnet_model = 'resnet18'
# eff_model = 'efficientnet_b2'
eff_model = 'vit_tiny_patch16_224'

In [32]:
TEST_DIRECTORY = "/media/rllab/HDD2/nima_bsl/face/Latest-Version/Latest-Version/dataset/test"

In [34]:
test_transform = transforms.Compose([
    Grayscale(num_output_channels=1),
    Resize((224, 224)),
    ToTensor()
])
test_dataset = datasets.ImageFolder(TEST_DIRECTORY, transform=test_transform)


classes = test_dataset.classes
num_of_classes = len(classes)
print(f"[INFO] Class labels: {classes}")

bs = 32
num_workers = multiprocessing.cpu_count()

# Define parameters for DataLoader
data_loader_params = {
    'batch_size': bs,  # Batch size for data loading
    'num_workers': num_workers,  # Number of subprocesses to use for data loading
    'persistent_workers': True,  # If True, the data loader will not shutdown the worker processes after a dataset has been consumed once. This allows to maintain the worker dataset instances alive.
    'pin_memory': True,  # If True, the data loader will copy Tensors into CUDA pinned memory before returning them. Useful when using GPU.
    'pin_memory_device': device,  # Specifies the device where the data should be loaded. Commonly set to use the GPU.
}


test_dataloader = DataLoader(test_dataset, **data_loader_params)
# model = timm.create_model(eff_model, pretrained=True, num_classes=len(classes),in_chans=1)
# model = timm.create_model(eff_model, pretrained=True, num_classes=len(classes))
# model = timm.create_model(eff_model, pretrained=True, num_classes=len(classes))
model = timm.create_model(eff_model, pretrained=True, num_classes=len(classes),in_chans=1)


model.name = eff_model


predictions = []

size = len(test_dataloader.dataset)
num_batches = len(test_dataloader)    


# model.load_state_dict(torch.load("./pytorch-timm-image-classifier/2023-07-12_15-11-04/efficientnet_b2.pth"))
# model.load_state_dict(torch.load("./pytorch-timm-image-classifier/2023-07-12_16-38-16/vit_tiny_patch16_224.pth"))
model.load_state_dict(torch.load("./pytorch-timm-image-classifier/2023-07-12_16-57-10/vit_tiny_patch16_224.pth"))

model = model.to(device)
model.eval()

# disable gradients for inference
with torch.no_grad():
    for (X, _) in test_dataloader:
    
    ################################# inference #################################
        # print(X)
        X = X.to(device)

    # compute predictions
        pred = model(X)       
        # softmax
        y_pred = pred.argmax(axis=1).cpu().numpy()

    # store results
        predictions.append(y_pred)

    # log

    # if batch % (CFG.print_freq) == 0:
    #   print(f"Inference Batch: {current:>5d}/{size:>5d}: Elapsed Time: {time_delta} s")          



[INFO] Class labels: ['angry', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [35]:
len(predictions)

225

In [36]:
import numpy as np
predictions = np.concatenate(predictions, axis = 0) # join sequence of arrays along axis 0


In [37]:
len(predictions)

7178

In [38]:
# vit_tiny gray base 1,224,224
# evaluate the network
print("[INFO] evaluating network...")
actual = [label for _, label in test_dataset]
print(classification_report(actual, predictions, target_names=test_dataset.classes))

[INFO] evaluating network...
              precision    recall  f1-score   support

       angry       0.61      0.60      0.61      1069
        fear       0.54      0.40      0.46      1024
       happy       0.86      0.88      0.87      1774
     neutral       0.60      0.67      0.63      1233
         sad       0.52      0.56      0.54      1247
    surprise       0.76      0.78      0.77       831

    accuracy                           0.67      7178
   macro avg       0.65      0.65      0.65      7178
weighted avg       0.66      0.67      0.66      7178



In [32]:
# vit_tiny base 3,224,224
# evaluate the network
print("[INFO] evaluating network...")
actual = [label for _, label in test_dataset]
print(classification_report(actual, predictions, target_names=test_dataset.classes))

[INFO] evaluating network...
              precision    recall  f1-score   support

       angry       0.56      0.68      0.62      1069
        fear       0.50      0.43      0.46      1024
       happy       0.89      0.85      0.87      1774
     neutral       0.62      0.64      0.63      1233
         sad       0.55      0.52      0.53      1247
    surprise       0.75      0.78      0.76       831

    accuracy                           0.66      7178
   macro avg       0.64      0.65      0.65      7178
weighted avg       0.66      0.66      0.66      7178



In [61]:
# efficientnet_b2
# evaluate the network
print("[INFO] evaluating network...")
actual = [label for _, label in test_dataset]
print(classification_report(actual, predictions, target_names=test_dataset.classes))

[INFO] evaluating network...
              precision    recall  f1-score   support

       angry       0.56      0.58      0.57      1069
        fear       0.47      0.35      0.40      1024
       happy       0.83      0.85      0.84      1774
     neutral       0.55      0.61      0.58      1233
         sad       0.47      0.49      0.48      1247
    surprise       0.74      0.75      0.74       831

    accuracy                           0.62      7178
   macro avg       0.60      0.60      0.60      7178
weighted avg       0.62      0.62      0.62      7178



In [50]:
# evaluate the network
print("[INFO] evaluating network...")
actual = [label for _, label in test_dataset]
print(classification_report(actual, predictions, target_names=test_dataset.classes))

[INFO] evaluating network...
              precision    recall  f1-score   support

       angry       0.56      0.57      0.56      1069
        fear       0.48      0.34      0.40      1024
       happy       0.82      0.84      0.83      1774
     neutral       0.55      0.58      0.57      1233
         sad       0.48      0.52      0.49      1247
    surprise       0.71      0.77      0.74       831

    accuracy                           0.62      7178
   macro avg       0.60      0.60      0.60      7178
weighted avg       0.61      0.62      0.61      7178



In [37]:
# evaluate the network
print("[INFO] evaluating network...")
actual = [label for _, label in test_dataset]
print(classification_report(actual, predictions, target_names=test_dataset.classes))

[INFO] evaluating network...
              precision    recall  f1-score   support

       angry       0.54      0.58      0.56      1069
        fear       0.46      0.34      0.39      1024
       happy       0.83      0.85      0.84      1774
     neutral       0.56      0.61      0.59      1233
         sad       0.47      0.50      0.48      1247
    surprise       0.74      0.72      0.73       831

    accuracy                           0.62      7178
   macro avg       0.60      0.60      0.60      7178
weighted avg       0.61      0.62      0.61      7178

