In [354]:
import os
import time
import random
from pathlib import Path
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset as TorchDataset, DataLoader
from torch import nn
from torch import optim as TorchOptimizers
from torchmetrics import MeanMetric
from torchvision import models as torchmodels
import torchvision.transforms.v2 as T
from sklearn.model_selection import train_test_split, StratifiedGroupKFold
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score
from PIL import Image, ImageFile
import matplotlib.pyplot as plt
import json
from dataclasses import dataclass
from typing import Callable
from torchinfo import summary as torch_summary
import math
import wandb

In [356]:
@dataclass
class Config:
    """Environment-specific paths/device plus hyperparameters shared by all environments.

    A different config should be created for each device/environment. The dataclass
    fields hold what differs per environment; ``__post_init__`` adds the shared
    settings and ``init`` builds the tensors and transforms needed at run time.
    """
    train_csv_filepath: str
    test_csv_filepath: str
    submission_filepath: str
    images_root_folder: str
    training_output_folder: str
    saved_weights_filepath: str
    device: str

    def __post_init__(self):
        """ For configuration variables that are shared across environments """
        # Index of the first block of model.features that is left trainable
        self.fine_tune_start = 5
        self.seed = 201

        self.starting_learning_rate = 4e-4
        self.max_epochs = 150
        # Number of epochs without a validation-accuracy improvement before training stops
        self.patience = 10

        self.np_dtype = np.float32

        self.batch_size = 32
        self.num_workers = 4
        self.pin_memory = self.num_workers > 0 and self.device == 'cuda'

        self.classes_list = [
            'bhaji',
            'chapati',
            'githeri',
            'kachumbari',
            'kukuchoma',
            'mandazi',
            'masalachips',
            'matoke',
            'mukimo',
            'nyamachoma',
            'pilau',
            'sukumawiki',
            'ugali',
        ]

        self.num_classes = len(self.classes_list)

        self.image_height = 300
        self.image_width = 300

    # noinspection PyAttributeOutsideInit
    def init(self, training):
        """ Adjust configuration setup for training vs inference

        Builds the normalisation tensors (CPU copies and copies on ``self.device``) and
        the train/validation transform pipelines. When ``training`` is truthy the
        training output folder is created as well.
        """
        self.training = training
        self.imagenet_mean_cpu_tensor = torch.tensor(imagenet_mean_array)
        self.imagenet_std_cpu_tensor = torch.tensor(imagenet_std_array)
        self.channelwise_imagenet_mean_cpu_tensor = self.imagenet_mean_cpu_tensor.view(3, 1, 1)
        self.channelwise_imagenet_std_cpu_tensor = self.imagenet_std_cpu_tensor.view(3, 1, 1)
        # Placed on this instance's own device. Going through the module-level `config`
        # global here would fail while it is still None and would pick the wrong device
        # whenever the instance being initialised is not the active global config.
        self.imagenet_mean_gpu_tensor = torch.tensor(imagenet_mean_array, device=self.device)
        self.imagenet_std_gpu_tensor = torch.tensor(imagenet_std_array, device=self.device)
        self.channelwise_imagenet_mean_gpu_tensor = self.imagenet_mean_gpu_tensor.view(3, 1, 1)
        self.channelwise_imagenet_std_gpu_tensor = self.imagenet_std_gpu_tensor.view(3, 1, 1)

        if self.training:
            os.makedirs(self.training_output_folder, exist_ok=True)

        image_dims = (self.image_height, self.image_width)

        # Deterministic pipeline: resize, convert to a float tensor scaled to [0, 1], normalise
        self.transforms_val = T.Compose([
            T.Resize(size=image_dims, antialias=True),
            T.ToImage(),
            T.ToDtype(torch.float32, scale=True),
            T.Normalize(self.imagenet_mean_cpu_tensor, self.imagenet_std_cpu_tensor),
        ])

        # Augmenting pipeline: random flips, rotation, crop and colour jitter before normalising
        self.transforms_train = T.Compose([
            T.ToImage(),
            T.RandomHorizontalFlip(p=0.5),
            T.RandomVerticalFlip(p=0.5),
            T.RandomRotation(degrees=180),
            T.RandomResizedCrop(
                size=image_dims,
                scale=(0.6, 1.0),
                ratio=(0.75, 1.33),
                antialias=True,
            ),
            T.ColorJitter(
                brightness=0.2,
                contrast=0.2,
                saturation=0.1,
                hue=0.05,
            ),
            T.ToDtype(torch.float32, scale=True),
            T.Normalize(self.imagenet_mean_cpu_tensor, self.imagenet_std_cpu_tensor),
        ])


# Module-level handle to the active Config. The helper functions in this notebook read
# it as a global, so it must be assigned a Config instance before any of them is called.
# The trailing ';' on the string below keeps the notebook from echoing it as cell output.
config: Config = None
""" Set to environment-relevant config before training/inference """;

In [341]:
# Configuration for running on a local machine: relative paths, CPU execution.
local_config = Config(
    train_csv_filepath='data/train.csv',
    test_csv_filepath='data/test.csv',
    submission_filepath='data_gen/submission.csv',
    images_root_folder='data/images/',
    training_output_folder='data_gen/training_output/',
    saved_weights_filepath='data_gen/training_output/model_weights.pth',
    device='cpu',
)

# Configuration for running inside a Kaggle kernel: /kaggle paths, CUDA execution.
kaggle_config = Config(
    train_csv_filepath='/kaggle/input/opencv-pytorch-project-2-classification-round-3/train.csv',
    test_csv_filepath='/kaggle/input/opencv-pytorch-project-2-classification-round-3/test.csv',
    submission_filepath='/kaggle/working/submission.csv',
    images_root_folder='/kaggle/input/opencv-pytorch-project-2-classification-round-3/images/images/',
    training_output_folder='/kaggle/working/training_output/',
    saved_weights_filepath='/kaggle/input/kenyan-food-classification-model-weights/model_weights.pth',
    device='cuda',
)

In [342]:
# Per-channel mean and standard deviation used to normalise and denormalise images.
imagenet_mean_array = np.asarray((0.485, 0.456, 0.406), dtype=np.float32)
imagenet_std_array = np.asarray((0.229, 0.224, 0.225), dtype=np.float32)

def gpu_tensor(numpy_array):
    """Copy ``numpy_array`` into a new tensor placed on the active config's device."""
    target_device = config.device
    return torch.tensor(numpy_array, device=target_device)

def visualize_image(image_tensor):
    """Undo the normalisation of a (channels, height, width) image tensor and plot it.

    The tensor must live on ``config.device``, because it is combined with the
    normalisation tensors that ``Config.init`` places there.
    """
    channel_mean = config.channelwise_imagenet_mean_gpu_tensor
    channel_std = config.channelwise_imagenet_std_gpu_tensor

    restored = denormalize(image_tensor, channel_mean, channel_std)
    restored = torch.clamp(restored, 0, 1)

    # Channels-last layout on the CPU, then scale to 8-bit values for display
    pixels = restored.permute(1, 2, 0).cpu().numpy()
    pixels = (pixels * 255).astype('uint8')

    plt.imshow(pixels)
    plt.axis('off')
    plt.show()

def normalize(tensor, mean, std):
    """Shift ``tensor`` by ``mean`` and then divide the result by ``std``."""
    centered = tensor - mean
    return centered / std

def denormalize(tensor, mean, std):
    """Inverse of ``normalize``: multiply ``tensor`` by ``std`` and then add ``mean``."""
    rescaled = tensor * std
    return rescaled + mean

def load_pil_image_from_id(image_id) -> ImageFile.ImageFile:
    """Open ``<config.images_root_folder>/<image_id>.jpg`` with PIL and return the image.

    The path is built with ``os.path.join`` so the root folder works with or without a
    trailing separator.
    """
    image_path = os.path.join(config.images_root_folder, image_id + '.jpg')
    return Image.open(image_path)

In [343]:
# eq=False keeps the default identity-based equality and hashing. The __eq__ that
# @dataclass would otherwise generate compares the numpy array fields (which raises
# "truth value of an array is ambiguous") and makes instances unhashable.
@dataclass(eq=False)
class ImageClassificationDataset(TorchDataset):
    """Dataset yielding ``(transformed image, label)`` pairs looked up by image id."""
    # Image ids; each one is resolved to a file by load_pil_image_from_id
    image_ids: np.ndarray
    # Labels aligned index-for-index with image_ids
    labels: np.ndarray
    # Callable applied to the loaded PIL image before it is returned
    image_transforms: Callable

    def __len__(self):
        """Number of samples, i.e. the number of image ids."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load the image for ``idx``, apply the transforms and return it with its label."""
        pil_image = load_pil_image_from_id(self.image_ids[idx])
        return self.image_transforms(pil_image), self.labels[idx]

In [344]:
def create_efficient_net_model() -> nn.Module:
    """Build an EfficientNet-B3 with a fresh classification head on ``config.device``.

    The 'DEFAULT' weights are requested only when ``config.training`` is set; otherwise
    the network is created without weights. All parameters are frozen, then the feature
    blocks from ``config.fine_tune_start`` up to index 8 are made trainable again, and
    the classifier is replaced by dropout plus a linear layer with ``config.num_classes``
    outputs.
    """
    pretrained_weights = None
    if config.training:
        pretrained_weights = 'DEFAULT'
    model = torchmodels.efficientnet_b3(weights=pretrained_weights)

    # Freeze everything first ...
    model.requires_grad_(False)

    # ... then re-enable gradients for the last feature blocks.
    # Generally, unfreezing around layer 5 or 6 seems to work best
    for stage_index in range(config.fine_tune_start, 9):
        model.features[stage_index].requires_grad_(True)

    head_input_size = model.classifier[-1].in_features
    new_head = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(head_input_size, config.num_classes),
    )
    model.classifier = new_head

    model.to(config.device)

    return model

In [346]:
def one_hot_accuracy_score(preds, true):
    """Accuracy between per-class scores and one-hot labels.

    Both inputs are 2-D with one row per sample and one column per class. Each row is
    reduced to the index of its largest entry before the two are compared.
    """
    # De-one-hot encode preds and true using argmax
    predicted_classes = np.argmax(preds, axis=1)
    true_classes = np.argmax(true, axis=1)
    # sklearn's signature is (y_true, y_pred); accuracy is symmetric, so the value is
    # unchanged, but the call now matches the documented order.
    return accuracy_score(true_classes, predicted_classes)

def train_one_epoch(start_time, model, loader, optimizer, loss_function):
    """Run one optimisation pass over ``loader`` and return ``(epoch_loss, epoch_accuracy)``.

    Args:
        start_time: ``time.time()`` reference used only for the elapsed-time log prefix.
        model: network to train; switched to train mode here.
        loader: DataLoader yielding ``(images, one-hot labels)`` batches.
        optimizer: optimiser stepped once per batch.
        loss_function: callable ``(predictions, labels) -> scalar loss``.

    Returns:
        The sample-weighted mean loss over the dataset and the accuracy computed from the
        argmax of the predictions against the argmax of the labels.
    """
    model.train()
    running_loss = 0.0

    all_preds = []
    all_labels = []

    # Ask the loader itself, so the count stays correct whatever batch size it was built with
    num_batches = len(loader)
    # CUDA memory statistics are only meaningful (and only safe to query) on a CUDA device
    running_on_cuda = torch.device(config.device).type == 'cuda'

    for batch_number, (x, y) in enumerate(loader):
        print(f't={time.time() - start_time:.2f}: Loading training batch {batch_number + 1}/{num_batches}')

        x = x.to(config.device, non_blocking=True)
        y = y.to(config.device, non_blocking=True)

        if batch_number == 0:
            if running_on_cuda:
                allocated = torch.cuda.memory_allocated(config.device) / 1024**3
                reserved = torch.cuda.memory_reserved(config.device) / 1024**3
                print(f"Memory allocated={allocated:.2f} GiB, reserved={reserved:.2f} GiB")
            # Show one augmented sample per epoch as a visual sanity check
            print('First image:')
            visualize_image(x[0])

        optimizer.zero_grad()
        preds = model(x)
        loss = loss_function(preds, y)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the epoch mean
        running_loss += loss.item() * x.size(0)

        all_preds.append(preds.detach().cpu())
        all_labels.append(y.detach().cpu())

    epoch_loss = running_loss / len(loader.dataset)

    all_preds = torch.cat(all_preds, dim=0).numpy()
    all_labels = torch.cat(all_labels, dim=0).numpy()

    epoch_accuracy = one_hot_accuracy_score(all_preds, all_labels)

    return epoch_loss, epoch_accuracy

@torch.no_grad()
def validate_one_epoch(start_time, model, loader, loss_function):
    """Evaluate ``model`` over ``loader`` without gradients; return ``(epoch_loss, epoch_accuracy)``.

    Args:
        start_time: ``time.time()`` reference used only for the elapsed-time log prefix.
        model: network to evaluate; switched to eval mode here.
        loader: DataLoader yielding ``(images, one-hot labels)`` batches.
        loss_function: callable ``(predictions, labels) -> scalar loss``.

    Returns:
        The sample-weighted mean loss over the dataset and the accuracy computed from the
        argmax of the predictions against the argmax of the labels.
    """
    model.eval()
    running_loss = 0.0

    all_preds = []
    all_labels = []

    # Ask the loader itself, so the count stays correct whatever batch size it was built with
    num_batches = len(loader)
    for batch_number, (x, y) in enumerate(loader):
        print(f't={time.time() - start_time:.2f}: Loading validation batch {batch_number + 1}/{num_batches}')

        x = x.to(config.device, non_blocking=True)
        y = y.to(config.device, non_blocking=True)

        if batch_number == 0:
            # Show one sample per epoch as a visual sanity check
            print('First image:')
            visualize_image(x[0])

        preds = model(x)
        loss = loss_function(preds, y)

        # Weight by batch size so a smaller final batch does not skew the epoch mean
        running_loss += loss.item() * x.size(0)

        all_preds.append(preds.detach().cpu())
        all_labels.append(y.detach().cpu())

    epoch_loss = running_loss / len(loader.dataset)

    all_preds = torch.cat(all_preds, dim=0).numpy()
    all_labels = torch.cat(all_labels, dim=0).numpy()

    epoch_accuracy = one_hot_accuracy_score(all_preds, all_labels)

    return epoch_loss, epoch_accuracy

In [347]:
def train():
    """Train the classifier with early stopping on validation accuracy and save the results.

    Steps: initialise the active config for training, seed the RNGs, start a wandb run,
    split the training CSV 80/20 (stratified by class), build the model, datasets and
    loaders, then train for up to ``config.max_epochs`` epochs. Training stops early once
    validation accuracy has not improved for ``config.patience`` epochs. The weights of
    the best-accuracy epoch are restored and written, together with the metric history,
    to ``config.training_output_folder`` and to wandb.
    """
    config.init(training=True)

    start_time = time.time()
    print('t=0: Starting data prep and model loading')

    # Seed before the model is built: the replacement classifier head is randomly
    # initialised, so seeding only right before the epoch loop leaves it unreproducible.
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    run = wandb.init(
        project='kenyan_food_classifier',
        name=f'run-{int(start_time)}',
        config={
            'batch_size': config.batch_size,
            'learning_rate': config.starting_learning_rate,
            'max_epochs': config.max_epochs,
            'seed': config.seed,
            'model': 'efficientnet_b3',
            'optimizer': 'Adam',
        },
    )

    train_df = pd.read_csv(config.train_csv_filepath)
    train_df['id'] = train_df['id'].astype(str)

    train_df, val_df = train_test_split(
        train_df,
        test_size=0.20,
        random_state=config.seed,
        shuffle=True,
        stratify=train_df['class'],
    )

    def one_hot_encode_classes(df) -> pd.DataFrame:
        """Return ``id`` plus one float column per entry of ``config.classes_list``, in that order."""
        unknown_classes = sorted(set(df['class']) - set(config.classes_list), key=str)
        if unknown_classes:
            raise ValueError(f'Unexpected classes in training data: {unknown_classes}')
        # Reindex so column order and count always match config.classes_list (the order
        # inference uses to decode predictions), even if a split lacks some class.
        one_hot_classes = (
            pd.get_dummies(df['class'])
            .reindex(columns=config.classes_list, fill_value=0)
            .astype(config.np_dtype)
        )
        return pd.concat([df['id'], one_hot_classes], axis='columns')

    train_df = one_hot_encode_classes(train_df)
    val_df = one_hot_encode_classes(val_df)

    train_ids = train_df['id'].to_numpy()
    train_classes = train_df.drop(columns='id').to_numpy()
    val_ids = val_df['id'].to_numpy()
    val_classes = val_df.drop(columns='id').to_numpy()

    model = create_efficient_net_model()

    wandb.watch(model, log="gradients", log_freq=100)

    train_dataset = ImageClassificationDataset(train_ids, train_classes, config.transforms_train)
    val_dataset = ImageClassificationDataset(val_ids, val_classes, config.transforms_val)

    def loader(ds, shuffle):
        return DataLoader(ds, shuffle=shuffle, batch_size=config.batch_size, num_workers=config.num_workers, pin_memory=config.pin_memory)

    train_loader = loader(train_dataset, shuffle=True)
    val_loader = loader(val_dataset, shuffle=False)

    loss_function = nn.CrossEntropyLoss()
    optimizer = TorchOptimizers.Adam(model.parameters(), lr=config.starting_learning_rate)

    best_val_loss = float('inf')
    best_val_loss_epoch = -1
    best_val_accuracy = float('-inf')
    best_val_accuracy_epoch = -1
    best_state_dict = None
    history = dict(train_loss=[], val_loss=[], train_accuracy=[], val_accuracy=[], best_val_loss_epoch=dict())

    training_start_time = time.time()
    print(f't={training_start_time - start_time:.2f}: Starting training')

    epochs_since_best = 0

    for epoch in range(1, config.max_epochs + 1):
        epoch_start_time = time.time()
        print(f't={epoch_start_time - start_time:.2f}: Starting epoch {epoch}')
        train_loss, train_accuracy = train_one_epoch(start_time, model, train_loader, optimizer, loss_function)
        val_loss, val_accuracy = validate_one_epoch(start_time, model, val_loader, loss_function)

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_accuracy'].append(train_accuracy)
        history['val_accuracy'].append(val_accuracy)

        print(f'================ Epoch {epoch:03d} stats ==================')
        print(f'train_loss: {train_loss:.4f}  val_loss: {val_loss:.4f}')
        print(f'train_accuracy: {train_accuracy:.4f}  val_accuracy: {val_accuracy:.4f}')
        print('===================================================')

        wandb.log(
            {
                'epoch': epoch,
                'train_loss': train_loss,
                'val_loss': val_loss,
                'train_accuracy': train_accuracy,
                'val_accuracy': val_accuracy,
            }
        )

        # Best loss is tracked for reporting only; checkpointing and early stopping
        # are both driven by validation accuracy.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_loss_epoch = epoch

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_val_accuracy_epoch = epoch
            epochs_since_best = 0
            # Snapshot on the CPU so later epochs cannot overwrite the best weights
            best_state_dict = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        else:
            epochs_since_best += 1
            if epochs_since_best >= config.patience:
                break

    if best_state_dict is not None:
        model.load_state_dict(best_state_dict)

    history['best_val_loss_epoch']['epoch'] = best_val_loss_epoch
    history['best_val_loss_epoch']['val_loss'] = best_val_loss

    print()
    print('==================== Results ======================')
    print(f'Best val accuracy epoch: {best_val_accuracy_epoch}')
    print(f'Best val accuracy: {best_val_accuracy:.4f}')
    print(f'Best val loss epoch: {best_val_loss_epoch}')
    print(f'Best val loss: {best_val_loss:.2f}')
    print('===================================================')
    print()

    wandb.run.summary['best_val_accuracy'] = best_val_accuracy
    wandb.run.summary['best_val_accuracy_epoch'] = best_val_accuracy_epoch
    wandb.run.summary['best_val_loss'] = best_val_loss
    wandb.run.summary['best_val_loss_epoch'] = best_val_loss_epoch

    train_accuracy_history = history['train_accuracy']
    val_accuracy_history = history['val_accuracy']
    epochs = list(range(1, len(train_accuracy_history) + 1))

    plt.figure(figsize=(8, 5))

    plt.plot(epochs, train_accuracy_history, label='train_accuracy', marker='o')
    plt.plot(epochs, val_accuracy_history, label='val_accuracy', marker='o')

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Train vs Validation Accuracy per Epoch')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()

    wandb.log({"accuracy_curve": wandb.Image(plt.gcf())})

    plt.show()

    # os.path.join keeps these paths valid with or without a trailing separator on the folder
    weights_path = os.path.join(config.training_output_folder, 'model_weights.pth')
    history_path = os.path.join(config.training_output_folder, 'history.json')

    torch.save(model.state_dict(), weights_path)
    with open(history_path, 'w') as json_file:
        json.dump(history, json_file, indent=4)

    wandb.save(weights_path)
    wandb.save(history_path)

    # Close the run explicitly so a later train() call in the same kernel starts a fresh one
    run.finish()

In [None]:
def load_saved_model() -> nn.Module:
    """Build the model and load the weights stored at ``config.saved_weights_filepath``.

    The weights are read onto the CPU first; ``load_state_dict`` then copies them into
    the model created by ``create_efficient_net_model``. The model is returned in eval mode.
    """
    loaded_model = create_efficient_net_model()
    # Use the environment's configured path rather than a hardcoded local one, so the
    # same code works for every Config (e.g. the Kaggle weights dataset).
    saved_model_weights = torch.load(config.saved_weights_filepath, weights_only=True, map_location='cpu')
    loaded_model.load_state_dict(saved_model_weights)
    loaded_model.eval()
    return loaded_model

In [None]:
def pred_single_image(model, image_id) -> str:
    """Predict the class name for one image id.

    Args:
        model: classifier whose outputs are ordered like ``config.classes_list``;
            expected to live on ``config.device``.
        image_id: id resolved to an image file by ``load_pil_image_from_id``.

    Returns:
        The entry of ``config.classes_list`` with the highest predicted score.
    """
    image = config.transforms_val(load_pil_image_from_id(image_id))
    # The model expects image batches, so add a leading batch dimension. The input must
    # also be on the same device as the model, otherwise the forward pass fails on CUDA.
    image = image.unsqueeze(0).to(config.device)
    with torch.no_grad():
        scores = model(image).detach().cpu().numpy()  # raw per-class scores, one row per image
    pred_index = np.argmax(scores, axis=1)[0]  # index of highest value among predictions
    return config.classes_list[pred_index]

In [348]:
def predict_val():
    """ For verifying that the saved weights load and predict correctly on the original validation dataset

    Recreates the validation split with the same seed and stratification that ``train``
    uses, predicts every image in it, and returns a DataFrame with the columns
    ``image_id``, ``actual`` and ``pred``.
    """
    config.init(training=False)

    train_df = pd.read_csv(config.train_csv_filepath)
    train_df['id'] = train_df['id'].astype(str)

    _, val_df = train_test_split(
        train_df,
        test_size=0.20,
        random_state=config.seed,
        shuffle=True,
        stratify=train_df['class'],
    )

    loaded_model = load_saved_model()

    # Collect rows in a list and build the frame once; appending with .loc row by row
    # is quadratic. Columns are selected by name so extra CSV columns cannot break unpacking.
    records = []
    with torch.no_grad():
        for image_id, true_class in zip(val_df['id'], val_df['class']):
            pred = pred_single_image(loaded_model, image_id)
            records.append((image_id, true_class, pred))

    return pd.DataFrame(records, columns=['image_id', 'actual', 'pred'])

In [349]:
def predict_test():
    """ For generating predictions on hidden test set

    Predicts every id listed in the test CSV, writes the result to
    ``config.submission_filepath`` (columns ``id`` and ``class``) and returns it.
    """
    config.init(training=False)
    test_ids = pd.read_csv(config.test_csv_filepath)['id'].astype(str)

    loaded_model = load_saved_model()

    # Collect rows in a list and build the frame once; appending with .loc row by row is quadratic
    records = []
    with torch.no_grad():
        for image_id in test_ids:
            pred = pred_single_image(loaded_model, image_id)
            records.append((image_id, pred))

    preds_df = pd.DataFrame(records, columns=['id', 'class'])
    preds_df.to_csv(config.submission_filepath, index=False)
    return preds_df

In [351]:
# Select the local environment as the active config, then predict the test set.
# predict_test() also writes the predictions to config.submission_filepath.
config = local_config
submission_df = predict_test()
# Bare last expression so the notebook renders the resulting DataFrame
submission_df

Unnamed: 0,id,class
0,9156739011499789258,nyamachoma
1,2049465964503133373,kachumbari
2,6446998501027132988,nyamachoma
3,4194396063119815321,ugali
4,9018117998187006009,bhaji
...,...,...
1633,18302448610371772604,githeri
1634,15920672464676076400,chapati
1635,3232020170382870007,bhaji
1636,3094804487341098468,kachumbari
