https://www.tensorflow.org/tutorials/images/transfer_learning#create_the_base_model_from_the_pre-trained_convnets

In [2]:
from sklearn.model_selection import train_test_split
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import pandas as pd
from google.colab import drive
from typing import List

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
class LoadDataset:
  def __init__(self, path: str):
    self.path = path

  def collect_image_files(self) -> List[str]:
    """
    Collects image files from the specified directory.

    Returns:
        List[str]: List of image file paths.
    """
    image_files = []
    for dirname, _, filenames in os.walk(self.path):
      for filename in filenames:
        image_files.append(os.path.join(dirname, filename))
    return image_files

  def show_images(self, images_list: List[str], rows: int=10, cols: int=10):
    """
    Displays a grid of images.

    Args:
        images_list (List[str]): List of image file paths.
    """
    fig, axes = plt.subplots(rows, cols, figsize=(20, 20))
    axes = axes.flatten()
    for i, image_path in enumerate(images_list):
      if i >= rows * cols:
        break
      try:
        img = cv.imread(image_path)
        if img is None:
          raise Exception(f"Failed to load image: {image_path}")

        axes[i].imshow(img)

      except Exception as e:
        print(f"Error processing image {image_path}: {str(e)}")

    plt.tight_layout()
    plt.show()

In [5]:
folder_a = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_a_samples"
folder_aw = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_aw_samples"
folder_aa = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_aa_samples"
folder_d = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_d_samples"
folder_dd = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_dd_samples"
folder_e = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_e_samples"
folder_ee = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_ee_samples"
folder_o = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_o_samples"
folder_oo = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_oo_samples"
folder_ow = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_ow_samples"
folder_u = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_u_samples"
folder_uw = "/content/drive/MyDrive/24-25 Academic Year/CS156/Second Pipeline/0. Dataset/Raw Data/letter_uw_samples"

In [6]:
letter_a = LoadDataset(folder_a)
images_a = letter_a.collect_image_files()
letter_aw = LoadDataset(folder_aw)
images_aw = letter_aw.collect_image_files()
letter_aa = LoadDataset(folder_aa)
images_aa = letter_aa.collect_image_files()
letter_d = LoadDataset(folder_d)
images_d = letter_d.collect_image_files()
letter_dd = LoadDataset(folder_dd)
images_dd = letter_dd.collect_image_files()
letter_e = LoadDataset(folder_e)
images_e = letter_e.collect_image_files()
letter_ee = LoadDataset(folder_ee)
images_ee = letter_ee.collect_image_files()
letter_o = LoadDataset(folder_o)
images_o = letter_o.collect_image_files()
letter_oo = LoadDataset(folder_oo)
images_oo = letter_oo.collect_image_files()
letter_ow = LoadDataset(folder_ow)
images_ow = letter_ow.collect_image_files()
letter_u = LoadDataset(folder_u)
images_u = letter_u.collect_image_files()
letter_uw = LoadDataset(folder_uw)
images_uw = letter_uw.collect_image_files()

In [7]:
# Label data
a_labeled_images = {a_image: 'a' for a_image in images_a}
aw_labeled_images = {aw_image: 'ă' for aw_image in images_aw}
aa_labeled_images = {aa_image: 'â' for aa_image in images_aa}
d_labeled_images = {d_image: 'd' for d_image in images_d}
dd_labeled_images = {dd_image: 'đ' for dd_image in images_dd}
e_labeled_images = {e_image: 'e' for e_image in images_e}
ee_labeled_images = {ee_image: 'ê' for ee_image in images_ee}
o_labeled_images = {o_image: 'o' for o_image in images_o}
oo_labeled_images = {oo_image: 'ô' for oo_image in images_oo}
ow_labeled_images = {ow_image: 'ơ' for ow_image in images_ow}
u_labeled_images = {u_image: 'u' for u_image in images_u}
uw_labeled_images = {uw_image: 'ư' for uw_image in images_uw}

In [8]:
all_images = {**a_labeled_images, **aw_labeled_images, **aa_labeled_images, \
              **d_labeled_images, **dd_labeled_images, \
              **e_labeled_images, **ee_labeled_images, \
              **o_labeled_images, **oo_labeled_images, **ow_labeled_images, \
              **u_labeled_images, **uw_labeled_images}

In [9]:
# Convert data into dataframe
df = pd.DataFrame(list(all_images.items()), columns=['image_file', 'label'])
df

Unnamed: 0,image_file,label
0,/content/drive/MyDrive/24-25 Academic Year/CS1...,a
1,/content/drive/MyDrive/24-25 Academic Year/CS1...,a
2,/content/drive/MyDrive/24-25 Academic Year/CS1...,a
3,/content/drive/MyDrive/24-25 Academic Year/CS1...,a
4,/content/drive/MyDrive/24-25 Academic Year/CS1...,a
...,...,...
1195,/content/drive/MyDrive/24-25 Academic Year/CS1...,ư
1196,/content/drive/MyDrive/24-25 Academic Year/CS1...,ư
1197,/content/drive/MyDrive/24-25 Academic Year/CS1...,ư
1198,/content/drive/MyDrive/24-25 Academic Year/CS1...,ư


In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np
from torch.cuda.amp import autocast, GradScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Data augmentation that preserves full image content
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize while maintaining aspect ratio
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1
    ),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Simple resize for validation/testing
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

class CustomImageDataset(Dataset):
    """
    Dataset class with error handling and class balancing capabilities
    """
    def __init__(self, dataframe, transform=None, phase='train'):
        self.dataframe = dataframe
        self.transform = transform
        self.phase = phase

        # Create label mapping
        self.label_to_idx = {label: idx for idx, label in enumerate(dataframe['label'].unique())}
        self.idx_to_label = {idx: label for label, idx in self.label_to_idx.items()}
        self.num_classes = len(self.label_to_idx)

        # Calculate class weights for potential weighted sampling
        class_counts = dataframe['label'].value_counts()
        total_samples = len(dataframe)
        self.class_weights = torch.tensor([total_samples / (self.num_classes * count)
                                         for count in class_counts])

        print(f"Dataset initialized for {phase} with {self.num_classes} classes")
        print("Class distribution:", class_counts.to_dict())

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        try:
            img_path = self.dataframe.iloc[idx]['image_file']
            label = self.dataframe.iloc[idx]['label']

            # Load and convert image with error handling
            try:
                image = Image.open(img_path).convert('RGB')
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")
                image = Image.new('RGB', (224, 224))

            if self.transform:
                image = self.transform(image)

            label_idx = self.label_to_idx[label]
            return image, label_idx

        except Exception as e:
            print(f"Error in dataset.__getitem__: {e}")
            return torch.zeros((3, 224, 224)), 0

class TransferModel(nn.Module):
    """
    Enhanced MobileNetV2 model with custom classifier
    """
    def __init__(self, num_classes):
        super().__init__()
        # Load pretrained model
        self.backbone = models.mobilenet_v2(pretrained=True)

        # Freeze initial layers
        for param in self.backbone.parameters():
            param.requires_grad = False

        # Unfreeze final blocks for fine-tuning
        for layer in self.backbone.features[-3:]:
            for param in layer.parameters():
                param.requires_grad = True

        # Enhanced classifier with dropout and batch normalization
        self.classifier = nn.Sequential(
            nn.Linear(self.backbone.last_channel, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

        # Replace original classifier
        self.backbone.classifier = self.classifier

    def forward(self, x):
        return self.backbone(x)

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                num_epochs, device):
    """
    Training loop with mixed precision and monitoring
    """
    scaler = GradScaler()
    best_acc = 0.0
    patience = 5
    patience_counter = 0
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        for phase in ['train', 'val']:
            running_loss = 0.0
            running_corrects = 0

            if phase == 'train':
                model.train()
                dataloader = train_loader
            else:
                model.eval()
                dataloader = val_loader

            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    with autocast():
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    if phase == 'train':
                        scaler.scale(loss).backward()
                        # Gradient clipping
                        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                        scaler.step(optimizer)
                        scaler.update()

                running_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = running_corrects.double() / len(dataloader.dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            history[f'{phase}_loss'].append(epoch_loss)
            history[f'{phase}_acc'].append(epoch_acc.item())

            if phase == 'val':
                scheduler.step(epoch_loss)

                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save({
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'best_acc': best_acc,
                    }, 'best_model.pth')
                    patience_counter = 0
                else:
                    patience_counter += 1

                if patience_counter >= patience:
                    print("Early stopping triggered")
                    return model, history

    return model, history

def evaluate_model(model, test_loader, device):
    """
    Comprehensive model evaluation with confusion matrix
    """
    model.eval()
    all_preds = []
    all_labels = []
    running_corrects = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            running_corrects += torch.sum(preds == labels.data)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    test_acc = running_corrects.double() / len(test_loader.dataset)
    print(f'Test Accuracy: {test_acc:.4f}')

    # Create confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    return test_acc, cm

def main():
    # Assuming df is your DataFrame with 'image_file' and 'label' columns
    train_df, temp_df = train_test_split(df, test_size=0.3, stratify=df['label'])
    val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'])

    # Create datasets
    train_dataset = CustomImageDataset(train_df, transform=train_transforms, phase='train')
    val_dataset = CustomImageDataset(val_df, transform=val_transforms, phase='val')
    test_dataset = CustomImageDataset(test_df, transform=val_transforms, phase='test')

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

    # Setup device and model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = TransferModel(train_dataset.num_classes).to(device)

    # Loss function with class weighting if needed
    criterion = nn.CrossEntropyLoss(weight=train_dataset.class_weights.to(device))

    # Optimizer with different learning rates for different parts
    optimizer = torch.optim.AdamW([
        {'params': model.backbone.features[-3:].parameters(), 'lr': 1e-4},
        {'params': model.classifier.parameters(), 'lr': 1e-3}
    ], weight_decay=0.01)

    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=2,
        verbose=True
    )

    # Train model
    model, history = train_model(
        model, train_loader, val_loader, criterion, optimizer, scheduler,
        num_epochs=10, device=device
    )

    # Evaluate model
    test_acc, confusion_mat = evaluate_model(model, test_loader, device)

    return model, history, test_acc, confusion_mat

if __name__ == "__main__":
    model, history, test_acc, confusion_mat = main()

Dataset initialized for train with 12 classes
Class distribution: {'â': 70, 'd': 70, 'ơ': 70, 'ă': 70, 'o': 70, 'ư': 70, 'e': 70, 'ê': 70, 'u': 70, 'đ': 70, 'ô': 70, 'a': 70}
Dataset initialized for val with 12 classes
Class distribution: {'u': 15, 'đ': 15, 'ê': 15, 'ư': 15, 'd': 15, 'ơ': 15, 'â': 15, 'e': 15, 'ô': 15, 'o': 15, 'ă': 15, 'a': 15}
Dataset initialized for test with 12 classes
Class distribution: {'ê': 15, 'ă': 15, 'đ': 15, 'd': 15, 'ô': 15, 'ư': 15, 'â': 15, 'a': 15, 'u': 15, 'o': 15, 'ơ': 15, 'e': 15}




Epoch 1/10
----------


  scaler = GradScaler()
  with autocast():


train Loss: 0.9064 Acc: 0.7179
val Loss: 4.7228 Acc: 0.0778
Epoch 2/10
----------
