<a href="https://colab.research.google.com/github/nanubee/mainproject-defect-detection/blob/main/notebooks/Day2_Hybrid_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Install required libraries: PyTorch Lightning and 'timm' (recommended for ViT/MobileNet)
!pip install pytorch-lightning timm

# Standard Imports
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import pytorch_lightning as pl
import timm

# --- Connect Google Drive ---
# This command will prompt you to authorize Colab to access your Drive files.
from google.colab import drive
drive.mount('/content/gdrive')

# Define the root directory where your dataset is located on your Drive
# **IMPORTANT: REPLACE THIS PATH with the actual folder containing your dataset!**
DATA_DIR = "/content/drive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL"

print(f"Libraries installed, Google Drive mounted, and DATA_DIR set to: {DATA_DIR}")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Libraries installed, Google Drive mounted, and DATA_DIR set to: /content/drive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL


In [21]:
# PASTE THE PATH YOU COPIED HERE, replacing this placeholder string
CORRECT_TRAIN_PATH = "/content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL/train"
print(f"Copied Path: {CORRECT_TRAIN_PATH}")

# Now, we redefine the DATA_DIR using the successfully copied path
# We remove '/train' from the end to get the root data directory
DATA_DIR = CORRECT_TRAIN_PATH.rsplit('/train', 1)[0]
print(f"Corrected DATA_DIR: {DATA_DIR}")

Copied Path: /content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL/train
Corrected DATA_DIR: /content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL


In [22]:
import os
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import pytorch_lightning as pl
import torch

# DATA_DIR is now defined from the path copied in the previous step
DATA_DIR = "/content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL"

# Define standard transformations for MobileNetV2 and ViT (224x224 input)
IMAGE_TRANSFORM = transforms.Compose([
    transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class HybridDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str, batch_size: int = 32):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = IMAGE_TRANSFORM

        self.train_dir = os.path.join(self.data_dir, 'train')
        self.val_dir = os.path.join(self.data_dir, 'valid')
        self.test_dir = os.path.join(self.data_dir, 'test')

    def setup(self, stage=None):
        try:
            print("Loading training dataset...")
            self.train_dataset = datasets.ImageFolder(root=self.train_dir, transform=self.transform)
            print("Loading validation dataset...")
            self.val_dataset = datasets.ImageFolder(root=self.val_dir, transform=self.transform)
            self.test_dataset = datasets.ImageFolder(root=self.test_dir, transform=self.transform)

            self.num_classes = len(self.train_dataset.classes)
            print(f"Dataset loaded successfully. Found {self.num_classes} classes.")

        except Exception as e:
            print(f"⚠️ Warning: Failed to load real data. Error: {e}. Creating DUMMY data.")
            self.num_classes = 3

            # DUMMY DATA CREATION
            self.train_dataset = [(torch.randn(3, 224, 224), torch.randint(0, self.num_classes, (1,)).item()) for _ in range(100)]
            self.val_dataset = [(torch.randn(3, 224, 224), torch.randint(0, self.num_classes, (1,)).item()) for _ in range(20)]
            self.test_dataset = [(torch.randn(3, 224, 224), torch.randint(0, self.num_classes, (1,)).item()) for _ in range(20)]


    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=2)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=2)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=2)

# Instantiate the DataModule
dm = HybridDataModule(data_dir=DATA_DIR)
dm.setup()
print("PyTorch Lightning DataModule defined and initialized.")
print(f"Number of Classes: {dm.num_classes}")

Loading training dataset...
Loading validation dataset...
Dataset loaded successfully. Found 3 classes.
PyTorch Lightning DataModule defined and initialized.
Number of Classes: 3


In [25]:
import timm
import torch.nn as nn
from torchvision import models
from torch.nn import functional as F
import torch
import pytorch_lightning as pl

class HybridFeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()

        # 1. MobileNetV2 (CNN Branch) - CORRECTED
        print("Loading MobileNetV2 (CNN) pre-trained weights...")
        # FIX APPLIED HERE: Using pretrained=True
        self.mobilenet = models.mobilenet_v2(pretrained=True)

        self.mobilenet.classifier = nn.Identity()
        self.mobilenet_output_dim = 1280

        # 2. ViT-Small (Transformer Branch)
        print("Loading ViT-Small (Transformer) pre-trained weights...")
        self.vit = timm.create_model('vit_small_patch16_224', pretrained=True)
        self.vit.head = nn.Identity()
        self.vit_output_dim = 384

        # --- Freeze Feature Extractor Weights (for Transfer Learning) ---
        for param in self.mobilenet.parameters():
            param.requires_grad = False
        for param in self.vit.parameters():
            param.requires_grad = False

    def forward(self, x):
        cnn_features = self.mobilenet(x)
        vit_features = self.vit(x)
        combined_features = torch.cat((cnn_features, vit_features), dim=1)

        return combined_features


class HybridClassifier(pl.LightningModule):
    def __init__(self, num_classes, learning_rate=1e-4):
        super().__init__()
        self.save_hyperparameters()

        self.feature_extractor = HybridFeatureExtractor()

        # Total input features: 1280 (MobileNet) + 384 (ViT) = 1664
        input_dim = self.feature_extractor.mobilenet_output_dim + self.feature_extractor.vit_output_dim

        # Classification Head (Fully Connected Layers)
        self.classifier_head = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes) # Final layer uses 3 classes
        )

    def forward(self, x):
        features = self.feature_extractor(x)
        logits = self.classifier_head(features)
        return logits

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.log('val_loss', loss, on_step=False, on_epoch=True)
        preds = torch.argmax(logits, dim=1)
        accuracy = (preds == y).float().mean()
        self.log('val_acc', accuracy, on_step=False, on_epoch=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer

# Test the instantiation
try:
    # dm must be defined from the successful Step 2B run
    hybrid_model_pl = HybridClassifier(num_classes=dm.num_classes)
    print("\nHybrid Model successfully instantiated!")
    print(f"Total input features to classifier head: 1664 (MobileNet + ViT)")
except Exception as e:
    print(f"\nFailed to instantiate model: {e}")



Loading MobileNetV2 (CNN) pre-trained weights...
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 118MB/s]


Loading ViT-Small (Transformer) pre-trained weights...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/88.2M [00:00<?, ?B/s]


Hybrid Model successfully instantiated!
Total input features to classifier head: 1664 (MobileNet + ViT)


In [28]:
from torch.utils.data import Dataset
from torchvision.datasets.folder import default_loader
from PIL import UnidentifiedImageError
import torch
import logging
import os
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import pytorch_lightning as pl

# Configure logging to see which files are skipped
logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s')

class RobustImageFolder(Dataset):
    """A wrapper for ImageFolder that skips corrupted images."""
    def __init__(self, dataset):
        self.dataset = dataset
        self.loader = default_loader
        self.transform = dataset.transform

    def __getitem__(self, index):
        path, target = self.dataset.samples[index]

        try:
            sample = self.loader(path)

            if self.transform is not None:
                sample = self.transform(sample)

            return sample, target

        except (UnidentifiedImageError, OSError) as e:
            # If the image is corrupted or cannot be read, log the error and recursively call __getitem__
            logging.warning(f"Skipping corrupted file: {path}")

            # Get a random new index to fetch a valid sample instead
            new_index = torch.randint(0, len(self), (1,)).item()
            return self.__getitem__(new_index)

    def __len__(self):
        return len(self.dataset)

In [29]:
# --- CORRECTED DATA_DIR ---
DATA_DIR = "/content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL"

# Define standard transformations
IMAGE_TRANSFORM = transforms.Compose([
    transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class HybridDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str, batch_size: int = 32):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = IMAGE_TRANSFORM

        self.train_dir = os.path.join(self.data_dir, 'train')
        self.val_dir = os.path.join(self.data_dir, 'valid')
        self.test_dir = os.path.join(self.data_dir, 'test')

    def setup(self, stage=None):
        try:
            print("Loading training dataset...")
            # Using ImageFolder to list files, then wrapping it in the RobustImageFolder
            train_base = datasets.ImageFolder(root=self.train_dir, transform=self.transform)
            self.train_dataset = RobustImageFolder(train_base)

            print("Loading validation dataset...")
            val_base = datasets.ImageFolder(root=self.val_dir, transform=self.transform)
            self.val_dataset = RobustImageFolder(val_base)

            print("Loading test dataset...")
            test_base = datasets.ImageFolder(root=self.test_dir, transform=self.transform)
            self.test_dataset = RobustImageFolder(test_base)

            self.num_classes = len(train_base.classes)
            print(f"Dataset loaded successfully. Found {self.num_classes} classes.")

        except Exception as e:
            # Fallback to DUMMY data remains as a safety net
            print(f"⚠️ Warning: Failed to load real data. Error: {e}. Creating DUMMY data.")
            self.num_classes = 3

            self.train_dataset = [(torch.randn(3, 224, 224), torch.randint(0, self.num_classes, (1,)).item()) for _ in range(100)]
            self.val_dataset = [(torch.randn(3, 224, 224), torch.randint(0, self.num_classes, (1,)).item()) for _ in range(20)]
            self.test_dataset = [(torch.randn(3, 224, 224), torch.randint(0, self.num_classes, (1,)).item()) for _ in range(20)]


    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=2)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=2)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=2)

# Instantiate the DataModule - this re-initializes dm with the robust logic
dm = HybridDataModule(data_dir=DATA_DIR)
dm.setup()
print("PyTorch Lightning DataModule defined and initialized.")
print(f"Number of Classes: {dm.num_classes}")

Loading training dataset...
Loading validation dataset...
Loading test dataset...
Dataset loaded successfully. Found 3 classes.
PyTorch Lightning DataModule defined and initialized.
Number of Classes: 3


In [30]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelSummary

# --- Instantiate Trainer ---
# accelerator='auto' uses the best available device (GPU in Colab)
# devices=1 uses a single device
# max_epochs is set low for a quick test run
trainer = pl.Trainer(
    accelerator='auto',
    devices=1,
    max_epochs=3,
    enable_model_summary=True,
    # Adding a logger for visualization (optional)
    logger=pl.loggers.TensorBoardLogger("lightning_logs/", name="hybrid_draft")
)

print("\n--- Starting Hybrid Model Training Draft (3 Epochs) ---")
# The .fit() command starts the training loop using your real data
trainer.fit(hybrid_model_pl, dm)

print("\nDraft training complete! The hybrid architecture is functional.")

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores



--- Starting Hybrid Model Training Draft (3 Epochs) ---
Loading training dataset...


INFO:pytorch_lightning.callbacks.model_summary:
  | Name              | Type                   | Params | Mode 
---------------------------------------------------------------------
0 | feature_extractor | HybridFeatureExtractor | 23.9 M | train
1 | classifier_head   | Sequential             | 854 K  | train
---------------------------------------------------------------------
854 K     Trainable params
23.9 M    Non-trainable params
24.7 M    Total params
98.974    Total estimated model params size (MB)
494       Modules in train mode
0         Modules in eval mode


Loading validation dataset...
Loading test dataset...
Dataset loaded successfully. Found 3 classes.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]



Validation: |          | 0/? [00:00<?, ?it/s]



Validation: |          | 0/? [00:00<?, ?it/s]



Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.



Draft training complete! The hybrid architecture is functional.


In [32]:
import torch
import os

# --- Define Paths ---
# This path is the successfully verified root folder on your Google Drive:
DATA_DIR = "/content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL"

# Define the filename for the weights
MODEL_FILENAME = 'hybrid_mobilenet_vit_final_weights.pth'

# Define the full path where the weights will be saved (in your Drive folder)
MODEL_PATH = os.path.join(DATA_DIR, MODEL_FILENAME)

print(f"Starting to save model state dictionary to: {MODEL_PATH}")

# 1. Get the state dictionary (the dictionary containing all the weights)
model_weights = hybrid_model_pl.state_dict()

# 2. Use torch.save() to write the weights to the specified path on Google Drive
torch.save(model_weights, MODEL_PATH)

print(f"\n✅ Model weights successfully saved to Google Drive as: {MODEL_FILENAME}")


Starting to save model state dictionary to: /content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL/hybrid_mobilenet_vit_final_weights.pth

✅ Model weights successfully saved to Google Drive as: hybrid_mobilenet_vit_final_weights.pth


In [38]:
import pandas as pd
import os
import glob
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# --- Define Paths ---
DATA_DIR = "/content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL"
LOG_DIR = "/content/lightning_logs/hybrid_draft"

# 1. Dynamically Find the Latest Version Folder
version_folders = glob.glob(os.path.join(LOG_DIR, "version_*"))

if not version_folders:
    print("❌ ERROR: No version folders found.")
else:
    latest_version_folder = sorted(version_folders, key=os.path.getmtime, reverse=True)[0]

    # Define the path to the event file directory
    EVENTS_FILE_DIR = latest_version_folder
    METRICS_FILE_DEST = os.path.join(DATA_DIR, 'training_history.csv')

    print(f"✅ Reading TensorBoard events from: {EVENTS_FILE_DIR}")

    try:
        # 2. Initialize EventAccumulator to read the logs (FIXED VARIABLE NAME)
        event_acc = EventAccumulator(EVENTS_FILE_DIR)
        event_acc.Reload()

        # 3. Extract the necessary scalar tags (metrics logged in your validation_step)
        # Note: These names are derived from your self.log() calls: 'val_loss', 'val_acc'
        val_loss_events = event_acc.Scalars('val_loss')
        val_acc_events = event_acc.Scalars('val_acc')

        # 4. Build the DataFrame
        history = {
            'epoch': [e.step for e in val_loss_events],
            'val_loss': [e.value for e in val_loss_events],
            'val_acc': [e.value for e in val_acc_events],
        }

        metrics_df = pd.DataFrame(history)

        # 5. Save the DataFrame to Google Drive
        metrics_df.to_csv(METRICS_FILE_DEST, index=False)

        print(f"✅ Training metrics successfully extracted and exported to: {METRICS_FILE_DEST}")
        print("Final Validation Accuracy is located in the 'val_acc' column of the CSV.")

    except Exception as e:
        print(f"⚠️ FINAL FAILURE: Could not read and extract metrics. Error: {e}")

✅ Reading TensorBoard events from: /content/lightning_logs/hybrid_draft/version_1
✅ Training metrics successfully extracted and exported to: /content/gdrive/MyDrive/Automobile-Defect-Detection/Classification_Data_FINAL/training_history.csv
Final Validation Accuracy is located in the 'val_acc' column of the CSV.
