# 🦟 Mosquito Detection Model Training

This notebook trains a deep learning model to detect mosquitoes in images using transfer learning with EfficientNet-B0.

## Important Note
To prevent Git from detecting output changes, make sure to clear all cell outputs before committing:
- **VS Code**: Use `Notebook: Clear All Outputs` from the command palette
- **Jupyter**: Cell → All Output → Clear

## Training Process
1. Load and preprocess the dataset
2. Set up transfer learning with EfficientNet-B0
3. Train the model using feature extraction (the pretrained backbone is frozen and only the new classification layer is trained)
4. Save the trained model for deployment

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torchvision.models import EfficientNet_B0_Weights
import time
import copy

In [None]:
# Device config: prefer a CUDA GPU, then Apple-silicon MPS, and fall back to
# the CPU so the notebook runs unchanged on any machine.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Hyper params
num_epochs = 20  # number of epochs passed to train_model
batch_size = 32  # batch size used by both data loaders

In [None]:
# Values shared by both preprocessing pipelines: target image size and the
# per-channel mean / std used for normalisation.
_input_size = (224, 224)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

data_transforms = dict(
    # Training pipeline: resize, random horizontal flip (p=0.5), tensor, normalise.
    train=transforms.Compose([
        transforms.Resize(_input_size),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]),
    # Evaluation pipeline: identical, minus the random flip.
    test=transforms.Compose([
        transforms.Resize(_input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]),
)

In [None]:
# Root folder of the image data (ImageFolder reads one sub-directory per class)
data_dir = '/Volumes/Sandisk/Pythonprojects/Mosquito-Detection/data'  # Adjust this path

# Build a dataset over the whole folder using the 'train' transform pipeline
full_dataset = datasets.ImageFolder(root=data_dir, transform=data_transforms['train'])

# Class labels are taken from the dataset
class_names = full_dataset.classes
print(f"Classes found: {class_names}")

# 80 / 20 split: the train share is rounded down, the remainder is the test share
total_size = len(full_dataset)
train_size = int(total_size * 0.8)
test_size = total_size - train_size

for split_label, split_size in (('Train', train_size), ('Test', test_size)):
    print(f'{split_label} images size: {split_size}')

In [None]:
# Split dataset.
# One random permutation of the sample indices is drawn and both subsets are
# carved from it, so they never overlap. The training subset keeps the
# augmenting 'train' pipeline attached to full_dataset; the held-out subset
# reads the same folder through the deterministic 'test' pipeline, so
# evaluation images are never randomly flipped.
split_indices = torch.randperm(len(full_dataset)).tolist()
eval_dataset = datasets.ImageFolder(data_dir, transform=data_transforms['test'])

train_dataset = torch.utils.data.Subset(full_dataset, split_indices[:train_size])
test_dataset = torch.utils.data.Subset(eval_dataset, split_indices[train_size:])

In [None]:
# Wrap both subsets in batch iterators; only the training loader shuffles.
_loader_kwargs = {'batch_size': batch_size}
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

print('Dataset split complete')

In [None]:
# Bundle the existing subsets, loaders and sizes under the phase names
# ('train' / 'val') that the training loop iterates over.
image_datasets = dict(train=train_dataset, val=test_dataset)
dataloaders = dict(train=train_loader, val=test_loader)
dataset_sizes = dict(train=train_size, val=test_size)
# class_names was defined earlier, when the dataset was loaded

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=20):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``. Batches are moved to the module-level
    ``device`` and per-phase averages are computed with the module-level
    ``dataset_sizes``. Progress is printed for every phase.

    Args:
        model: Network to optimise; it is updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, right
            after the training phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the weights of the epoch with the
        highest validation accuracy (or its initial weights if validation
        accuracy never exceeded zero).
    """
    since = time.time()

    # Snapshot of the best weights seen so far; starts as the initial weights.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(False) is the same as eval()
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Track gradients only while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only in the training phase
                    if is_training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # The criterion is assumed to return a batch mean, so the loss
                # is re-weighted by the batch size before accumulation.
                running_loss += loss.item() * inputs.size(0)
                # Accumulate as a plain int so the accuracy is a Python float
                # and the count is well-defined even if the loader is empty.
                running_corrects += (preds == labels).sum().item()

            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the weights whenever validation improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [None]:
# Use this code if you want full fine-tuning (updating every layer, starting from the pretrained weights) instead of feature extraction with a frozen backbone.
# model = models.efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
# num_ftrs = model.classifier[1].in_features

# model.classifier[1] = nn.Linear(num_ftrs, 2)
# model = model.to(device)

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr = 0.001)

# step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [None]:
# Classification loss and the pretrained EfficientNet-B0 network.
criterion = nn.CrossEntropyLoss()
model_conv = models.efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)

# Freeze every existing parameter so no gradients are computed for them.
for frozen_param in model_conv.parameters():
    frozen_param.requires_grad_(False)

In [None]:
# Replace the final classifier layer with a fresh linear head. Its output
# width follows the number of classes found in the dataset rather than a
# hard-coded 2, so it always matches the labels the loaders produce.
num_ftrs = model_conv.classifier[1].in_features
model_conv.classifier[1] = nn.Linear(num_ftrs, len(class_names))

# Move the whole network (including the new head) to the selected device.
model_conv = model_conv.to(device)

In [None]:
# SGD with momentum over the parameters of the final classifier layer only.
optimizer_conv = optim.SGD(
    params=model_conv.classifier[1].parameters(),
    momentum=0.9,
    lr=0.001,
)

In [None]:
# Multiply the learning rate by 0.1 after every 5 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer_conv, gamma=0.1, step_size=5)

# Run the training loop; the returned model holds the best validation weights.
model_conv = train_model(
    model=model_conv,
    criterion=criterion,
    optimizer=optimizer_conv,
    scheduler=exp_lr_scheduler,
    num_epochs=num_epochs,
)

In [None]:
# Create model directory and save the trained model
import os

# Create model directory if it doesn't exist
model_dir = 'model'
os.makedirs(model_dir, exist_ok=True)

# Save the trained model state dict (weights only, not the model class)
model_path = os.path.join(model_dir, 'mosquito_detection_model.pth')
torch.save(model_conv.state_dict(), model_path)

# Also save the class names for later use, one per line. The encoding is
# pinned to UTF-8 so the file does not depend on the platform's default.
class_names_path = os.path.join(model_dir, 'class_names.txt')
with open(class_names_path, 'w', encoding='utf-8') as f:
    f.writelines(f"{class_name}\n" for class_name in class_names)

print(f"Model saved to: {model_path}")
print(f"Class names saved to: {class_names_path}")
print(f"Classes: {class_names}")