<div align="center" style="background-color:#f9f7d9;
                           padding:10px 20px;
                           border-radius:10px;
                           border:1px solid #f0e68c;">
<h2>🌾 Harvest Classifier Project</h2>
<h4>Hyperparameter Tuning Results</h4>
</div>


In [1]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import time
import torchvision.models as models
from matplotlib import pyplot as plt
import optuna

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

<div align="center" style="background-color:#fff3cd; padding:8px 15px; border-radius:8px; border:1px solid #ffeeba;">
<h3>📂 Load Data</h3>
</div>


In [3]:
image_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Resize((224, 224)),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [4]:
dataset_path = "./FreshHarvest_Dataset"
dataset = datasets.ImageFolder(dataset_path, transform=image_transforms)

In [5]:
len(dataset)

16000

In [6]:
class_names = dataset.classes
class_names 

['F_Banana',
 'F_Lemon',
 'F_Lulo',
 'F_Mango',
 'F_Orange',
 'F_Strawberry',
 'F_Tamarillo',
 'F_Tomato',
 'S_Banana',
 'S_Lemon',
 'S_Lulo',
 'S_Mango',
 'S_Orange',
 'S_Strawberry',
 'S_Tamarillo',
 'S_Tomato']

In [7]:
num_classes = len(dataset.classes)
num_classes

16

In [8]:
train_size = int(0.75*len(dataset))
val_size = len(dataset) - train_size

train_size, val_size

(12000, 4000)

In [9]:
from torch.utils.data import random_split

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

In [10]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

<div align="center" style="background-color:#fff3cd; padding:8px 15px; border-radius:8px; border:1px solid #ffeeba;">
<h3>🧠 Model Training & Hyperparameter Tuning Using Optuna</h3>
</div>

In [11]:
# Load the pre-trained ResNet model
class HarvestClassifierResNet(nn.Module):
    def __init__(self, num_classes, dropout_rate=0.5):
        super().__init__()
        self.model = models.resnet50(weights='DEFAULT')
        # Freeze all layers except the final fully connected layer
        for param in self.model.parameters():
            param.requires_grad = False
            
        # Unfreeze layer4 and fc layers
        for param in self.model.layer4.parameters():
            param.requires_grad = True            
            
        # Replace the final fully connected layer
        self.model.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(self.model.fc.in_features, num_classes)
        )

    def forward(self, x):
        x = self.model(x)
        return x

In [14]:
# Define the objective function for Optuna
def objective(trial):
    # Suggest values for the hyperparameters
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.7)
    
    # Load the model
    model = HarvestClassifierResNet(num_classes=num_classes, dropout_rate=dropout_rate).to(device)
    
    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    
    # Training loop (using fewer epochs for faster hyperparameter tuning)
    epochs = 3
    start = time.time()
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for batch_num, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100 * correct / total
        
        # Report intermediate result to Optuna
        trial.report(accuracy, epoch)
        
        # Handle pruning (if applicable)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    end = time.time()
    print(f"Execution time: {end - start} seconds")
    
    return accuracy

In [15]:
# Create the study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

[I 2025-10-21 16:09:23,499] A new study created in memory with name: no-name-e94198b4-4c74-4d81-a0d7-501b7fa84e96
[I 2025-10-21 17:30:28,441] Trial 0 finished with value: 99.6 and parameters: {'lr': 7.387264434645023e-05, 'dropout_rate': 0.4391107410937022}. Best is trial 0 with value: 99.6.


Execution time: 4864.277827739716 seconds


[I 2025-10-21 18:53:42,272] Trial 1 finished with value: 99.125 and parameters: {'lr': 1.184549673826961e-05, 'dropout_rate': 0.605581493239805}. Best is trial 0 with value: 99.6.


Execution time: 4993.286517620087 seconds


[I 2025-10-21 20:04:45,689] Trial 2 finished with value: 99.85 and parameters: {'lr': 0.0005080289521971769, 'dropout_rate': 0.31056388232009735}. Best is trial 2 with value: 99.85.


Execution time: 4262.826082468033 seconds


[I 2025-10-21 20:58:54,540] Trial 3 finished with value: 99.575 and parameters: {'lr': 1.5977363872336738e-05, 'dropout_rate': 0.5374008314566319}. Best is trial 2 with value: 99.85.


Execution time: 3248.5497648715973 seconds


[I 2025-10-21 21:52:56,290] Trial 4 finished with value: 99.9 and parameters: {'lr': 0.0001073294451814306, 'dropout_rate': 0.6033590800832881}. Best is trial 4 with value: 99.9.


Execution time: 3241.4819300174713 seconds


[I 2025-10-21 22:46:38,581] Trial 5 finished with value: 99.775 and parameters: {'lr': 0.0013346871235203615, 'dropout_rate': 0.34384163090417064}. Best is trial 4 with value: 99.9.


Execution time: 3221.7860808372498 seconds


[I 2025-10-21 23:47:15,277] Trial 6 finished with value: 99.775 and parameters: {'lr': 0.00011202035135942334, 'dropout_rate': 0.3631964198154356}. Best is trial 4 with value: 99.9.


Execution time: 3636.428502559662 seconds


[I 2025-10-22 00:05:45,819] Trial 7 pruned. 
[I 2025-10-22 00:24:10,744] Trial 8 pruned. 
[I 2025-10-22 00:42:19,907] Trial 9 pruned. 
[I 2025-10-22 01:36:41,373] Trial 10 finished with value: 99.8 and parameters: {'lr': 7.852751617018129e-05, 'dropout_rate': 0.21835947777966028}. Best is trial 4 with value: 99.9.


Execution time: 3261.1672275066376 seconds


[I 2025-10-22 01:54:51,447] Trial 11 pruned. 
[I 2025-10-22 02:12:58,079] Trial 12 pruned. 
[I 2025-10-22 02:31:01,760] Trial 13 pruned. 
[I 2025-10-22 02:49:07,445] Trial 14 pruned. 
[I 2025-10-22 03:07:11,314] Trial 15 pruned. 
[I 2025-10-22 03:25:13,037] Trial 16 pruned. 
[I 2025-10-22 03:43:15,089] Trial 17 pruned. 
[I 2025-10-22 04:01:21,600] Trial 18 pruned. 
[I 2025-10-22 04:19:27,189] Trial 19 pruned. 


In [16]:
study.best_params

{'lr': 0.0001073294451814306, 'dropout_rate': 0.6033590800832881}

<div align="center" style="background-color:#f0f8ff; padding:15px 25px; border-radius:12px; border:1px solid #add8e6;">

<h2 style="color:#0077b6;">🔍 Best Hyperparameters Found</h2>

<p style="font-size:16px; color:#333;">
After performing extensive <b>hyperparameter tuning</b>, the following configuration achieved the best performance:
</p>

<table style="width:60%; margin:auto; border-collapse:collapse;">
  <tr style="background-color:#0077b6; color:white;">
    <th style="padding:10px; border:1px solid #ccc;">Parameter</th>
    <th style="padding:10px; border:1px solid #ccc;">Best Value</th>
  </tr>
  <tr style="background-color:#e8f4fa;">
    <td style="padding:8px; border:1px solid #ccc;"><b>Learning Rate (<code>lr</code>)</b></td>
    <td style="padding:8px; border:1px solid #ccc;"><code>0.0001073294451814306</code></td>
  </tr>
  <tr style="background-color:#f7fbfe;">
    <td style="padding:8px; border:1px solid #ccc;"><b>Dropout Rate (<code>dropout_rate</code>)</b></td>
    <td style="padding:8px; border:1px solid #ccc;"><code>0.6033590800832881</code></td>
  </tr>
</table>

<p style="font-size:15px; color:#444; margin-top:10px;">
✅ These hyperparameters provided the most <b>optimal training stability</b> and <b>validation performance</b> for the Harvest Classifier model.
</p>

</div>
