In [None]:
# Import necessary libraries
import os
import matplotlib.pyplot as plt
import random
from PIL import Image

# Set paths for each category
dataset_path = 'Fruit And Vegetable Diseases Dataset'
apple_folder = os.path.join(dataset_path, 'Apple__Healthy')
rotten_apple_folder = os.path.join(dataset_path, 'Apple__Rotten')

# Count images in each folder
apple_images = os.listdir(apple_folder)
rotten_apple_images = os.listdir(rotten_apple_folder)

print(f"Number of images in 'apple' folder: {len(apple_images)}")
print(f"Number of images in 'rotten apples' folder: {len(rotten_apple_images)}")


Number of images in 'apple' folder: 2438
Number of images in 'rotten apples' folder: 2930


In [8]:

# Function to display random sample images from a folder
def display_sample_images(folder, title, sample_size=5):
    images = os.listdir(folder)
    sample_images = random.sample(images, min(sample_size, len(images)))
    
    plt.figure(figsize=(15, 5))
    plt.suptitle(title)
    
    for i, img_name in enumerate(sample_images):
        img_path = os.path.join(folder, img_name)
        img = Image.open(img_path)
        plt.subplot(1, sample_size, i + 1)
        plt.imshow(img)
        plt.axis('off')
    
    plt.show()

# Display sample images from each category
    # TODO: UNcomment later
# display_sample_images(apple_folder, "Sample Images - Apples", sample_size=5)
# display_sample_images(rotten_apple_folder, "Sample Images - Rotten Apples", sample_size=5)

In [10]:
#  Define the target size (e.g., 224x224)
target_size = (224, 224)

# Create directories to save resized images (optional step to keep original images intact)
resized_path = 'Resized_Fruit_And_Vegetable_Diseases_Dataset'
os.makedirs(os.path.join(resized_path, 'apple'), exist_ok=True)
os.makedirs(os.path.join(resized_path, 'rotten apples'), exist_ok=True)

def resize_images(input_folder, output_folder, size):
    # Resize and save each image in the specified folder
    for img_name in os.listdir(input_folder):
        img_path = os.path.join(input_folder, img_name)
        try:
            img = Image.open(img_path)
            img = img.resize(size)  # Resize to target size
            if img.mode == 'RGBA':
                img = img.convert('RGB')

            img.save(os.path.join(output_folder, img_name))  # Save resized image
        except Exception as e:
            print(f"Error processing {img_name}: {e}")

# Resize images in both 'apple' and 'rotten apples' folders
resize_images(apple_folder, os.path.join(resized_path, 'apple'), target_size)
resize_images(rotten_apple_folder, os.path.join(resized_path, 'rotten apples'), target_size)

print("Resizing complete. All images are now 224x224.")

Resizing complete. All images are now 224x224.


In [11]:

# Define paths to the resized folders
resized_apple_folder = 'Resized_Fruit_And_Vegetable_Diseases_Dataset/apple'
resized_rotten_apple_folder = 'Resized_Fruit_And_Vegetable_Diseases_Dataset/rotten apples'

# Count the images in each folder
num_apple_images = len(os.listdir(resized_apple_folder))
num_rotten_apple_images = len(os.listdir(resized_rotten_apple_folder))

# Print the counts
print(f"Number of resized images in 'apple' folder: {num_apple_images}")
print(f"Number of resized images in 'rotten apples' folder: {num_rotten_apple_images}")

Number of resized images in 'apple' folder: 2438
Number of resized images in 'rotten apples' folder: 2930


In [14]:

import torchvision.transforms as transforms

# Define augmentation pipeline using torchvision
augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=45),
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.2)),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
])

# Define paths to the resized folders for healthy and rotten apples
apple_folder = 'Resized_Fruit_And_Vegetable_Diseases_Dataset/apple'
rotten_apple_folder = 'Resized_Fruit_And_Vegetable_Diseases_Dataset/rotten apples'

# Number of augmented images to generate per original image
augment_per_image = 5

# Function to augment images in a given folder and save them back to the same folder
def augment_images(folder, augment_pipeline, num_augmentations=5):
    for img_name in os.listdir(folder):
        img_path = os.path.join(folder, img_name)
        try:
            img = Image.open(img_path)
            for i in range(num_augmentations):
                augmented_img = augment_pipeline(img)
                augmented_img.save(os.path.join(folder, f"{img_name.split('.')[0]}_aug_{i}.jpg"))
        except Exception as e:
            print(f"Error processing {img_name}: {e}")

# Apply augmentation to images in both the "apple" and "rotten apples" folders
augment_images(apple_folder, augmentation, augment_per_image)
augment_images(rotten_apple_folder, augmentation, augment_per_image)

print("Data augmentation complete.")


Data augmentation complete.


In [15]:
# Count the images in each folder
num_apple_images = len(os.listdir(resized_apple_folder))
num_rotten_apple_images = len(os.listdir(resized_rotten_apple_folder))

# Print the counts
print(f"Number of resized images in 'apple' folder: {num_apple_images}")
print(f"Number of resized images in 'rotten apples' folder: {num_rotten_apple_images}")

Number of resized images in 'apple' folder: 6248
Number of resized images in 'rotten apples' folder: 5965


In [17]:
import shutil
from sklearn.model_selection import train_test_split

# Paths to the augmented image folders
apple_folder = 'Resized_Fruit_And_Vegetable_Diseases_Dataset/apple'
rotten_apple_folder = 'Resized_Fruit_And_Vegetable_Diseases_Dataset/rotten apples'

# Define paths for the split dataset
train_path = 'dataset_split/train'
val_path = 'dataset_split/val'
test_path = 'dataset_split/test'

# Create directories for train, validation, and test sets
for split in [train_path, val_path, test_path]:
    os.makedirs(os.path.join(split, 'apple'), exist_ok=True)
    os.makedirs(os.path.join(split, 'rotten apples'), exist_ok=True)

# Function to split and copy images
def split_and_copy_images(folder, label):
    images = os.listdir(folder)
    train_images, temp_images = train_test_split(images, test_size=0.2, random_state=42)  # 80% train, 20% temp
    val_images, test_images = train_test_split(temp_images, test_size=0.5, random_state=42)  # Split remaining into 10% val, 10% test
    
    # Copy images to respective folders
    for img_name in train_images:
        shutil.copy(os.path.join(folder, img_name), os.path.join(train_path, label, img_name))
    for img_name in val_images:
        shutil.copy(os.path.join(folder, img_name), os.path.join(val_path, label, img_name))
    for img_name in test_images:
        shutil.copy(os.path.join(folder, img_name), os.path.join(test_path, label, img_name))

# Split and copy images for both categories
split_and_copy_images(apple_folder, 'apple')
split_and_copy_images(rotten_apple_folder, 'rotten apples')

print("Dataset split into train, validation, and test sets.")

Dataset split into train, validation, and test sets.


In [18]:

# Function to count images in a given folder
def count_images_in_folder(folder):
    apple_count = len(os.listdir(os.path.join(folder, 'apple')))
    rotten_apple_count = len(os.listdir(os.path.join(folder, 'rotten apples')))
    return apple_count, rotten_apple_count

# Count images in each split
train_apple, train_rotten = count_images_in_folder(train_path)
val_apple, val_rotten = count_images_in_folder(val_path)
test_apple, test_rotten = count_images_in_folder(test_path)

# Print the results
print(f"Train set: {train_apple} healthy apples, {train_rotten} rotten apples")
print(f"Validation set: {val_apple} healthy apples, {val_rotten} rotten apples")
print(f"Test set: {test_apple} healthy apples, {test_rotten} rotten apples")

print("\nTotal counts:")
print(f"Healthy apples: {train_apple + val_apple + test_apple}")
print(f"Rotten apples: {train_rotten + val_rotten + test_rotten}")
print(f"Total images: {train_apple + train_rotten + val_apple + val_rotten + test_apple + test_rotten}")

Train set: 4998 healthy apples, 4772 rotten apples
Validation set: 625 healthy apples, 596 rotten apples
Test set: 625 healthy apples, 597 rotten apples

Total counts:
Healthy apples: 6248
Rotten apples: 5965
Total images: 12213


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# 1. Define Data Augmentation and Normalization
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Load the dataset splits
data_dir = 'dataset_split'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: DataLoader(image_datasets[x], batch_size=32, shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

# 2. Load ResNet18 Pre-trained Model and Modify the Final Layer
model = models.resnet18(pretrained=True)

# Freeze all layers except the final one
for param in model.parameters():
    param.requires_grad = False

# Modify the final layer for binary classification
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # 2 classes: "rotten" and "non-rotten"

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 3. Define Loss Function, Optimizer, and Scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# 4. Train the Model
num_epochs = 10
for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    print('-' * 10)
    
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()
        
        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            # Forward pass
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                # Backward pass and optimization in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        
        # Adjust learning rate
        if phase == 'train':
            scheduler.step()
        
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        
        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    
    print()

print("Training complete.")

# 5. Evaluate on Test Set
test_dataset = datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms['val'])
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
model.eval()
running_corrects = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)

test_acc = running_corrects.double() / len(test_dataset)
print(f'Test Accuracy: {test_acc:.4f}')

# Save the model
torch.save(model.state_dict(), 'apple_classifier_resnet18.pth')



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/suhanishokeen/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:01<00:00, 43.6MB/s]

Epoch 1/10
----------





train Loss: 0.3576 Acc: 0.8397
val Loss: 0.3043 Acc: 0.8624

Epoch 2/10
----------
train Loss: 0.2592 Acc: 0.8904
val Loss: 0.1753 Acc: 0.9345

Epoch 3/10
----------
train Loss: 0.2481 Acc: 0.8966
val Loss: 0.1561 Acc: 0.9443

Epoch 4/10
----------
train Loss: 0.2278 Acc: 0.9026
val Loss: 0.1819 Acc: 0.9312

Epoch 5/10
----------
train Loss: 0.2229 Acc: 0.9044
val Loss: 0.1771 Acc: 0.9263

Epoch 6/10
----------
train Loss: 0.2127 Acc: 0.9093
val Loss: 0.1900 Acc: 0.9197

Epoch 7/10
----------
train Loss: 0.2285 Acc: 0.9048
val Loss: 0.1363 Acc: 0.9484

Epoch 8/10
----------
train Loss: 0.2062 Acc: 0.9137
val Loss: 0.1547 Acc: 0.9402

Epoch 9/10
----------
train Loss: 0.2055 Acc: 0.9156
val Loss: 0.1470 Acc: 0.9394

Epoch 10/10
----------
train Loss: 0.2055 Acc: 0.9141
val Loss: 0.1365 Acc: 0.9484

Training complete.
Test Accuracy: 0.9615


### Model Training and Validation Summary

This analysis covers the training and validation performance of a ResNet18 model fine-tuned to classify apples as "rotten" or "non-rotten" over 10 epochs.

#### Key Results
- **Initial Epoch**: The model starts with a train accuracy of 83.97% and validation accuracy of 86.24%. The close training and validation losses suggest effective generalization from the start.
- **Epochs 2-6**: Both training and validation accuracy steadily improve, reaching 90%+ as the model learns more meaningful features.
- **Final Epoch (10)**: The model achieves a train accuracy of 91.41% and validation accuracy of 94.84%, with stable, low losses, indicating minimal overfitting.

#### Test Set Evaluation
- **Test Accuracy**: 96.15%, confirming that the model generalizes well to unseen data.

#### Summary
The model demonstrated consistent learning with minimal overfitting and reached 96.15% accuracy on the test set. This high performance indicates readiness for deployment or further fine-tuning, achieving reliable classification of apple quality.