In [1]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
# Define dataset directory
data_dir = '/content/drive/MyDrive/Dataset/DryFruit_Dataset'

In [4]:
from sklearn.model_selection import train_test_split

# Create a function to split the dataset
def split_dataset(data_dir):
    # List all classes (subfolders)
    classes = os.listdir(data_dir)

    # Create lists to hold file paths and labels
    file_paths = []
    labels = []

    for label, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        for img_file in os.listdir(class_dir):
            file_paths.append(os.path.join(class_dir, img_file))
            labels.append(label)

    # Split into train and temp sets (70% train, 30% temp)
    train_files, temp_files, train_labels, temp_labels = train_test_split(
        file_paths, labels, test_size=0.3, random_state=42)

    # Further split the temp set into validation and test sets (33.3% val, 66.7% test of temp -> 20% test, 10% val)
    val_files, test_files, val_labels, test_labels = train_test_split(
        temp_files, temp_labels, test_size=2/3, random_state=42)

    return (train_files, train_labels), (val_files, val_labels), (test_files, test_labels)

(train_data, train_labels), (val_data, val_labels), (test_data, test_labels) = split_dataset(data_dir)

In [5]:
# Define transformations for training and validation/test datasets
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create custom datasets
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        img_path = self.file_paths[idx]
        image = Image.open(img_path)
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Create DataLoaders
train_dataset = CustomDataset(train_data, train_labels, transform=transform)
val_dataset = CustomDataset(val_data, val_labels, transform=transform)
test_dataset = CustomDataset(test_data, test_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [12]:
class_names = {0: "Almond", 1: "Cashew", 2: "Fig", 3: "Raisin"}

In [11]:
# Load pre-trained SqueezeNet model
model = models.squeezenet1_1(pretrained=True)

# Get the number of input features for the last layer
num_input_features = model.classifier[1].in_channels

# Replace the final classifier layer
num_classes = len(class_names)  # Number of classes (4 for Almond, Cashew, Fig, Raisin)
model.classifier[1] = torch.nn.Linear(num_input_features, num_classes)  # Update output layer
# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 10  # Set number of epochs

for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (212992x13 and 512x4)

In [None]:
# Save the trained model
torch.save(model.state_dict(), 'squeezenet_dry_fruits.pth')
print("Model saved successfully.")

In [None]:
# # Validation section
# model.eval()  # Set the model to evaluation mode
# val_loss = 0.0
# correct = 0
# total = 0

# with torch.no_grad():
#     for images, labels in val_loader:
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         val_loss += loss.item()

#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# val_accuracy = correct / total
# print(f'Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.4f}')

In [None]:
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Calculate average training loss
    train_loss = epoch_loss / len(train_loader)
    train_losses.append(train_loss)

    # Validation step
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = correct / total
    val_losses.append(val_loss / len(val_loader))
    val_accuracies.append(val_accuracy)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.4f}')

In [None]:
# Plotting Loss and Accuracy
import matplotlib.pyplot as plt

# Plot Loss
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot Accuracy
plt.subplot(1, 2, 2)
plt.plot(val_accuracies, label='Validation Accuracy')
plt.title('Validation Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Test section
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.4f}')