In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
import numpy as np

# Define the transforms to be applied to the data
# Here, assume that the images are grayscale and need to be resized to 32x32
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])



In [9]:
# Load the custom dataset
# Here, assume that the dataset is stored in the following directories:
# - train: contains training images
# - test: contains testing images
train_data = datasets.ImageFolder(root="E:/Project_A/project_share/Project_A_image_full/train", transform=transform)
test_data = datasets.ImageFolder(root="E:/Project_A/project_share/Project_A_image_full/class", transform=transform)
train_size = int (0.875*len(train_data))
train_dataset = data.Subset(train_data,range(train_size))
# Create the data loaders
batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)



In [10]:
# Define the LeNet5 model architecture
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        # here we have 6 feature maps size 28x28
        self.avgpool1 = nn.AvgPool2d(kernel_size=2)
        # here we have 6 feature maps size 14x14
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # here we have 16 feature maps size 10x10
        self.avgpool2 = nn.AvgPool2d(kernel_size=2)
        # here  we have 16 feature maps size 5x5
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 5)

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.avgpool1(x)
        x = self.conv2(x)
        x = torch.relu(x)
        x = self.avgpool2(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = torch.relu(x)
         # Apply Inception module
        inception_output = self.inception(x)

        x = torch.relu(inception_output)
        x = torch.flatten(x, 1)
        x = self.fc2(x)
        x = torch.relu(x)
        x = self.fc3(x)
        return x
# Create the model instance and set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LeNet5().to(device)

# Define the loss function, optimizer, and learning rate
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [11]:
# Train the model
epochs = 6
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 100:.3f}")
            running_loss = 0.0


RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [32, 120]

In [None]:
# Evaluate the model
model.eval()

# Create empty lists to store the predicted labels and ground truth labels
pred_labels = []
true_labels = []
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        pred_labels += predicted.tolist()
        true_labels += labels.tolist()

print('Accuracy of the model on the test images: {:.2f}%'.format(100 * correct / total))

In [None]:

# calculate the confusion matrix using scikit-learn
cm = confusion_matrix(true_labels, pred_labels)
f1 = f1_score(true_labels, pred_labels, average='macro')
print(f"f1 = {f1}")

# Define class labels and tick marks
class_names = ['Autoriskshwa', 'Bicycle','Fullsize_car','Midsize_car','Truck']
tick_marks = np.arange(len(class_names))

# Plot confusion matrix
plt.imshow(cm, cmap=plt.cm.Blues)
plt.title('Confusion matrix')
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.colorbar()

# Add labels to the plot
thresh = cm.max() / 2.
for i, j in np.ndindex(cm.shape):
    plt.text(j, i, format(cm[i, j], 'd'),
             ha="center", va="center",
             color="white" if cm[i, j] > thresh else "black")

# Add axis labels
plt.xlabel('Predicted label')
plt.ylabel('True label')

# Display the plot
plt.show()

In [None]:
# Calculate the percentage of predictions for each class
cm_perc = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100

# Define class labels and tick marks
class_names = ['Autoriskshwa', 'Bicycle','Fullsize_car','Midsize_car','Truck']
tick_marks = np.arange(len(class_names))

# Plot confusion matrix
plt.imshow(cm_perc, cmap=plt.cm.Blues)
plt.title('Confusion matrix')
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.colorbar()

# Add labels to the plot
thresh = cm_perc.max() / 2.
for i, j in np.ndindex(cm_perc.shape):
    plt.text(j, i, format(cm_perc[i, j], '.2f') + '%',
             ha="center", va="center",
             color="black" )

# Add axis labels
plt.xlabel('Predicted label')
plt.ylabel('True label')

# Display the plot
plt.show()