In [72]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [73]:
# Select the compute device: CUDA when PyTorch reports one as available, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [74]:
# Preprocessing pipeline applied to every image: convert to a tensor, then
# normalize each of the three channels with mean 0.5 and standard deviation 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [75]:
# Load the CIFAR-10 training images (downloaded to ./data if missing) and apply the transforms.
# The full set keeps its own name so the split below never overwrites its input.
full_trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

# Split the training set into training and validation sets.
# A seeded generator makes the partition identical on every run; without it,
# re-running this cell would reshuffle the split and could move samples that
# were previously held out for validation into the training subset.
SPLIT_SEED = 42
TRAIN_SIZE, VAL_SIZE = 40000, 10000
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
trainset, valset = torch.utils.data.random_split(
    full_trainset, [TRAIN_SIZE, VAL_SIZE], generator=split_generator)

# Training batches are reshuffled each epoch; evaluation loaders keep a fixed order
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=2)

valloader = torch.utils.data.DataLoader(valset, batch_size=32,
                                        shuffle=False, num_workers=2)

# Held-out CIFAR-10 test images, preprocessed with the same transforms
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [76]:
# Define the network architecture
class Net(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The input is flattened, passed through a linear layer followed by ReLU,
    and projected to one raw score (logit) per class.

    Args:
        hidden_size: Number of units in the hidden layer.
        in_features: Number of values per flattened input sample.
            Defaults to 32 * 32 * 3.
        num_classes: Number of output scores. Defaults to 10.
    """

    def __init__(self, hidden_size, in_features=32 * 32 * 3, num_classes=10):
        super().__init__()
        # Attribute names are kept stable so existing state_dict keys still load
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the per-class scores for a batch ``x``.

        No softmax is applied here; the output of the last linear layer is
        returned as is.
        """
        flat = self.flatten(x)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

In [77]:
# Width of the hidden layer, i.e. how many neurons it contains
hidden_size: int = 2048

In [78]:
# Build the model, then move its parameters onto the selected device
net = Net(hidden_size)
net = net.to(device)

# Cross-entropy loss, minimized with SGD plus momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.003, momentum=0.9)

In [None]:
def _accuracy(model, loader):
    """Return the percentage of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and gradients are disabled while
    iterating. Batches are moved to the module-level ``device`` before the
    forward pass. The predicted class is the index of the largest output score.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    return 100 * n_correct / n_seen


NUM_EPOCHS = 100  # Number of passes over the training set

# Per-epoch histories: mean batch loss, training error (%), test error (%), validation accuracy (%)
train_losses = []
train_errors = []
test_errors = []
val_accuracies = []

# Training the network
for epoch in range(NUM_EPOCHS):
    net.train()  # Enable training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Training accuracy is accumulated from the same forward pass used for
        # the update, so it is a running figure over weights that change
        # from batch to batch within the epoch.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Mean of the per-batch losses for this epoch
    train_loss = running_loss / len(trainloader)
    train_accuracy = 100 * correct / total
    train_losses.append(train_loss)
    train_errors.append(100 - train_accuracy)

    # Evaluation on validation dataset
    val_accuracy = _accuracy(net, valloader)
    val_accuracies.append(val_accuracy)

    # Evaluation on test dataset (recorded per epoch so the test-error curve can be plotted later)
    test_accuracy = _accuracy(net, testloader)
    test_errors.append(100 - test_accuracy)

    print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Accuracy: {val_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')

print("Training finished!")

Epoch: 1, Training Loss: 1.6714, Training Accuracy: 41.38%, Validation Accuracy: 46.13%, Test Accuracy: 47.36%
Epoch: 2, Training Loss: 1.4445, Training Accuracy: 49.75%, Validation Accuracy: 46.64%, Test Accuracy: 47.64%
Epoch: 3, Training Loss: 1.3375, Training Accuracy: 53.70%, Validation Accuracy: 49.50%, Test Accuracy: 50.12%
Epoch: 4, Training Loss: 1.2523, Training Accuracy: 56.61%, Validation Accuracy: 51.19%, Test Accuracy: 51.13%
Epoch: 5, Training Loss: 1.1737, Training Accuracy: 59.42%, Validation Accuracy: 51.37%, Test Accuracy: 51.38%
Epoch: 6, Training Loss: 1.1060, Training Accuracy: 61.86%, Validation Accuracy: 52.50%, Test Accuracy: 52.75%
Epoch: 7, Training Loss: 1.0401, Training Accuracy: 64.43%, Validation Accuracy: 51.84%, Test Accuracy: 52.37%
Epoch: 8, Training Loss: 0.9788, Training Accuracy: 66.33%, Validation Accuracy: 51.54%, Test Accuracy: 51.94%
Epoch: 9, Training Loss: 0.9182, Training Accuracy: 68.52%, Validation Accuracy: 52.77%, Test Accuracy: 52.45%
E

In [None]:
# Final accuracy of the network on the test dataset
net.eval()
test_correct, test_total = 0, 0

with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        # The predicted class is the index of the largest output score
        predicted = net(inputs).argmax(dim=1)
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)

test_accuracy = 100 * test_correct / test_total
print(f'Test Accuracy: {test_accuracy:.2f}%')

In [None]:
# Plotting the training loss as a function of the training epoch
plt.plot(range(1, len(train_losses) + 1), train_losses)
plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.title('Training Loss vs. Epoch')
plt.show()

# Plotting the training error and testing error as a function of the number of training data points.
# The x-axis is derived from the histories actually recorded rather than a fixed
# 100 epochs, so the plot still works when training ran for a different number
# of epochs or was interrupted. If the run stopped between the two appends of
# one epoch, only the epochs present in both histories are drawn.
epochs_recorded = min(len(train_errors), len(test_errors))
# Cumulative number of training samples processed by the end of each epoch
train_data_points = [len(trainset) * epoch for epoch in range(1, epochs_recorded + 1)]
plt.plot(train_data_points, train_errors[:epochs_recorded], label='Training Error')
plt.plot(train_data_points, test_errors[:epochs_recorded], label='Testing Error')
plt.xlabel('Number of Training Data Points')
plt.ylabel('Error')
plt.title('Training Error and Testing Error vs. Number of Training Data Points')
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Display names used as tick labels for class indices 0-9
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Run the model over the test set, collecting predictions alongside the true labels
net.eval()
all_predictions, all_labels = [], []

with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        predicted = net(inputs).argmax(dim=1)
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Rows index the true label and columns the predicted label (see the axis labels below)
conf_matrix = confusion_matrix(all_labels, all_predictions)

# Draw the matrix as a heatmap and write the count into every cell
plt.figure(figsize=(10, 8))
plt.imshow(conf_matrix)
plt.colorbar()

num_classes = len(classes)
tick_marks = np.arange(num_classes)
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)

# Cells above half of the largest count get black text, all others white
thresh = conf_matrix.max() / 2.0
for row in range(num_classes):
    for col in range(num_classes):
        count = conf_matrix[row, col]
        text_color = "black" if count > thresh else "white"
        plt.text(col, row, str(count), ha="center", va="center", color=text_color)

plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.tight_layout()
plt.show()
