In [1]:
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim

  warn(


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [3]:
transform = transforms.Compose([transforms.ToTensor()])
mnist_pytorch = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader_pytorch = torch.utils.data.DataLoader(mnist_pytorch, batch_size=1, shuffle=False)

In [4]:
# MNIST SIZE 28X28
class CNN5C3D(nn.Module):
	def __init__(self):
		super(CNN5C3D, self).__init__()
		# Feature Extraction Layers
		self.features = nn.Sequential( # Attribute for feature extraction part of NN
			nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=0),
			nn.ReLU(inplace=False),
			nn.MaxPool2d(kernel_size=3, stride=2,padding=0),
			nn.LocalResponseNorm(size=5, alpha=0.0001, beta= 0.75, k=2.0), # self-tuned params

			nn.Conv2d(96,256, kernel_size=2, stride=2, padding=2),
			nn.ReLU(inplace=False),
			nn.MaxPool2d(kernel_size=3, stride=1,padding=0),
			nn.LocalResponseNorm(size=5, alpha=0.0001, beta= 0.75, k=2.0), # self-tuned params

			nn.Conv2d(256,384, kernel_size=3, stride=1, padding=1),
			nn.ReLU(inplace=False),

			nn.Conv2d(384,384, kernel_size=3, stride=1, padding=1),
			nn.ReLU(inplace=False),

			nn.Conv2d(384,256, kernel_size=3, stride=1, padding=1),
			nn.ReLU(inplace=False),
			nn.MaxPool2d(kernel_size=1, stride=2,padding=0),
		)

		self.classifier = nn.Sequential( # Attribute for classification on features
			nn.Linear(256*1*1, 4096),
			nn.ReLU(inplace=True),
			nn.Dropout(p=0.5, inplace=False),

			nn.Linear(4096, 4096),
			nn.ReLU(inplace=True),
			nn.Dropout(p=0.5, inplace=False),
			
			nn.Linear(4096, 1000),
			nn.Softmax(dim=1)
		)

	def forward(self, input):
		inputFeatures = self.features(input)
		# Flatten the tensor to match the expected input of classifier
		inputFeatures = inputFeatures.view(inputFeatures.size(0), -1)
		classification = self.classifier(inputFeatures)
		return classification

In [None]:
# MNIST SIZE 256X256
class CNN5C3D(nn.Module):
	def __init__(self):
		super(CNN5C3D, self).__init__()
		# Feature Extraction Layers
		self.features = nn.Sequential( # Attribute for feature extraction part of NN
			nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=0),
			nn.ReLU(inplace=False),
			nn.MaxPool2d(kernel_size=3, stride=2,padding=0),
			nn.LocalResponseNorm(size=5, alpha=0.0001, beta= 0.75, k=2.0), # self-tuned params

			nn.Conv2d(96,256, kernel_size=2, stride=2, padding=2),
			nn.ReLU(inplace=False),
			nn.MaxPool2d(kernel_size=3, stride=1,padding=0),
			nn.LocalResponseNorm(size=5, alpha=0.0001, beta= 0.75, k=2.0), # self-tuned params

			nn.Conv2d(256,384, kernel_size=3, stride=1, padding=1),
			nn.ReLU(inplace=False),

			nn.Conv2d(384,384, kernel_size=3, stride=1, padding=1),
			nn.ReLU(inplace=False),

			nn.Conv2d(384,256, kernel_size=3, stride=1, padding=1),
			nn.ReLU(inplace=False),
			nn.MaxPool2d(kernel_size=1, stride=2,padding=0),
		)

		self.classifier = nn.Sequential( # Attribute for classification on features
			nn.Linear(256*1*1, 4096),
			nn.ReLU(inplace=True),
			nn.Dropout(p=0.5, inplace=False),

			nn.Linear(4096, 4096),
			nn.ReLU(inplace=True),
			nn.Dropout(p=0.5, inplace=False),
			
			nn.Linear(4096, 1000),
			nn.Softmax(dim=1)
		)

	def forward(self, input):
		inputFeatures = self.features(input)
		# Flatten the tensor to match the expected input of classifier
		inputFeatures = inputFeatures.view(inputFeatures.size(0), -1)
		classification = self.classifier(inputFeatures)
		return classification

In [11]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Normalize with mean and std of MNIST
])

trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=80, shuffle=True, num_workers=2)

testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=80, shuffle=False, num_workers=2)

model = CNN5C3D().to(device) # Model to GPU
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)

In [12]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(trainloader)}], Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # 4. Evaluate on the Test Set
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10,000 test images: {100 * correct / total:.2f}%')

print('Finished Training')

Epoch [1/10], Step [100/750], Loss: 6.9078
Epoch [1/10], Step [200/750], Loss: 6.9078
Epoch [1/10], Step [300/750], Loss: 6.9078
Epoch [1/10], Step [400/750], Loss: 6.9078
Epoch [1/10], Step [500/750], Loss: 6.9078
Epoch [1/10], Step [600/750], Loss: 6.9078
Epoch [1/10], Step [700/750], Loss: 6.9078
Accuracy of the model on the 10,000 test images: 0.00%
Epoch [2/10], Step [100/750], Loss: 6.9078
Epoch [2/10], Step [200/750], Loss: 6.9078
Epoch [2/10], Step [300/750], Loss: 6.9078
Epoch [2/10], Step [400/750], Loss: 6.9078
Epoch [2/10], Step [500/750], Loss: 6.9078
Epoch [2/10], Step [600/750], Loss: 6.9078
Epoch [2/10], Step [700/750], Loss: 6.9078
Accuracy of the model on the 10,000 test images: 0.00%
Epoch [3/10], Step [100/750], Loss: 6.9078
Epoch [3/10], Step [200/750], Loss: 6.9078
Epoch [3/10], Step [300/750], Loss: 6.9078
Epoch [3/10], Step [400/750], Loss: 6.9078
Epoch [3/10], Step [500/750], Loss: 6.9078
Epoch [3/10], Step [600/750], Loss: 6.9078
Epoch [3/10], Step [700/750], 