In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
from sklearn import metrics
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

In [3]:
X = X /255.0

In [4]:
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [5]:
#svc model with kernel rbf, using regularization parameter of C=10.
model_svc_wr = SVC(C=10, gamma=0.001, kernel="rbf")

In [6]:
model_svc_wr.fit(X_train, y_train)

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [7]:
y_pred_svc_wr = model_svc_wr.predict(X_test)

In [8]:
#svc model with kernel rbf, using regularization parameter of C=1000. Higher C values refers to no regularization, hence overfitting.
model_svc_wor = SVC(C=1000, gamma=0.001, kernel="rbf")

In [9]:
model_svc_wor.fit(X_train, y_train)

SVC(C=1000, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [10]:
y_pred_svc_wor = model_svc_wor.predict(X_test)

In [11]:
print("SVC Model with regularization accuracy: ", metrics.accuracy_score(y_test, y_pred_svc_wr), "\n")
print("SVC Model without regularization accuracy:", metrics.accuracy_score(y_test, y_pred_svc_wor), "\n")

SVC Model with regularization accuracy:  0.9576 

SVC Model without regularization accuracy: 0.9739 



In [12]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

In [13]:
train_dataset = dsets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = dsets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [14]:
batch = 100
iters = 5000
epochs = int(iters / (len(train_dataset) / batch))

In [15]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch, shuffle=False)

In [16]:
class LRModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(LRModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out

In [17]:
input_dim = 28*28
output_dim = 10

model = LRModel(input_dim, output_dim)

In [18]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

LRModel(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)

In [23]:
criterion = nn.CrossEntropyLoss()

In [24]:
learning_rate = 0.001

In [25]:
#By applying no weight decay, I also not apply any regularization.
optimizer_wo_regu = torch.optim.SGD(model.parameters(), lr=learning_rate) 

In [26]:
iter = 0
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):

        images = images.view(-1, 28*28).requires_grad_().to(device)
        labels = labels.to(device)

        optimizer_wo_regu.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)

        loss.backward()

        optimizer_wo_regu.step()

        iter = iter + 1

        if iter % 500 == 0:         
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                
                images = images.view(-1, 28*28).to(device)

                outputs = model(images)

                _, predicted = torch.max(outputs.data, 1)

                total = total + labels.size(0)

                if torch.cuda.is_available():
                    correct = correct + (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct = correct + (predicted == labels).sum()

            accuracy = 100 * correct.item() / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 500. Loss: 1.8557612895965576. Accuracy: 70.32
Iteration: 1000. Loss: 1.4893296957015991. Accuracy: 77.18
Iteration: 1500. Loss: 1.3238154649734497. Accuracy: 79.7
Iteration: 2000. Loss: 1.1642907857894897. Accuracy: 81.08
Iteration: 2500. Loss: 1.1737653017044067. Accuracy: 82.1
Iteration: 3000. Loss: 1.0573378801345825. Accuracy: 82.67
Iteration: 3500. Loss: 0.9121861457824707. Accuracy: 83.48
Iteration: 4000. Loss: 0.9003533124923706. Accuracy: 83.87
Iteration: 4500. Loss: 0.753142237663269. Accuracy: 84.34


In [27]:
#By applying a weight decay, I also apply regularization.
optimizer_w_regu = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-5) 

In [28]:
iter = 0
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):

        images = images.view(-1, 28*28).requires_grad_().to(device)
        labels = labels.to(device)

        optimizer_w_regu.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)

        loss.backward()

        optimizer_w_regu.step()

        iter = iter + 1

        if iter % 500 == 0:         
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                
                images = images.view(-1, 28*28).to(device)

                outputs = model(images)

                _, predicted = torch.max(outputs.data, 1)

                total = total + labels.size(0)

                if torch.cuda.is_available():
                    correct = correct + (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct = correct + (predicted == labels).sum()

            accuracy = 100 * correct.item() / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 500. Loss: 0.6618723273277283. Accuracy: 84.96
Iteration: 1000. Loss: 0.7047918438911438. Accuracy: 85.33
Iteration: 1500. Loss: 0.6529390215873718. Accuracy: 85.6
Iteration: 2000. Loss: 0.6847403049468994. Accuracy: 85.94
Iteration: 2500. Loss: 0.6104814410209656. Accuracy: 86.17
Iteration: 3000. Loss: 0.633811891078949. Accuracy: 86.24
Iteration: 3500. Loss: 0.614646852016449. Accuracy: 86.4
Iteration: 4000. Loss: 0.6620500087738037. Accuracy: 86.62
Iteration: 4500. Loss: 0.5577638149261475. Accuracy: 86.7
