In [189]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pandas as pd
from torch import utils as t_utils
from torch.utils.data.dataset import random_split
from torch.utils.data import Dataset, TensorDataset, DataLoader
import matplotlib.pyplot as plt
import numpy as np
# Select the compute device for every tensor and model in this notebook.
# Preference order: CUDA GPU, then Apple "mps", then the CPU, so the notebook
# also runs on machines without a CUDA device instead of failing when the
# first tensor is moved to a hard-coded "cuda" device.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")


In [190]:

# Load the training CSV. Every column is parsed as float32; the `label`
# column is then cast back to integers because it is used as a class index.
train_data = pd.read_csv('./data/train.csv', dtype=np.float32)
train_data['label'] = train_data['label'].astype(int)

# Separate the target vector from the feature matrix (all non-label columns).
targets_numpy = train_data.label.values
features_numpy = train_data.loc[:, train_data.columns != "label"].values

# Move the whole data set onto the selected device once, up front.
# NOTE(review): features are fed to the models unscaled (presumably raw pixel
# intensities) — consider normalising them, together with re-tuning the
# learning rates used further down.
targets_tensor = torch.from_numpy(targets_numpy).to(device).long()
features_tensor = torch.from_numpy(features_numpy).to(device).float()

dataset = TensorDataset(features_tensor, targets_tensor)

# 80/20 train/held-out split. A seeded generator makes the split identical
# on every run, so reported accuracies are comparable between runs.
split_seed = 42
rows = train_data.shape[0]
test_size = int(rows*0.2)
train_size = rows - test_size
trainDataset, testDataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Mini-batch loaders. The training loader reshuffles every epoch so SGD does
# not see the batches in the same fixed order each time; the held-out loader
# keeps a stable order.
batch_size = 100
trainLoader = DataLoader(trainDataset, batch_size=batch_size, shuffle=True)
testLoader = DataLoader(testDataset, batch_size=batch_size)

In [191]:
# train_features, train_labels = next(iter(trainLoader))
# img = torch.reshape(train_features[0], (28, 28)) 
# label = train_labels[0]
# plt.imshow(img)
# plt.title(str(label))
# plt.show()
# print(f"Label: {label}")

In [192]:
class SimpleLinearNeuralNetwork(nn.Module):
    """Single fully connected layer that maps flat input vectors to class logits.

    Args:
        in_features: Length of each input vector. Defaults to ``28*28``, the
            value that used to be hard-coded.
        num_classes: Number of logits produced per sample. Defaults to 10.
    """

    def __init__(self, in_features=28*28, num_classes=10):
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the raw logits of the linear layer for ``x`` (no softmax applied)."""
        return self.linear(x)
    
# Build the single-layer baseline, place its parameters on the selected
# device, and print the layer layout.
simpleLinearNeuralNetworkModel = SimpleLinearNeuralNetwork()
simpleLinearNeuralNetworkModel = simpleLinearNeuralNetworkModel.to(device)
print(simpleLinearNeuralNetworkModel)

SimpleLinearNeuralNetwork(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)


In [193]:
def train_loop(dataloader, model, criteria, optimizer, log_every=128):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches. It must expose
            ``.dataset`` so the total sample count can be printed.
        model: Module being trained; called as ``model(X)``.
        criteria: Loss callable invoked as ``criteria(pred, y)``.
        optimizer: Optimizer used to zero gradients and apply the update step.
        log_every: Print the current loss every ``log_every`` batches
            (default 128, the interval that used to be hard-coded).
            ``0`` or ``None`` disables the progress output.

    Returns:
        None. Progress is reported through ``print``.
    """
    size = len(dataloader.dataset)
    seen = 0  # samples processed so far; stays correct for a short final batch

    # Ensure the model is in training mode even if an earlier evaluation pass
    # switched it to eval mode.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        pred = model(X)
        loss = criteria(pred, y)
        seen += len(X)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and batch % log_every == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


In [194]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    correct = 0
    test_loss = 0 
    with torch.no_grad():
        for (X, y) in dataloader: 
            pred = model(X) 
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item() 
 
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [195]:
# Hyperparameters for the single-layer baseline. The mini-batch size is fixed
# where the data loaders are built, so it is deliberately not redefined here:
# a second, different `batch_size` value would have no effect on training and
# would only misreport the batch size actually in use.
learning_rate = 1e-3
epochs = 5

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(simpleLinearNeuralNetworkModel.parameters(), lr=learning_rate)

# One training pass followed by one held-out evaluation per epoch.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(trainLoader, simpleLinearNeuralNetworkModel, loss_fn, optimizer)
    test_loop(testLoader, simpleLinearNeuralNetworkModel, loss_fn)

print("Done!")

Epoch 1
-------------------------------
loss: 46.534233  [  100/33600]
loss: 16.875134  [12900/33600]
loss: 14.239372  [25700/33600]
Test Error: 
 Accuracy: 85.7%, Avg loss: 15.401696 

Epoch 2
-------------------------------
loss: 14.260887  [  100/33600]
loss: 18.739660  [12900/33600]
loss: 12.885386  [25700/33600]
Test Error: 
 Accuracy: 88.6%, Avg loss: 10.887407 

Epoch 3
-------------------------------
loss: 8.468897  [  100/33600]
loss: 11.996378  [12900/33600]
loss: 18.307690  [25700/33600]
Test Error: 
 Accuracy: 89.0%, Avg loss: 11.336481 

Epoch 4
-------------------------------
loss: 8.071048  [  100/33600]
loss: 14.319527  [12900/33600]
loss: 16.721651  [25700/33600]
Test Error: 
 Accuracy: 85.1%, Avg loss: 15.394375 

Epoch 5
-------------------------------
loss: 8.380723  [  100/33600]
loss: 16.370310  [12900/33600]
loss: 13.278831  [25700/33600]
Test Error: 
 Accuracy: 88.4%, Avg loss: 10.655853 

Done!


In [196]:
# Score the trained single-layer baseline on the held-out loader once more.
test_loop(dataloader=testLoader, model=simpleLinearNeuralNetworkModel, loss_fn=loss_fn)




Test Error: 
 Accuracy: 88.4%, Avg loss: 10.655853 



In [197]:

# Keep a single iterator over the held-out loader so that every call to
# predict_and_show (defined below) pulls the next batch from it.
loaderIterator = iter(testLoader) 

In [198]:

def predict_and_show(model, loaderIterator):
    """Plot the first sample of the next batch with its predicted and true label.

    Args:
        model: Callable returning per-class scores for a batch of features.
        loaderIterator: Iterator yielding ``(features, labels)`` batches; one
            batch is consumed per call. The first sample is reshaped to
            28x28 for display.

    Raises:
        StopIteration: If ``loaderIterator`` has no batches left.
    """
    features, labels = next(loaderIterator)
    with torch.no_grad():
        pred = model(features)

    # matplotlib works on host arrays, so copy the sample off the accelerator
    # before plotting; passing a device tensor straight to imshow fails.
    img = torch.reshape(features[0], (28, 28)).detach().cpu().numpy()

    # Plain Python ints keep the title readable ("3" rather than a tensor repr).
    predicted = int(torch.argmax(pred[0]).item())
    actual = int(labels[0].item())

    plt.imshow(img)
    plt.title(f"predicted: {predicted}, actual: {actual}")
    plt.show()




In [199]:
# predict_and_show(simpleLinearNeuralNetworkModel, loaderIterator) 
 

In [246]:
class ComplexLinearNeuralNetwork(nn.Module):
    """Fully connected classifier: four linear layers with ReLU, Tanh and ELU between them.

    Args:
        in_features: Length of each input vector. Defaults to ``28*28``.
        hidden_features: Width of the three hidden layers. Defaults to 150.
        num_classes: Number of logits produced per sample. Defaults to 10.
    """

    def __init__(self, in_features=28*28, hidden_features=150, num_classes=10):
        super().__init__()
        # Layers are created in a fixed order so that the parameter names
        # (``linear_relu_stack.<index>``) stay stable.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.Tanh(),
            nn.Linear(hidden_features, hidden_features),
            nn.ELU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.linear_relu_stack(x)
    
# Build the multi-layer network, place its parameters on the selected device,
# and print the layer layout.
complexLinearNeuralNetwork = ComplexLinearNeuralNetwork()
complexLinearNeuralNetwork = complexLinearNeuralNetwork.to(device)
print(complexLinearNeuralNetwork)

ComplexLinearNeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=150, bias=True)
    (1): ReLU()
    (2): Linear(in_features=150, out_features=150, bias=True)
    (3): GELU(approximate='none')
    (4): Linear(in_features=150, out_features=10, bias=True)
  )
)


In [247]:
from IPython.display import clear_output

# Train the multi-layer network with plain SGD. The cell output is cleared at
# the start of every epoch, so only the most recent epoch's log stays visible.
# No held-out evaluation is run inside this loop.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(complexLinearNeuralNetwork.parameters(), lr=0.01)
epochs = 30
for t in range(epochs):
    clear_output()
    print(f"Epoch {t+1}", "-------------------------------", sep="\n")
    train_loop(
        dataloader=trainLoader,
        model=complexLinearNeuralNetwork,
        criteria=loss_fn,
        optimizer=optimizer,
    )

Epoch 30
-------------------------------
loss: 0.006665  [  100/33600]
loss: 0.053503  [12900/33600]
loss: 0.041024  [25700/33600]


In [248]:
# Score the trained multi-layer network on the held-out loader.
test_loop(dataloader=testLoader, model=complexLinearNeuralNetwork, loss_fn=loss_fn)

Test Error: 
 Accuracy: 93.9%, Avg loss: 0.280582 



In [205]:
# Report how many samples ended up in the training and held-out splits,
# one count per line.
print(len(trainLoader.dataset), len(testLoader.dataset), sep="\n")

33600
8400
