In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import gc
import time

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class ANN(nn.Module):
    def __init__(
        self,
        in_dim: int,
        hidden_dim_1: int,
        hidden_dim_2: int,
        hidden_dim_3: int,
        n_classes:int = 10,
        dropout: float = 0.3
    ):
        super().__init__()
        
        self.layer1 = nn.Sequential(
            nn.Linear(in_features=in_dim, out_features=hidden_dim_1),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim_1),
            nn.Dropout(dropout),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(in_features=hidden_dim_1, out_features=hidden_dim_2),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim_2),
            nn.Dropout(dropout),
        )
        self.layer3 = nn.Sequential(
            nn.Linear(in_features=hidden_dim_2, out_features=hidden_dim_3),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim_3),
            nn.Dropout(dropout),
        )
        self.output_layer = nn.Linear(in_features=hidden_dim_3, out_features=n_classes)
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
            Args:
                x (torch.Tensor): (batch_size, in_dim) the input
            
            Output:
                (torch.Tensor): (batch_size, n_classes) the output
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output_layer(x)
        
        return x

In [4]:
class MNIST(Dataset):
    def __init__(
        self,
        data,
    ):
        self.data = data
    # def _build(self):
        # scaler = MinMaxScaler(feature_range=())
        # scaler = StandardScaler()
        
    def __getitem__(self, index) -> (torch.Tensor, torch.Tensor):
        return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
    
    def __len__(self):
        return self.data.shape[0]

In [6]:
train = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_train.csv')
test = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_test.csv')

In [7]:
scaler = StandardScaler()
train.iloc[:, 1:] = scaler.fit_transform(X=train.iloc[:, 1:])
test.iloc[:, 1:] = scaler.transform(X=test.iloc[:, 1:])

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [8]:
train_dataset = MNIST(data=train)
test_dataset = MNIST(data=test)

In [9]:
train_batchsize = 512
val_batchsize = 512

In [10]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=train_batchsize, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=val_batchsize, shuffle=True)

In [11]:
model = ANN(
    in_dim=784,
    hidden_dim_1=784//2,
    hidden_dim_2=784//4,
    hidden_dim_3=784//8
).to(device)

In [12]:
n_epochs = 20

In [13]:
lr = 1e-3
optimiser = torch.optim.Adam(model.parameters(), lr=lr)

loss_fn = torch.nn.CrossEntropyLoss()

In [14]:
def train_epoch(
    model,
    dataloader,
    optimiser
):
    model.train()
    
    for batch in tqdm(dataloader):
        x, y = batch[0], batch[1]
        
        output = model(x)
        output = nn.Softmax(dim=-1)(output)
        loss = loss_fn(output, y)
        
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        
        if sanity_check:
            break
        
def validate(
    model,
    dataloader
):
    model.eval()
    total_loss = 0
    predictions = []
    truths = []
    
    with torch.no_grad():
        for batch in tqdm(dataloader):
            x, y = batch[0], batch[1]
            
            output = model(x)
            output = nn.Softmax(dim=-1)(output)
            loss = loss_fn(output, y)
            total_loss += loss.detach().cpu().item()/len(dataloader)
            
            preds = torch.argmax(output, dim=-1)
            predictions.extend(preds.cpu())
            truths.extend(y.cpu())
            
            if sanity_check:
                break
        
    acc = accuracy_score(y_true=truths, y_pred=predictions)
    f1 = f1_score(y_true=truths, y_pred=predictions, average='macro')
    
    return total_loss, acc, f1

In [15]:
def train_model(
    model,
    train_dataloader,
    test_dataloader,
    optimiser,
):
    for epoch in range(1, n_epochs+1):
        start_time = time.time()
        
        print(f"========= EPOCH {epoch} STARTED =========")
        train_epoch(model=model, dataloader=train_dataloader, optimiser=optimiser)
        
        print(f"========= TRAIN EVALUATION STARTED =========")
        train_val_op = validate(model=model, dataloader=train_dataloader)
        
        print(f"========= TEST EVALUATION STARTED =========")
        test_val_op = validate(model=model, dataloader=test_dataloader)
        
        print(f"END OF {epoch} EPOCH")
        print(f"| Time taken: {time.time() - start_time: 7.3f} |")
        print(f"| Train Loss: {train_val_op[0]: 7.3f} | Train acc: {train_val_op[1]: 1.5f} | Train f1: {train_val_op[2]: 1.5f} |")
        print(f"| Test Loss: {test_val_op[0]: 7.3f}  | Test acc: {test_val_op[1]: 1.5f}  | Test f1: {test_val_op[2]: 1.5f}  |")
        
        if sanity_check:
            break
        

In [16]:
sanity_check=False

In [17]:
train_model(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimiser=optimiser,
)



  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:14<00:00,  1.14s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


END OF 1 EPOCH
| Time taken:  296.573 |
| Train Loss:   1.516 | Train acc:  0.95195 | Train f1:  0.95152 |
| Test Loss:   1.521  | Test acc:  0.94840  | Test f1:  0.94781  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.10s/it]


END OF 2 EPOCH
| Time taken:  293.952 |
| Train Loss:   1.496 | Train acc:  0.96782 | Train f1:  0.96759 |
| Test Loss:   1.504  | Test acc:  0.96020  | Test f1:  0.95992  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.09s/it]


END OF 3 EPOCH
| Time taken:  293.738 |
| Train Loss:   1.491 | Train acc:  0.97267 | Train f1:  0.97248 |
| Test Loss:   1.500  | Test acc:  0.96310  | Test f1:  0.96277  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


END OF 4 EPOCH
| Time taken:  294.105 |
| Train Loss:   1.485 | Train acc:  0.97827 | Train f1:  0.97815 |
| Test Loss:   1.493  | Test acc:  0.96980  | Test f1:  0.96964  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:13<00:00,  1.13s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


END OF 5 EPOCH
| Time taken:  295.684 |
| Train Loss:   1.481 | Train acc:  0.98113 | Train f1:  0.98102 |
| Test Loss:   1.491  | Test acc:  0.97070  | Test f1:  0.97037  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 6 EPOCH
| Time taken:  294.187 |
| Train Loss:   1.480 | Train acc:  0.98185 | Train f1:  0.98178 |
| Test Loss:   1.490  | Test acc:  0.97230  | Test f1:  0.97208  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.10s/it]


END OF 7 EPOCH
| Time taken:  293.921 |
| Train Loss:   1.479 | Train acc:  0.98340 | Train f1:  0.98332 |
| Test Loss:   1.490  | Test acc:  0.97240  | Test f1:  0.97212  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 8 EPOCH
| Time taken:  294.584 |
| Train Loss:   1.477 | Train acc:  0.98483 | Train f1:  0.98479 |
| Test Loss:   1.489  | Test acc:  0.97300  | Test f1:  0.97278  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 9 EPOCH
| Time taken:  294.116 |
| Train Loss:   1.475 | Train acc:  0.98635 | Train f1:  0.98629 |
| Test Loss:   1.487  | Test acc:  0.97470  | Test f1:  0.97449  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 10 EPOCH
| Time taken:  294.842 |
| Train Loss:   1.475 | Train acc:  0.98713 | Train f1:  0.98711 |
| Test Loss:   1.487  | Test acc:  0.97460  | Test f1:  0.97441  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


END OF 11 EPOCH
| Time taken:  294.011 |
| Train Loss:   1.473 | Train acc:  0.98828 | Train f1:  0.98822 |
| Test Loss:   1.488  | Test acc:  0.97330  | Test f1:  0.97298  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.10s/it]


END OF 12 EPOCH
| Time taken:  293.517 |
| Train Loss:   1.473 | Train acc:  0.98848 | Train f1:  0.98841 |
| Test Loss:   1.487  | Test acc:  0.97420  | Test f1:  0.97402  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.10s/it]


END OF 13 EPOCH
| Time taken:  293.769 |
| Train Loss:   1.472 | Train acc:  0.98993 | Train f1:  0.98989 |
| Test Loss:   1.485  | Test acc:  0.97640  | Test f1:  0.97619  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:14<00:00,  1.14s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 14 EPOCH
| Time taken:  296.116 |
| Train Loss:   1.471 | Train acc:  0.99057 | Train f1:  0.99052 |
| Test Loss:   1.485  | Test acc:  0.97660  | Test f1:  0.97641  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.11s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.10s/it]


END OF 15 EPOCH
| Time taken:  293.383 |
| Train Loss:   1.471 | Train acc:  0.99107 | Train f1:  0.99102 |
| Test Loss:   1.487  | Test acc:  0.97420  | Test f1:  0.97396  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:11<00:00,  1.11s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


END OF 16 EPOCH
| Time taken:  293.618 |
| Train Loss:   1.470 | Train acc:  0.99125 | Train f1:  0.99122 |
| Test Loss:   1.485  | Test acc:  0.97630  | Test f1:  0.97606  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.11s/it]




100%|██████████| 118/118 [02:11<00:00,  1.11s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.09s/it]


END OF 17 EPOCH
| Time taken:  292.984 |
| Train Loss:   1.470 | Train acc:  0.99178 | Train f1:  0.99175 |
| Test Loss:   1.484  | Test acc:  0.97800  | Test f1:  0.97778  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




100%|██████████| 118/118 [02:11<00:00,  1.11s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 18 EPOCH
| Time taken:  293.467 |
| Train Loss:   1.470 | Train acc:  0.99203 | Train f1:  0.99202 |
| Test Loss:   1.484  | Test acc:  0.97740  | Test f1:  0.97726  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:10<00:00,  1.11s/it]




100%|██████████| 118/118 [02:12<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:22<00:00,  1.10s/it]


END OF 19 EPOCH
| Time taken:  292.649 |
| Train Loss:   1.469 | Train acc:  0.99212 | Train f1:  0.99208 |
| Test Loss:   1.485  | Test acc:  0.97700  | Test f1:  0.97678  |


  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 118/118 [02:11<00:00,  1.11s/it]




100%|██████████| 118/118 [02:11<00:00,  1.12s/it]




  return torch.tensor(self.data.iloc[index, 1:], dtype=torch.float32).to(device), torch.tensor(self.data.iloc[index, 0]).to(device)
100%|██████████| 20/20 [00:21<00:00,  1.09s/it]


END OF 20 EPOCH
| Time taken:  292.908 |
| Train Loss:   1.469 | Train acc:  0.99247 | Train f1:  0.99247 |
| Test Loss:   1.484  | Test acc:  0.97730  | Test f1:  0.97716  |
