In [11]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

In [12]:
device = 'cuda'

In [13]:
df = pd.read_csv("fmnist_small.csv")

In [14]:
X = df.iloc[:,1:]
Y = df['label']

In [15]:
x_train, x_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=42)

x_train = np.array(x_train)/255.0 # scaling
x_test = np.array(x_test)/255.0
y_train = np.array(y_train)
y_test = np.array(y_test)

In [16]:
class CustomDataset(Dataset):
    def __init__(self,features,labels):
        # convert row in 2d matrix
        self.features = torch.tensor(features,dtype=torch.float32,device=device).reshape(-1,1,28,28)
        self.labels = torch.tensor(labels,dtype=torch.long,device=device)
    def __len__(self):
        return self.features.shape[0]
    def __getitem__(self,index):
        return self.features[index] , self.labels[index]

train_dataset = CustomDataset(x_train,y_train)
train_loder = DataLoader(dataset=train_dataset,batch_size=32,shuffle=True)

test_dataset = CustomDataset(x_test,y_test)
test_loader = DataLoader(dataset=test_dataset,batch_size=32,shuffle=True)


In [17]:
class MySimpleNN(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=num_features,out_channels=32,kernel_size=3,padding='same',device=device),
            nn.ReLU(),
            nn.BatchNorm2d(32), # improves cnn
            nn.MaxPool2d(kernel_size=2,stride=2),
            nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,padding='same',device=device),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )   
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64*7*7,128,device=device),
            nn.BatchNorm1d(128), # after layer before activation
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(128,64,device=device),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.3), # afert RELU
            nn.Linear(64,10,device=device) # soft max activation is internaly applied by defalut    
        )
    def forward(self, X):
        X = self.features(X)
        return self.classifier(X)

In [21]:
lr = 0.1
epoches = 100

In [22]:
model = MySimpleNN(1)

model = model.to(device) # directly conver computation to GPU no need to make all tensors to GPU

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr = lr,weight_decay=1e-4)

In [23]:
for epoche in range(epoches):
    epoch_avg_loss = 0
    batch_size = 0
    for batch_features , batch_labels in train_loder:
        
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
        y_pred = model(batch_features)
        
        loss = loss_function(y_pred,batch_labels.long())
        
        epoch_avg_loss += loss
        
        batch_size +=1
        
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
    print('Epoch: ', epoche+1," ---> ","loss :",epoch_avg_loss/batch_size)

Epoch:  1  --->  loss : tensor(0.8352, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  2  --->  loss : tensor(0.5129, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  3  --->  loss : tensor(0.4220, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  4  --->  loss : tensor(0.3619, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  5  --->  loss : tensor(0.3191, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  6  --->  loss : tensor(0.2745, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  7  --->  loss : tensor(0.2562, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  8  --->  loss : tensor(0.2196, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  9  --->  loss : tensor(0.2027, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  10  --->  loss : tensor(0.1681, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  11  --->  loss : tensor(0.1558, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  12  --->  loss : tensor(0.1326, device='cuda:0', grad_fn=<DivBackward0>)
Epoch:  13  --->  loss : tensor(0.111

In [28]:
# on test data
model.eval()
total = 0
correct = 0
with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      outputs = model(batch_features) # ouput is 32X10 matrix  give for each image  prob that each will be

      _, predicted = torch.max(outputs, 1) # max prob label is extracted

      total = total + batch_labels.shape[0]

      correct = correct + (predicted == batch_labels).sum().item()

print(correct/total)

0.8766666666666667
