Importing key libraries and reading the data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so NumPy-driven randomness in this notebook is repeatable.
# NOTE(review): this call does not touch PyTorch's own RNG, so anything that
# relies on torch's generator is presumably not covered by this seed - confirm.
np.random.seed(322)

In [None]:
from tqdm import tqdm_notebook

In [None]:
# Read the digit-recognizer train and test CSV files into DataFrames (train first).
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
    )
)

In [None]:
# Preview the first rows of the training table. The cells below treat
# column 0 as the label and columns 1..784 as the 784 features.
df_train.head() # 784 features, 1 label

## Splitting the training data into features and labels

In [None]:
# Column 0 of the training table is used as the label; columns 1..784 are the features.
df_label = df_train.iloc[:, 0]
df_features = df_train.iloc[:, 1:785]

# The test table is sliced to its first 784 columns.
X_test = df_test.iloc[:, 0:784]

# Report shapes in the order: test features, train features, train labels.
print(X_test.shape, df_features.shape, df_label.shape, sep='\n')

In [None]:
# Draw a single random row of features to inspect visually.
sample = df_features.sample(n=1)

In [None]:
# Show the randomly drawn row as a 28x28 grayscale image.
plt.figure()
plt.imshow(sample.to_numpy().reshape((28, 28)), cmap='gray')
plt.show()


In [None]:
# Per-feature mean over all training rows, displayed as a 28x28 grayscale image.
plt.figure()
plt.imshow(df_features.mean(axis=0).to_numpy().reshape((28, 28)), cmap='gray')
plt.show()


In [None]:
# Per-feature standard deviation over all training rows, displayed as a 28x28 grayscale image.
plt.figure()
plt.imshow(df_features.std(axis=0).to_numpy().reshape((28, 28)), cmap='gray')
plt.show()

In [None]:
# Per-feature maximum over all training rows, displayed as a 28x28 grayscale image.
plt.figure()
plt.imshow(df_features.max(axis=0).to_numpy().reshape((28, 28)), cmap='gray')
plt.show()

In [None]:
# Mean image of every digit class: keep the rows whose label equals the digit
# and average them feature by feature.
mean_img = [df_features[df_label == digit].mean(axis=0) for digit in range(10)]

# Lay the ten mean images out on a 2x5 grid; `axs.flat` walks the grid row by
# row, so the running index is exactly the digit being shown.
fig, axs = plt.subplots(2, 5, figsize=(15, 7))
fig.suptitle('Mean image per digit')
for digit, ax in enumerate(axs.flat):
    ax.imshow(mean_img[digit].to_numpy().reshape(28, 28), cmap='bwr')
    ax.set_title('Цифра ' + str(digit))
plt.show()


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


from torch.utils.data import Dataset, DataLoader

In [None]:
class MLP(nn.Module):
    """Single linear layer mapping ``input_size`` features to 10 raw outputs.

    No activation is applied, so ``forward`` returns the linear layer's
    output as-is.
    """

    def __init__(self, input_size):
        super().__init__()
        linear_head = nn.Linear(in_features=input_size, out_features=10)
        # Kept inside a Sequential so the layer stays reachable as `self.layer[0]`.
        self.layer = nn.Sequential(linear_head)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.layer(x)

In [None]:
class MLPDataset(Dataset):
    """Map-style dataset yielding ``(features, one_hot_label)`` pairs.

    Args:
        X: Indexable feature container exposing ``.shape``; ``X[idx]`` is
            returned unchanged as the sample's features.
        Y: Indexable container of integer class labels, indexed with the same
            ``idx`` as ``X``.
        num_classes: Length of the one-hot target vector. Defaults to 10,
            which matches the previously hard-coded value.
    """

    def __init__(self, X, Y, num_classes=10):
        super().__init__()
        self.X = X
        self.Y = Y
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of samples (first dimension of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``(X[idx], one_hot)`` where ``one_hot`` has a 1 at the label index."""
        y = np.zeros(self.num_classes)
        y[self.Y[idx]] = 1
        return self.X[idx], y

In [None]:
# Wrap the feature and label arrays in the dataset, then serve it in
# shuffled mini-batches of 128 samples.
_dataset = MLPDataset(df_features.to_numpy(), df_label.to_numpy())
train_dataloader = DataLoader(_dataset, batch_size=128, shuffle=True)

In [None]:

# Linear classifier over the 784 input features.
mlp = MLP(input_size=784)

# The one-hot targets are compared against the model's raw outputs (logits).
criterion = nn.BCEWithLogitsLoss()

# Adam optimizer over the linear model's parameters.
op = torch.optim.Adam(mlp.parameters(), lr=0.00001, weight_decay=0.0001)

In [None]:
# Put the model in training mode and make sure autograd is switched on.
mlp.train()
torch.set_grad_enabled(True)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mlp.to(device)

In [None]:
epoch_count = 15

# Per-batch losses of the current epoch; averaged, printed and reset after each epoch.
mean_loss = []
for ep in range(epoch_count):

    for batch_id, (batch_x, batch_y) in enumerate(tqdm_notebook(train_dataloader)):

        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Clear the gradients left by the previous step: PyTorch accumulates
        # gradients across backward() calls, so without this every update
        # would use the running sum over all batches seen so far.
        op.zero_grad()

        y_pred = mlp(batch_x.float())
        loss = criterion(y_pred, batch_y.float())

        # .item() extracts the scalar loss as a plain Python float.
        mean_loss.append(loss.item())

        loss.backward()
        op.step()

    # One summary line per epoch: epoch index, last batch index, mean batch loss.
    print(ep, batch_id, np.mean(mean_loss))
    mean_loss = []

In [None]:
# The linear layer exposes two parameters, weight then bias; keep a NumPy
# copy of the weight matrix for plotting.
W, b = tuple(mlp.layer[0].parameters())
weight = W.detach().cpu().numpy()

In [None]:
# Show each of the 10 weight rows of the linear layer as a 28x28 image on a
# 2x5 grid; `axs.flat` walks the grid row by row, so the running index is the
# row of `weight` (and the digit in the subplot title).
fig, axs = plt.subplots(2, 5, figsize=(15, 7))
fig.suptitle('Linear classifier weights per digit')
for digit, ax in enumerate(axs.flat):
    ax.imshow(weight[digit].reshape(28, 28), cmap='bwr')
    ax.set_title('Цифра ' + str(digit))
plt.show()


In [None]:
class MLP2(nn.Module):
    """Two-layer perceptron: ``input_size`` -> 32 (tanh) -> 10 raw outputs."""

    def __init__(self, input_size):
        super().__init__()

        # Hidden block: linear projection to 32 units followed by tanh.
        hidden_linear = nn.Linear(in_features=input_size, out_features=32)
        self.layer1 = nn.Sequential(hidden_linear, nn.Tanh())

        # Output block: linear map from the 32 hidden units to 10 outputs.
        self.layer2 = nn.Sequential(nn.Linear(in_features=32, out_features=10))

    def forward(self, x):
        """Run ``x`` through the hidden block and then the output block."""
        return self.layer2(self.layer1(x))

In [None]:
# Two-layer network over the 784 input features.
mlp2 = MLP2(input_size=784)

# The one-hot targets are compared against the model's raw outputs (logits).
criterion = nn.BCEWithLogitsLoss()

# The optimizer must own the parameters of the model being trained: it is
# built over `mlp2.parameters()` so that `op.step()` updates `mlp2`.
op = torch.optim.Adam(
    lr=0.00001,
    params=mlp2.parameters(),
    weight_decay=0.0001
)

In [None]:
# Put the two-layer model in training mode and make sure autograd is switched on.
mlp2.train()
torch.set_grad_enabled(True)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mlp2.to(device)

In [None]:
epoch_count = 3

# Per-batch losses of the current epoch; averaged, printed and reset after each epoch.
mean_loss = []
for ep in range(epoch_count):

    for batch_id, (batch_x, batch_y) in enumerate(tqdm_notebook(train_dataloader)):

        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Clear the gradients left by the previous step: PyTorch accumulates
        # gradients across backward() calls, so without this every update
        # would use the running sum over all batches seen so far.
        op.zero_grad()

        y_pred = mlp2(batch_x.float())
        loss = criterion(y_pred, batch_y.float())

        # .item() extracts the scalar loss as a plain Python float.
        mean_loss.append(loss.item())

        loss.backward()
        op.step()

    # One summary line per epoch: epoch index, last batch index, mean batch loss.
    print(ep, batch_id, np.mean(mean_loss))
    mean_loss = []

In [None]:
# Hidden layer: its two parameters are weight then bias; keep a NumPy copy of the weights.
W, b = tuple(mlp2.layer1[0].parameters())
weight = W.detach().cpu().numpy()

# Output layer: the first parameter is the weight matrix.
outlayer = next(iter(mlp2.layer2[0].parameters())).detach().cpu().numpy()

In [None]:
# Display the shape of the hidden-layer weight matrix extracted above.
# Presumably (32, 784) for nn.Linear(784, 32) - confirm against the output.
weight.shape

In [None]:
# Show each of the 32 hidden-layer weight rows as a 28x28 image on a 4x8 grid;
# `axs.flat` walks the grid row by row, so the running index is the row of
# `weight` being drawn.
fig, axs = plt.subplots(4, 8, figsize=(15, 7))
fig.suptitle('Hidden-layer weights (one image per hidden unit)')
for unit, ax in enumerate(axs.flat):
    ax.imshow(weight[unit].reshape(28, 28), cmap='bwr')
plt.show()


In [None]:
# Heatmap of the output-layer weight matrix `outlayer` (taken from
# mlp2.layer2[0] above), drawn with the diverging 'bwr' colormap.
plt.figure()
plt.imshow(outlayer, cmap='bwr')
plt.show()