In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

dataset = pd.read_csv('train.csv')
dataset

Unnamed: 0,pix1,pix2,pix3,pix4,pix5,pix6,pix7,pix8,pix9,pix10,...,pix1016,pix1017,pix1018,pix1019,pix1020,pix1021,pix1022,pix1023,pix1024,label
0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,7
1,4,4,4,4,4,4,5,5,5,6,...,4,4,4,4,4,4,4,4,4,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,3
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55995,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,7
55996,5,5,5,5,5,5,5,5,5,5,...,5,5,5,5,5,5,5,5,5,9
55997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
55998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3


In [2]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(dataset.drop(columns=['label']), dataset.label, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

In [3]:
import torch
import torch.utils.data as data_utils
train_x = torch.tensor(x_train.values.reshape((-1, 1, 32, 32)).astype(np.float32))
train_y = torch.tensor(y_train.values.astype(np.int64))
train_tensor = data_utils.TensorDataset(train_x, train_y) 
trainloader = data_utils.DataLoader(dataset = train_tensor, batch_size = 32)

val_y = torch.tensor(y_val.values.astype(np.int64))
val_x = torch.tensor(x_val.values.reshape((-1, 1, 32, 32)).astype(np.float32))
val_tensor = data_utils.TensorDataset(val_x, val_y) 
validloader = data_utils.DataLoader(dataset = val_tensor, batch_size = 64)

test_y = torch.tensor(y_test.values.astype(np.int64))
test_x = torch.tensor(x_test.values.reshape((-1, 1, 32, 32)).astype(np.float32))
test_tensor = data_utils.TensorDataset(test_x, test_y) 
testloader = data_utils.DataLoader(dataset = test_tensor, batch_size = 64)

In [4]:
from torch import nn
import torch.nn.functional as F
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 3)
        self.norm = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 16, kernel_size = 3)
        self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.fco = nn.Linear(in_features=16*6*6, out_features=10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.flatten(start_dim = 1)
        x = self.fco(x)
        return x
    
model = MLP()
for oneBatch in trainloader:
    inputs, labels = oneBatch
    outputs = model(inputs)

In [5]:
loss_fn = torch.nn.CrossEntropyLoss()

loss = loss_fn(outputs, labels)
print('Total loss for this batch: {}'.format(loss.item()))

Total loss for this batch: 18.090517044067383


In [6]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [7]:
def train_one_epoch(model, loss_fn, optimizer, trainloader):
    running_cum_loss = 0.

    for i, data in enumerate(trainloader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        last_mean_loss = loss.item()
        running_cum_loss += last_mean_loss * inputs.shape[0]

    return running_cum_loss / len(x_train)

loss = train_one_epoch(model, loss_fn, optimizer, trainloader)
print(f"Celková trénovací chyba: {loss}")

Celková trénovací chyba: 2.97970865385873


In [9]:
EPOCHS = 15

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch + 1))

    avg_loss = train_one_epoch(model, loss_fn, optimizer, trainloader)

    running_cum_vloss = 0.0
    vcorrect = 0
    for i, vdata in enumerate(validloader):
        vinputs, vlabels = vdata
        with torch.no_grad():
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
        running_cum_vloss += vloss * vinputs.shape[0]
        vcorrect += (voutputs.argmax(1) == vlabels).float().sum()

    avg_vloss = running_cum_vloss / len(x_val)
    vacc = vcorrect / len(x_val)
    
    print(f"TRAIN loss: {avg_loss:.3f}, VALIDATION loss: {avg_vloss:.3f}, accuraccy: {vacc:.3f}")

EPOCH 1:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 2:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 3:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 4:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 5:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 6:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 7:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 8:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 9:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 10:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 11:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 12:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 13:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 14:
TRAIN loss: 2.303, VALIDATION loss: 2.303, accuraccy: 0.099
EPOCH 15:
TRAIN loss: 2.303, 

In [10]:
test_predictions = np.zeros(len(x_test))
test_y = np.zeros(len(x_test))
print(test_predictions.shape)
ii = 0
for i, vdata in enumerate(testloader):
    vinputs, vlabels = vdata
    with torch.no_grad():
        voutputs = model(vinputs)
        vloss = loss_fn(voutputs, vlabels)
    test_predictions[ii:(ii + vinputs.shape[0])] = voutputs.argmax(1).numpy()
    test_y[ii:(ii + vinputs.shape[0])] = vlabels.numpy()
    ii += vinputs.shape[0]

(11200,)


In [11]:
from sklearn.metrics import accuracy_score
print(f"Testovací přesnost: {accuracy_score(test_y, test_predictions)}")

Testovací přesnost: 0.09678571428571428
