In [1]:
from tqdm.notebook import tqdm
import numpy as np

import sklearn
import sklearn.datasets
import sklearn.model_selection
import sklearn.metrics

import torch
import torchvision
from torch import nn
from torch.nn import functional as F


PYTORCH_DEVICE = torch.device("cpu") # "cpu" if not torch.cuda.is_available() else "cuda:0"
print(PYTORCH_DEVICE)

cpu


In [2]:
breast_cancer_datset = sklearn.datasets.load_breast_cancer()

X = breast_cancer_datset['data']
Y = breast_cancer_datset['target']

print("X_shape", X.shape)
print("Y_shape", Y.shape)

X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X, Y, test_size=0.3, random_state=67)

X_shape (569, 30)
Y_shape (569,)


In [31]:
class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, X, Y):
        assert X.shape[0] == Y.shape[0]
        self.X = torch.FloatTensor(X)
        self.Y = torch.LongTensor(Y)

    def __getitem__(self, index):
        return self.X[index], self.Y[index]

    def __len__(self):
        return self.X.shape[0]

train_dataset = SimpleDataset(X_train, Y_train)
test_dataset = SimpleDataset(X_test, Y_test)

BATCH_SIZE = 64

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [32]:
def infinite_dataloader_wrapper(dataloader):
    while True:
        for batch in dataloader:
            yield batch
            
infinite_train_dataloader = infinite_dataloader_wrapper(train_dataloader)
infinite_test_dataloader = infinite_dataloader_wrapper(test_dataloader)

In [33]:
class SimpleClassificationModel(torch.nn.Module):
    def __init__(self, input_features_count, num_classes):
        super(SimpleClassificationModel, self).__init__()
        self.some_sequential_block = nn.Sequential(
            nn.Linear(input_features_count, input_features_count),
            nn.ReLU(),
        )
        self.final_fc = nn.Linear(input_features_count, num_classes)

    def forward(self, x):
        x = self.some_sequential_block(x)
        x = self.final_fc(x)
        return x

In [34]:
model = SimpleClassificationModel(X.shape[1], max(Y.tolist()) + 1).to(PYTORCH_DEVICE)
loss_function = torch.nn.CrossEntropyLoss().to(PYTORCH_DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [37]:
def process_batch(generator, is_train, iteration):
    if is_train:
        model.train()
    else:
        model.eval()

    x_batch, y_batch = next(generator)
    x_batch = x_batch.to(PYTORCH_DEVICE)
    y_batch = y_batch.to(PYTORCH_DEVICE)
    
    
    if is_train:
        optimizer.zero_grad()

    y_predicted = model(x_batch)
    assert not torch.any(torch.isnan(y_predicted))
    loss = loss_function(y_predicted, y_batch)

    if is_train:
        loss.backward()
        optimizer.step()

    train_or_test = "train" if is_train else "test"

    y_batch_numpy = y_batch.cpu().detach().numpy()
    y_predicted_class_numpy = y_predicted.cpu().detach().numpy().argmax(axis=1)

    if iteration % 1000 == 0:
        # better way here is to calculate loss every time and push it to something like tensorboard
        print(iteration, train_or_test)
        print("loss", loss.cpu().detach().numpy())
        print("accuracy", sklearn.metrics.accuracy_score(y_batch_numpy, y_predicted_class_numpy))


for iteration in tqdm(range(10_000)):
    process_batch(infinite_train_dataloader, True, iteration)
    with torch.no_grad():
        process_batch(infinite_test_dataloader, False, iteration)

  0%|          | 0/10000 [00:00<?, ?it/s]

0 train
loss 0.1141901
accuracy 0.96875
0 test
loss 0.038228974
accuracy 0.984375
1000 train
loss 0.11041064
accuracy 0.96875
1000 test
loss 0.07006152
accuracy 0.9767441860465116
2000 train
loss 0.055502225
accuracy 0.96875
2000 test
loss 0.16307618
accuracy 0.953125
3000 train
loss 0.043022852
accuracy 0.96875
3000 test
loss 0.028703675
accuracy 1.0
4000 train
loss 0.0076737422
accuracy 1.0
4000 test
loss 0.28652117
accuracy 0.8837209302325582
5000 train
loss 0.011009355
accuracy 1.0
5000 test
loss 0.12113373
accuracy 0.953125
6000 train
loss 0.048908528
accuracy 0.984375
6000 test
loss 0.27344567
accuracy 0.9375
7000 train
loss 0.05113535
accuracy 0.96875
7000 test
loss 0.098095074
accuracy 0.9534883720930233
8000 train
loss 0.05593279
accuracy 0.96875
8000 test
loss 0.07460085
accuracy 0.953125
9000 train
loss 0.014173485
accuracy 1.0
9000 test
loss 0.13026385
accuracy 0.953125


In [38]:
# hacking of last layer on neural network and train only him
for param in model.parameters():
    model.requires_grad = False

model.fc = nn.Sequential(
    nn.Linear(X.shape[1], 10),
    nn.ReLU(),
    nn.Linear(X.shape[1], 2),
)

for iteration in tqdm(range(10_000)):
    process_batch(infinite_train_dataloader, True, iteration)
    with torch.no_grad():
        process_batch(infinite_test_dataloader, False, iteration)

  0%|          | 0/10000 [00:00<?, ?it/s]

0 train
loss 0.028271738
accuracy 1.0
0 test
loss 0.13713504
accuracy 0.9534883720930233
1000 train
loss 0.016699992
accuracy 1.0
1000 test
loss 0.03694469
accuracy 0.984375
2000 train
loss 0.0047024027
accuracy 1.0
2000 test
loss 0.13072795
accuracy 0.96875
3000 train
loss 0.05325662
accuracy 0.96875
3000 test
loss 0.20876445
accuracy 0.9302325581395349
4000 train
loss 0.007911827
accuracy 1.0
4000 test
loss 0.3298788
accuracy 0.9375
5000 train
loss 0.024022082
accuracy 0.984375
5000 test
loss 0.04369533
accuracy 0.984375
6000 train
loss 0.01381294
accuracy 0.984375
6000 test
loss 0.1008489
accuracy 0.9767441860465116
7000 train
loss 0.062712364
accuracy 0.96875
7000 test
loss 0.25300378
accuracy 0.953125
8000 train
loss 0.046902932
accuracy 0.984375
8000 test
loss 0.13067846
accuracy 0.96875
9000 train
loss 0.0071885777
accuracy 1.0
9000 test
loss 0.57413906
accuracy 0.9069767441860465


In [41]:
# bonus for image tasks. for getting dataset for images and transform it. you coud use such code
# from torchvision import transforms as T

# transform = T.Compose([
#     T.transforms.ToTensor(), 
#     T.transforms.Normalize([0.4, 0.4, 0.4], [0.4, 0.4, 0.4])]

#     # Augmentations,
#     # https://pytorch.org/vision/main/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py
    
# )

# dataset = ImageFolder("your/folder", transform=transform)