In [4]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
 
import torch.nn as nn
import torch.nn.functional as F

In [5]:
# torch.cuda.set_device(1)
device = torch.device("cuda")
# device = torch.device('mps')
# device = torch.device('cpu')

In [6]:
device

device(type='cuda')

In [7]:
import os
import random

def set_all_seeds(seed):
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_all_seeds(42)

In [8]:
VALIDATION_PERCENTAGE = 0.10
TRAIN_PATH = "/kaggle/input/trainset"

In [9]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 32

trainFolder = torchvision.datasets.ImageFolder(root=TRAIN_PATH,
                                               transform=transform)

train_set, val_set = torch.utils.data.random_split(trainFolder, [79210, 8801])

trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

validloader = torch.utils.data.DataLoader(val_set, batch_size=1,
                                          shuffle=True, num_workers=2)

In [10]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        ## Warstwa konwolucyjna
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5, stride=2, padding=0)
        ## Warstwa max pooling 
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(1, 3, 5)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(75, 120)
        self.fc2 = nn.Linear(120, 60)
        self.fc3 = nn.Linear(60, 50)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [11]:
net = Net()
net = net.to(device)
net

Net(
  (conv1): Conv2d(3, 1, kernel_size=(5, 5), stride=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(1, 3, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=75, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=50, bias=True)
)

In [12]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Accuracy and accuracy per class

In [13]:
def get_accuracy(model, dataset):
    correct = 0
    total = 0
    model.eval() #*********#
    for x, labels in dataset:
        # moving to device
        x, labels = x.to(device), labels.to(device)

        output = model(x)
        pred = output.max(1, keepdim=True)[1] # get the index of the max logit
        correct += pred.eq(labels.view_as(pred)).sum().item()
        total += x.shape[0]
    return correct / total

In [14]:
def get_acccuracy_per_class(model, loader, classes):

    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}
    acurracy_sum = 0

    # again no gradients needed
    with torch.no_grad():
        for data in loader:
            images, labels = data    
            images = images.to(device)
            outputs = net(images).cpu()   
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1


    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print("Accuracy for class {:5s} is: {:.2f} %".format(classname, 
                                                       accuracy))
        acurracy_sum += accuracy
        
    print("Average accuracy for a class is: {:.2f} %".format(acurracy_sum/len(classes)))
    
    # division by 100
    return acurracy_sum/(len(classes) * 100)

# Training

In [15]:
%%time
NUM_EPOCHS = 1
for epoch in range(NUM_EPOCHS): 
    epoch_loss = 0
    train_accuracy = 0
    val_accuracy = 0
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
#         print(i)
        net.train()
        
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

    print(f'[{epoch+1}/{NUM_EPOCHS}] loss: {running_loss * 1000}')
    running_loss = 0.0

print('Finished Training')

[1/1] loss: 8811964.530706406
Finished Training
CPU times: user 13.7 s, sys: 3.48 s, total: 17.2 s
Wall time: 3min 19s


# Calculating accuracy

In [16]:
classes = os.listdir(TRAIN_PATH)

In [17]:
%%time
avg_acc = get_acccuracy_per_class(net, validloader, classes)

Accuracy for class beetle is: 7.19 %
Accuracy for class spider is: 12.70 %
Accuracy for class turtle is: 9.20 %
Accuracy for class snake is: 8.82 %
Accuracy for class memorial is: 0.00 %
Accuracy for class tomato is: 0.00 %
Accuracy for class saw   is: 2.29 %
Accuracy for class swine is: 5.03 %
Accuracy for class fungus is: 0.60 %
Accuracy for class flower is: 7.22 %
Accuracy for class truck is: 0.00 %
Accuracy for class bread is: 41.49 %
Accuracy for class bomb  is: 11.36 %
Accuracy for class spoon is: 0.00 %
Accuracy for class carbon is: 11.63 %
Accuracy for class bird  is: 0.52 %
Accuracy for class antenna is: 0.00 %
Accuracy for class spice is: 0.55 %
Accuracy for class battery is: 0.00 %
Accuracy for class acoustic is: 0.00 %
Accuracy for class bacteria is: 9.89 %
Accuracy for class motor is: 6.25 %
Accuracy for class kangaroo is: 6.32 %
Accuracy for class towel is: 7.32 %
Accuracy for class worm  is: 28.82 %
Accuracy for class gauge is: 3.72 %
Accuracy for class tea   is: 30.59 %

In [18]:
avg_acc

0.10515634143815264

# Saving model to file

In [19]:
# saving model to file
state_dict = net.state_dict()
torch.save(state_dict, "neural_net.tar")

# Reading test images

In [20]:
# custom test path
TEST_PATH = "/kaggle/input/d/franeksakowski/testset"

In [29]:
# reading file names (adjust path)
test_files_names = os.listdir(TEST_PATH + "/test_all")

In [37]:
testFolder = torchvision.datasets.ImageFolder(root=TEST_PATH,
                                               transform=transforms.ToTensor())

testloader = torch.utils.data.DataLoader(testFolder, batch_size=1,
                                         shuffle=False, num_workers=2)

# Predictions on testset

In [38]:
preds = []

In [39]:
%%time
for i, data in enumerate(testloader, 0):
    x, labels = data
    true_values.append(labels)
    x, labels = x.to(device), labels.to(device)
    preds.append(net(x).max(1, keepdim=True)[1].item())

CPU times: user 19.7 s, sys: 4.48 s, total: 24.2 s
Wall time: 34.9 s


In [76]:
# saving to file
pd.DataFrame({'A': test_files_names, 'B': preds}).to_csv("preds.csv", index=False, header=False)

In [82]:
pd.read_csv("preds.csv", header=None)

Unnamed: 0,0,1
0,7386844264429369.JPEG,2
1,24419493975293927.JPEG,36
2,7742907835373426.JPEG,46
3,38209545241358567.JPEG,36
4,3569300609925149.JPEG,5
...,...,...
9995,5239559584274834.JPEG,37
9996,1312880239038562.JPEG,37
9997,014146161727582651.JPEG,37
9998,7728901188836401.JPEG,41


# Training loop + saving best model

In [None]:
%%time

epochs, losses, train_acc, val_acc = [], [], [], []

best_model = None
best_valid_acc = None

NUM_EPOCHS = 5
for epoch in range(NUM_EPOCHS): 
    train_correct_count = 0
    train_data_len = 0
    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # print(i)
        net.train()
        
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        

        # print statistics
        outputs_for_train_correct_count = outputs.max(1, keepdim=True)[1].squeeze()
        train_correct_count += (outputs_for_train_correct_count == labels).sum()
        train_data_len += len(labels)

        # # clear memory
        # del inputs, labels, outputs
        # torch.mps.empty_cache()

        running_loss_single_iter = loss.item()
        running_loss += running_loss_single_iter
        
    

    new_train_acc = train_correct_count / train_data_len
    train_acc.append(new_train_acc)
    
    running_loss /= 1000
    losses.append(running_loss)
    new_val_acc = get_accuracy()
    val_acc.append(new_val_acc)
    epochs.append(epoch+1)
    
    # replacing best model if necessary
    if best_model == None or best_valid_acc < new_val_acc:
        best_model = net
        best_valid_acc = new_val_acc
        
        # saving model to file
        state_dict = best_model.state_dict()
        torch.save(state_dict, "neural_net.tar")

    print(f'epoch {epoch+1} train running accuracy = {new_train_acc}, val_accuracy = {new_val_acc}')
    print(f'[{epoch+1}/{NUM_EPOCHS}] running loss: {round(running_loss, 4)}, macro avg accuracy: {round(get_accuracy_per_class(), 4)}\n')
    running_loss = 0.0

print('Finished Training')