In [31]:
from torchvision import datasets
from torchvision import transforms
from torch.utils import data
from torch.utils.data import Dataset
from natsort import natsorted
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.nn import init

In [3]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
device = torch.device("cuda")

True
Tesla T4


# Load the data

Download from Google Drive

In [4]:
# from google.colab import drive
# drive.mount('/content/drive')

# train_val_data_zip_path = '/content/drive/My Drive/ssne/train.zip'
# test_data_zip_path = '/content/drive/My Drive/ssne/test_all.zip'

Mounted at /content/drive


Unzip the data

In [5]:
import zipfile

train_val_data_path = 'train'
test_data_path = 'test_all'


In [None]:
train_val_data_zip_path = train_val_data_path + '.zip'
test_data_zip_path = test_data_path + '.zip'

In [6]:
import os

if not os.path.exists(train_val_data_path):
    with zipfile.ZipFile(train_val_data_zip_path, 'r') as zip_ref:
        zip_ref.extractall('.')

if not os.path.exists(test_data_path):
    with zipfile.ZipFile(test_data_zip_path, 'r') as zip_ref:
        zip_ref.extractall('.')

Load the unzipped files

In [20]:
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(0.5),
     transforms.RandomVerticalFlip(0.5),
     transforms.RandomRotation(24),
     transforms.ToTensor(),
     transforms.RandomErasing(0.4),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_val_dataset = datasets.ImageFolder(
    root=train_val_data_path,
    transform=transform
)

In [8]:
DEBUG = False
def print_debug(*inputs):
    if DEBUG:
        print(*inputs)


In [83]:
split = 0.8
batch_size = 128
drop_last = False

train_val_dataset_size = len(train_val_dataset)
train_dataset_size = int(split * train_val_dataset_size)
val_dataset_size = train_val_dataset_size - train_dataset_size

train_dataset, val_dataset = data.random_split(
    train_val_dataset,
    [train_dataset_size, val_dataset_size]
)

def create_dloader(dataset: data.Dataset, batch_size: int, drop_last: bool) -> tuple[data.DataLoader, int]:
    sample_count = len(dataset) - len(dataset) % batch_size

    dloader = data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        drop_last=drop_last
    )

    return (dloader, sample_count)

train_dloader, train_sample_count = create_dloader(train_dataset, batch_size, drop_last)
print(f'Train sample count: {train_sample_count}')

val_dloader, val_sample_count = create_dloader(val_dataset, batch_size, drop_last)
print(f'Validation sample count: {val_sample_count}')

Train sample count: 70400
Validation sample count: 17536


# Create the network

In [104]:
class CnnClassifier(nn.Module):
    def __init__(self, dropout: float) -> None:
        super().__init__()

        conv_1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        conv_2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        conv_3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        conv_4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)



        self.conv_layers = nn.Sequential(
            conv_1,
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),


            conv_2,
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            conv_3,
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            conv_4,
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

        )
        init.xavier_normal_(conv_1.weight)
        init.xavier_normal_(conv_2.weight)
        init.xavier_normal_(conv_3.weight)
        init.xavier_normal_(conv_4.weight)

        linear_1 = nn.Linear(256 * 4 *4, 4096)
        linear_2 = nn.Linear(4096, 2048)
        linear_3 = nn.Linear(2048, 512)
        linear_4 = nn.Linear(512, 50)

        # Dense layers
        self.dense_layers = nn.Sequential(
            linear_1,
            nn.BatchNorm1d(4096),
            nn.ReLU(),
            nn.Dropout(dropout),

            linear_2,
            nn.BatchNorm1d(2048),
            nn.ReLU(),
            nn.Dropout(dropout),

            linear_3,
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout),

            linear_4,
        )
        init.xavier_normal_(linear_1.weight)
        init.xavier_normal_(linear_2.weight)
        init.xavier_normal_(linear_3.weight)
        init.xavier_normal_(linear_4.weight)

    def forward(self, x) -> torch.Tensor:
        print_debug(f"Input shape: {x.shape}")
        x = self.conv_layers(x)
        print_debug(f"Conv layers output shape: {x.shape}")
        x = torch.flatten(x, 1)
        x = self.dense_layers(x)
        print_debug(f"Dense layers output shape: {x.shape}")
        return x

train_loss_func = nn.CrossEntropyLoss()
val_loss_func = nn.MSELoss()
lr = 8e-4
epoch_count = 30
dropout = 0.3

model = CnnClassifier(dropout)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

DEBUG = True
model.eval()
x, _ = next(iter(train_dloader))
x = x.to(device)
res = model(x)

Input shape: torch.Size([128, 3, 64, 64])
Conv layers output shape: torch.Size([128, 256, 4, 4])
Dense layers output shape: torch.Size([128, 50])


# Validate

In [105]:
def validate():
  classes = train_val_dataset.classes

  correct_pred = {classname: 0 for classname in classes}
  total_pred = {classname: 0 for classname in classes}

  model.eval()

  with torch.no_grad():
      for (images, labels) in val_dloader:
          images, labels = images.to(device), labels.to(device)

          outputs = model.forward(images).cpu()

          _, predictions = torch.max(outputs, 1)


          for label, prediction in zip(labels, predictions):
              if label == prediction:
                  correct_pred[classes[label]] += 1
              total_pred[classes[label]] += 1


  total_correct_count = 0
  total_pred_count = 0
  for classname, correct_count in correct_pred.items():
      total_correct_count += correct_count
      total_pred_count += total_pred[classname]
  accuracy = 100 * float(total_correct_count) / float(total_pred_count)
  print(f'Total accuracy: {accuracy:2f}%')

# Train

In [106]:
DEBUG = False


for epoch_no in range(epoch_count):
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_dloader):

        images, labels = images.to(device), labels.to(device)

        preds = model.forward(images)

        loss = train_loss_func(preds, labels)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f'[%d/%d] loss: %.5f' %
          (epoch_no + 1, epoch_count, running_loss / train_sample_count))
    validate()

torch.save(model.state_dict(), 'model_state_dict1.pth')

[1/30] loss: 0.02384
Total accuracy: 27.381696%
[2/30] loss: 0.01991
Total accuracy: 33.403397%
[3/30] loss: 0.01822
Total accuracy: 39.004715%
[4/30] loss: 0.01704
Total accuracy: 39.913651%
[5/30] loss: 0.01618
Total accuracy: 42.095097%
[6/30] loss: 0.01546
Total accuracy: 43.810714%
[7/30] loss: 0.01487
Total accuracy: 45.656990%
[8/30] loss: 0.01435
Total accuracy: 46.066011%
[9/30] loss: 0.01387
Total accuracy: 48.718968%
Total accuracy: 49.525649%
[11/30] loss: 0.01306
Total accuracy: 49.832415%
[12/30] loss: 0.01270
Total accuracy: 52.371755%
[13/30] loss: 0.01237
Total accuracy: 49.622223%
[14/30] loss: 0.01206
Total accuracy: 52.627393%
[15/30] loss: 0.01176
Total accuracy: 53.559052%
[16/30] loss: 0.01147
Total accuracy: 54.206669%
[17/30] loss: 0.01115
Total accuracy: 55.058797%
[18/30] loss: 0.01082
Total accuracy: 55.905243%
[19/30] loss: 0.01059
Total accuracy: 56.660796%
[20/30] loss: 0.01038
Total accuracy: 54.746350%
[21/30] loss: 0.01015
Total accuracy: 57.796966%
[2

Save the model

In [107]:
torch.save(model, 'model_naj.pth')

In [108]:
# Zapisanie stanu modelu
torch.save(model.state_dict(), 'model_naj_state_dict.pth')

# Prediction

In [109]:
class TestDataSet(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        all_imgs = os.listdir(root)
        self.total_imgs = natsorted(all_imgs)

    def __len__(self):
        return len(self.total_imgs)

    def __getitem__(self, idx):
        img_loc = os.path.join(self.root, self.total_imgs[idx])
        image = Image.open(img_loc).convert("RGB")
        tensor_image = self.transform(image)
        return tensor_image

    def getimgs(self):
        return self.total_imgs

In [110]:
test_dataset = TestDataSet(
    root=test_data_path,
    transform=transform
)

test_dloader = data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=2
    )



In [111]:
DEBUG = False

names = [name for name in test_dataset.getimgs()]
results = [int(model.forward(img.to(device)).argmax()) for img in test_dloader]




In [112]:
for name, result in (zip(names, results)):
  print("name: " + str(name) + ", result: " + str(result))

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
name: 7445104538570929.JPEG, result: 19
name: 7445133525810329.JPEG, result: 33
name: 7445243535307735.JPEG, result: 30
name: 7445324209159112.JPEG, result: 20
name: 7445579691980897.JPEG, result: 3
name: 7445788004832391.JPEG, result: 25
name: 7445844261017004.JPEG, result: 17
name: 7446022014620364.JPEG, result: 46
name: 07446288741430196.JPEG, result: 16
name: 7446333302050159.JPEG, result: 15
name: 7447951558380705.JPEG, result: 0
name: 07448540799840242.JPEG, result: 30
name: 7452581662591922.JPEG, result: 22
name: 7452749632176873.JPEG, result: 47
name: 7453456451065721.JPEG, result: 1
name: 7454137726859544.JPEG, result: 31
name: 7455120773948685.JPEG, result: 39
name: 7455797725064308.JPEG, result: 6
name: 7456048943193149.JPEG, result: 39
name: 7456510415069201.JPEG, result: 29
name: 7457819511102084.JPEG, result: 10
name: 7458498801064791.JPEG, result: 0
name: 7459072179936826.JPEG, result: 11
name

Save to file

In [113]:
import csv

with open('Kubiszyn_Sobiech_answer.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(zip(names, results))