In [1]:
import os

import numpy as np
import pandas as pd
import torch
from torch import optim, nn
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm
from torchvision import datasets, transforms
from torchvision.models import mobilenet_v2, resnet50, densenet121

from torchsummary import summary

from sklearn.metrics import roc_auc_score

from PIL import Image

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
def train(model, train_loader, device, optimizer, epochs, loss_func):
  log_interval = 10
  loss_sum = 0
  model.train()
  for data, target in tqdm(train_loader):
    target = target.type(torch.FloatTensor)
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = model(data).squeeze(1)
    # print(output)
    # print(target)
    loss = loss_func(output, target)
    loss_sum += loss.item()
    loss.backward()
    optimizer.step()

  print(f'Train loss: {loss_sum / len(train_loader.dataset)}')
    # log_interval = 20
    # model.train()
    # for batch_idx, (data, target) in enumerate(train_loader):
    #     # data = data.repeat(1, 3, 1, 1)
    #     data, target = data.to(device), target.to(device)
    #     optimizer.zero_grad()
    #     output = model(data)
    #     loss = loss_func(output, target)
    #     loss.backward()
    #     optimizer.step()
    #     if batch_idx % log_interval == 0:
    #         print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
    #             epoch, batch_idx * len(data), len(train_loader.dataset),
    #             100. * batch_idx / len(train_loader), loss.item()))


In [5]:
def test(model, test_loader, device, loss_func):
  model.eval()
  loss_sum = 0
  correct = 0
  with torch.no_grad():
    for data, target in tqdm(test_loader):
      true_t = target.type(torch.FloatTensor).to(device)
      data, target = data.to(device), target.to(device)
      output = model(data).squeeze(1)
      loss = loss_func(output, true_t)
      loss_sum += loss.item()
      # print(target)
      correct += roc_auc_score(target.cpu().numpy(), output.cpu().numpy())

  print(f'Valid loss: {loss_sum / len(test_loader.dataset)}, ROC AUC score: {correct / len(test_loader.dataset) * batch_size}')

In [6]:
import zipfile


file = '/content/drive/MyDrive/4course/miem-hse-ais-2024-lab-01.zip'
z = zipfile.ZipFile(file, "r")
z.extractall()

In [7]:
base = '/content/'

train_df = pd.read_csv(base + 'train.csv')
valid_df = pd.read_csv(base + 'valid.csv')
test_df = pd.read_csv(base + 'sample_submission.csv')

In [8]:
train_df['target_people'] = train_df['target_people'].astype(int)
valid_df['target_people'] = valid_df['target_people'].astype(int)

In [9]:
def max_width_height(df, img_directory):
  max_width = float(0)
  max_height = float(0)

  for img_id in df['id']:
    img_path = os.path.join(img_directory, img_id)
    with Image.open(img_path) as img:
      width, height = img.size

      if width > max_width:
        max_width = width
      if height > max_height:
        max_height = height

  return max_width, max_height

In [10]:
image_directory_train = base + 'train/'
image_directory_valid = base + 'valid/'
image_directory_test = base + 'test/'

size = []
size.append(max_width_height(train_df, image_directory_train))
size.append(max_width_height(valid_df, image_directory_valid))
size.append(max_width_height(test_df, image_directory_test))

img_width = max(max(size[0][0], size[1][0]), size[2][0])
img_height = max(max(size[0][1], size[1][1]), size[2][1])
img_width, img_height

(160, 138)

In [68]:
transform = transforms.Compose([
    transforms.Resize((340, 340)),
    transforms.ToTensor(),
    transforms.Normalize((0.2), (0.3))
])

In [45]:
class DatasetPeople(Dataset):

    def __init__(self, data, root_path, transform=None):
        self.data = data
        self.root_path = root_path
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path = os.path.join(self.root_path, self.data.loc[index, 'id'])
        image = datasets.folder.default_loader(image_path)
        label = self.data.loc[index, 'target_people']

        if self.transform:
            image = self.transform(image)

        return image, label

In [64]:
batch_size = 32
learning_rate = 0.02
epochs = 5

In [69]:
train_dataset = DatasetPeople(train_df, base + 'train/', transform)
valid_dataset = DatasetPeople(valid_df, base + 'valid/', transform)
test_dataset = DatasetPeople(test_df, base + 'test/', transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [51]:
model_rnet = resnet50()
model_rnet.fc = torch.nn.Sequential(torch.nn.Linear(
    in_features=model_rnet.fc.in_features, out_features=1),
                               torch.nn.Sigmoid())
model_rnet.to(device)
# model = mobilenet_v2(pretrained=True)
# model.classifier[1] = nn.Sequential(torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=1),
#                                     nn.Sigmoid)
# model.to(device)
summary(model_rnet, input_size=(3, 190, 130), batch_size=batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [32, 64, 95, 65]           9,408
       BatchNorm2d-2           [32, 64, 95, 65]             128
              ReLU-3           [32, 64, 95, 65]               0
         MaxPool2d-4           [32, 64, 48, 33]               0
            Conv2d-5           [32, 64, 48, 33]           4,096
       BatchNorm2d-6           [32, 64, 48, 33]             128
              ReLU-7           [32, 64, 48, 33]               0
            Conv2d-8           [32, 64, 48, 33]          36,864
       BatchNorm2d-9           [32, 64, 48, 33]             128
             ReLU-10           [32, 64, 48, 33]               0
           Conv2d-11          [32, 256, 48, 33]          16,384
      BatchNorm2d-12          [32, 256, 48, 33]             512
           Conv2d-13          [32, 256, 48, 33]          16,384
      BatchNorm2d-14          [32, 256,

In [None]:
# optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
optimizer = optim.Adadelta(params=model_rnet.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
criterion = nn.BCELoss()

for epoch in range(epochs):
  print(f'Epoch: {epoch + 1}')
  train(model_rnet, train_loader, device, optimizer, epochs, loss_func=criterion)
  test(model_rnet, valid_loader, device, criterion)
  scheduler.step()

torch.save(model_rnet.state_dict(), 'people_classifier_resnet50.pts')

In [70]:
model = mobilenet_v2(pretrained=True)
model.classifier[1] = nn.Sequential(torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=1),
                                    nn.Sigmoid())
model.to(device)
summary(model, input_size=(3, 350, 350), batch_size=batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [32, 32, 175, 175]             864
       BatchNorm2d-2         [32, 32, 175, 175]              64
             ReLU6-3         [32, 32, 175, 175]               0
            Conv2d-4         [32, 32, 175, 175]             288
       BatchNorm2d-5         [32, 32, 175, 175]              64
             ReLU6-6         [32, 32, 175, 175]               0
            Conv2d-7         [32, 16, 175, 175]             512
       BatchNorm2d-8         [32, 16, 175, 175]              32
  InvertedResidual-9         [32, 16, 175, 175]               0
           Conv2d-10         [32, 96, 175, 175]           1,536
      BatchNorm2d-11         [32, 96, 175, 175]             192
            ReLU6-12         [32, 96, 175, 175]               0
           Conv2d-13           [32, 96, 88, 88]             864
      BatchNorm2d-14           [32, 96,



In [71]:
from torch.optim.lr_scheduler import StepLR

# optimizer = optim.Adagrad(params=model.parameters(), lr=learning_rate)
optimizer = optim.Adadelta(params=model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
criterion = nn.BCELoss()



for epoch in range(epochs):
  print(f'Epoch: {epoch + 1}')
  train(model, train_loader, device, optimizer, epochs, loss_func=criterion)
  test(model, valid_loader, device, criterion)
  scheduler.step()

torch.save(model.state_dict(), 'people_classifier_mobilenet.pts')

Epoch: 1


  0%|          | 0/282 [00:00<?, ?it/s]

Train loss: 0.010251593750496116


  0%|          | 0/48 [00:00<?, ?it/s]

Valid loss: 0.009932381168861562, ROC AUC score: 0.9553000329263801
Epoch: 2


  0%|          | 0/282 [00:00<?, ?it/s]

Train loss: 0.007008703724898776


  0%|          | 0/48 [00:00<?, ?it/s]

Valid loss: 0.008627549575370116, ROC AUC score: 0.9578215571141179
Epoch: 3


  0%|          | 0/282 [00:00<?, ?it/s]

Train loss: 0.005885026733699566


  0%|          | 0/48 [00:00<?, ?it/s]

Valid loss: 0.00947325927457617, ROC AUC score: 0.9598402541070605
Epoch: 4


  0%|          | 0/282 [00:00<?, ?it/s]

Train loss: 0.004878209405848149


  0%|          | 0/48 [00:00<?, ?it/s]

Valid loss: 0.00931742455334946, ROC AUC score: 0.9594515454595501
Epoch: 5


  0%|          | 0/282 [00:00<?, ?it/s]

Train loss: 0.0042849710577562835


  0%|          | 0/48 [00:00<?, ?it/s]

Valid loss: 0.009231867598524937, ROC AUC score: 0.9601318491981679


In [62]:
ids = list(range(len(test_dataset)))
submission = test_df
predictions = []
real = []
for data, target in test_loader:
    data = data.to(device)
    output = model(data).squeeze(1)
    predictions += list(output.cpu().detach().numpy())
print(predictions)
submission['target_people'] = predictions
submission.to_csv('submission.csv', index=False)
print('Submission saved in: submission.csv')
submission.head()

[0.03280322, 0.0016866223, 0.22233629, 0.050727405, 0.0037189003, 0.0031246138, 0.76962954, 0.9995303, 0.014621907, 0.003658037, 0.6204883, 0.0828432, 0.01154568, 0.08474097, 0.14900301, 0.07935227, 0.9737988, 0.92010933, 0.9756477, 0.21580425, 0.011615166, 0.99578947, 0.052377854, 0.5595687, 0.07898008, 0.04662478, 0.084442444, 0.13561738, 0.013883147, 0.35834646, 0.21353173, 0.0089122, 0.9915303, 0.019507991, 0.9922903, 0.04631068, 0.4195296, 0.0067351605, 0.0018508496, 0.016922614, 0.986754, 0.53478926, 0.08629922, 0.99564314, 0.82860476, 0.7624063, 0.99615556, 0.0074371565, 0.018344527, 0.9929584, 0.051200546, 0.006403779, 0.07889873, 0.84882987, 0.0025373707, 0.97452617, 0.0026872684, 0.4981827, 0.9365413, 0.009778135, 0.0022734639, 0.004607319, 0.10380914, 0.0020611158, 0.0060870694, 0.0032675674, 0.06990419, 0.6420561, 0.0020179227, 0.33914798, 0.010386192, 0.09572289, 0.9999639, 0.005989944, 0.026734209, 0.0035912574, 0.061243262, 0.002003544, 0.0015329724, 0.96337247, 0.993526

Unnamed: 0,id,target_people
0,test0001.jpg,0.032803
1,test0002.jpg,0.001687
2,test0003.jpg,0.222336
3,test0004.jpg,0.050727
4,test0005.jpg,0.003719


In [None]:
#best params: size 350 350 batchsize 32 epochs 10 lr 0.01