In [1]:
#!unzip 00_test_img_input.zip

Streaming output truncated to the last 5000 lines.
  inflating: 00_test_img_input/train/images/0071.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._0071.jpg  
  inflating: 00_test_img_input/train/images/2100.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._2100.jpg  
  inflating: 00_test_img_input/train/images/0717.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._0717.jpg  
  inflating: 00_test_img_input/train/images/1409.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._1409.jpg  
  inflating: 00_test_img_input/train/images/0703.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._0703.jpg  
  inflating: 00_test_img_input/train/images/2114.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._2114.jpg  
  inflating: 00_test_img_input/train/images/0065.jpg  
  inflating: __MACOSX/00_test_img_input/train/images/._0065.jpg  
  inflating: 00_test_img_input/train/images/1353.jpg  
  inflating: __MACOSX/00_test_img

In [2]:
import torch
import torchvision as tv
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
import os
from PIL import Image

In [3]:
class MyDataset(Dataset):
  """Image-classification dataset indexed by a CSV file.

  Each CSV row describes one sample: column 0 is a path that is joined onto
  ``root_dir`` to locate the image, and column 1 is converted with ``int``
  to give the label.
  """

  def __init__(self, csv, root_dir, transform=None):
    """Read the CSV index and remember where the images live.

    Args:
      csv: Path (or anything ``pandas.read_csv`` accepts) of the index file.
      root_dir: Directory that the paths in column 0 are relative to.
      transform: Optional callable applied to the RGB ``PIL.Image`` of each
        sample; when ``None`` the PIL image is returned unchanged.
    """
    self.paths = pd.read_csv(csv)
    self.root_dir = root_dir
    self.transform = transform

  def __len__(self):
    """Return the number of rows in the CSV index."""
    return len(self.paths)

  def __getitem__(self, idx):
    """Load sample ``idx`` and return ``(image, label)``.

    Args:
      idx: Positional row index into the CSV.

    Returns:
      A tuple of the (optionally transformed) RGB image and its ``int`` label.
    """
    img_path = os.path.join(self.root_dir, self.paths.iloc[idx, 0])
    label = int(self.paths.iloc[idx, 1])

    # ``convert`` returns an independent, fully loaded image, so the file
    # handle can be released deterministically instead of waiting for GC.
    with Image.open(img_path) as raw_image:
      image = raw_image.convert('RGB')

    # Compare against None explicitly: a transform object that happens to be
    # falsy (e.g. an empty container-like module) must not be skipped silently.
    if self.transform is not None:
      image = self.transform(image)
    return image, label

In [4]:
# Deterministic preprocessing: resize, centre-crop to 224, convert to a
# tensor, then normalise each channel with the mean/std values commonly used
# for ImageNet-pretrained torchvision models.
transform = tv.transforms.Compose(
    transforms=[
        tv.transforms.Resize(size=256, antialias=False),
        tv.transforms.CenterCrop(size=224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# transform_album = A.Compose([
#   A.Resize(256,256),
#   A.HorizontalFlip(p=0.5),
#   A.RandomRotate90(p=0.5),
#   A.RandomBrightnessContrast(p=0.2),
#   A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#   ToTensorV2(),
# ])

In [5]:
# Folder produced by unpacking the archive; the trailing slash is relied on
# when the sub-paths are assembled below.
root = '00_test_img_input/'
dataset = MyDataset(
    csv=f'{root}train/gt.csv',
    root_dir=f'{root}train/images',
    transform=transform,
)


In [6]:
# 90 % of the samples go to training; whatever remains is held out for
# validation, so the two sizes always add up to the full dataset.
n_samples = len(dataset)
train_size = int(0.9 * n_samples)
val_size = n_samples - train_size

In [7]:
# Seed the split so the same samples land in the validation set on every
# Restart & Run All; otherwise accuracies are not comparable between runs.
train, val = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [8]:
# Reshuffle the training samples every epoch so the optimizer does not see
# the same batches in the same order each time. Validation order does not
# affect the accuracy, so it stays sequential.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32, shuffle=False)


In [9]:
from torchvision.models import mobilenet_v2

In [10]:
# Instantiate MobileNetV2 with pretrained weights; on first use the checkpoint
# is downloaded into the local torch hub cache.
model = mobilenet_v2(pretrained=True)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 89.0MB/s]


In [11]:
# Freeze every weight of the pretrained network.
for frozen in model.parameters():
  frozen.requires_grad_(False)

In [12]:
# Re-enable gradients for the last three entries of model.features so they
# are fine-tuned.
for trainable in model.features[-3:].parameters():
  trainable.requires_grad_(True)

In [13]:
import torch.nn as nn

In [14]:
# Replace the stock classifier with a small head that ends in 50 output
# logits; the new layers are created with trainable parameters.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2),
    nn.Linear(in_features=model.last_channel, out_features=256),
    nn.ReLU(),
    nn.BatchNorm1d(num_features=256),
    nn.Linear(in_features=256, out_features=50),
)

In [15]:
import torch.optim as optim
from tqdm import tqdm

# Optimisation setup: cross-entropy over the class logits, Adam on the
# model's parameters with a single learning rate.
lr = 9e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [16]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')


In [17]:




# Fine-tune the model, reporting the mean training loss and the validation
# accuracy after each epoch. Training stops early, and the weights are saved,
# as soon as the validation accuracy reaches `min_acc`; otherwise the weights
# of the final epoch are saved after the last epoch.
early_stop = False
epochs = 15
min_acc = 0.85

# The batches are moved to `device` below, so the model has to live there
# too; without this the cell fails with a device mismatch whenever CUDA is
# available.
model.to(device)

for epoch in range(epochs):
  train_loss = 0.0
  correct_pred = 0

  model.train()
  for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}'):
    # Transfer each batch once and reuse the on-device tensors.
    images, labels = images.to(device), labels.to(device)

    # Clear gradients first so a previously interrupted run of this cell
    # cannot leak stale gradients into the first step.
    optimizer.zero_grad()
    pred = model(images)  # call the module itself so hooks are honoured
    loss = criterion(pred, labels)
    loss.backward()
    optimizer.step()

    # `loss` is a batch mean; weight it by the batch size so the epoch
    # average stays correct when the last batch is smaller.
    train_loss += loss.item() * images.size(0)

  model.eval()
  with torch.no_grad():
    for images, labels in val_loader:
      images, labels = images.to(device), labels.to(device)
      pred = model(images)
      predicted = pred.argmax(dim=1)
      correct_pred += (predicted == labels).sum().item()

  train_loss = train_loss / len(train_loader.dataset)
  val_acc = correct_pred / len(val_loader.dataset)
  print(f'Train Loss: {train_loss:.4f}, Accuracy: {val_acc:.4f}')

  if val_acc >= min_acc:
    early_stop = True
    torch.save(model.state_dict(), 'birds_model.pt')
    print('0.85 accuracy reached, saving model')
    break


if not early_stop:
  print('Training done')
  torch.save(model.state_dict(), 'birds_model.pt')

Epoch 1: 100%|██████████| 71/71 [04:35<00:00,  3.87s/it]


Train Loss: 2.0310, Accuracy: 0.7280


Epoch 2: 100%|██████████| 71/71 [04:25<00:00,  3.74s/it]


Train Loss: 0.7755, Accuracy: 0.7720


Epoch 3: 100%|██████████| 71/71 [04:36<00:00,  3.89s/it]


Train Loss: 0.3086, Accuracy: 0.7720


Epoch 4: 100%|██████████| 71/71 [04:38<00:00,  3.93s/it]


Train Loss: 0.1082, Accuracy: 0.8000


Epoch 5: 100%|██████████| 71/71 [04:44<00:00,  4.01s/it]


Train Loss: 0.0494, Accuracy: 0.7840


Epoch 6: 100%|██████████| 71/71 [04:30<00:00,  3.81s/it]


Train Loss: 0.0300, Accuracy: 0.7840


Epoch 7: 100%|██████████| 71/71 [04:51<00:00,  4.10s/it]


Train Loss: 0.0292, Accuracy: 0.8040


Epoch 8: 100%|██████████| 71/71 [04:41<00:00,  3.96s/it]


Train Loss: 0.0142, Accuracy: 0.8040


Epoch 9: 100%|██████████| 71/71 [04:42<00:00,  3.98s/it]


Train Loss: 0.0076, Accuracy: 0.8160


Epoch 10: 100%|██████████| 71/71 [04:44<00:00,  4.00s/it]


Train Loss: 0.0044, Accuracy: 0.8240


Epoch 11: 100%|██████████| 71/71 [04:42<00:00,  3.98s/it]


Train Loss: 0.0029, Accuracy: 0.8280


Epoch 12: 100%|██████████| 71/71 [04:39<00:00,  3.94s/it]


Train Loss: 0.0023, Accuracy: 0.8280


Epoch 13: 100%|██████████| 71/71 [04:36<00:00,  3.90s/it]


Train Loss: 0.0020, Accuracy: 0.8320


Epoch 14: 100%|██████████| 71/71 [04:44<00:00,  4.00s/it]


Train Loss: 0.0017, Accuracy: 0.8280


Epoch 15: 100%|██████████| 71/71 [04:39<00:00,  3.93s/it]


Train Loss: 0.0016, Accuracy: 0.8320
Training done
