# Notebook 2: Baseline

In [1]:
import os
import sys
import json
from pathlib import Path

import pandas as pd
import numpy as np
from skimage import io
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader, random_split
from torch import optim, tensor
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F
from torch.nn import BCEWithLogitsLoss
from torchvision.utils import make_grid
from torchvision import models
from torchvision.transforms.functional import to_tensor

from google.colab import drive


%matplotlib inline

As usual, since we are working in Google Colab, we mount the drive, resolve the repo and data paths and import the modules we need.

In [2]:
# Mount point of Google Drive inside the Colab VM
DRIVE_PATH = Path('/content/drive')
drive.mount(str(DRIVE_PATH))

Mounted at /content/drive


In [3]:
# Location of the repository checkout on Google Drive and of its data folder
REPO_PATH = DRIVE_PATH.joinpath('My Drive', 'Foodvisor', 'home-assignment')
DATA_PATH = REPO_PATH.joinpath('data')

# Inputs consumed by the dataset: image folder, annotation file, label mapping
IMG_DIR_PATH = DATA_PATH.joinpath('assignment_imgs')
ANNOTATIONS_PATH = DATA_PATH.joinpath('img_annotations.json')
LABEL_MAPPING_PATH = DATA_PATH.joinpath('label_mapping.csv')

In [None]:
# Sanity check: show where the images are expected to live on Drive
print(str(IMG_DIR_PATH))

/content/drive/My Drive/Foodvisor/home-assignment/data/assignment_imgs


In [None]:
# Optional one-off step: copy the image folder from Drive to the local Colab
# disk as ./assignment_imgs (the directory the dataset cell further down reads
# from), then list it to check the copy. Uncomment to (re)create the local copy.
# !cp -R /content/drive/My\ Drive/Foodvisor/home-assignment/data/assignment_imgs .
#!ls assignment_imgs

In [54]:
# Make the repository root importable so that `src` resolves. The guard keeps
# re-runs of this cell from appending duplicate entries to sys.path.
if str(REPO_PATH) not in sys.path:
  sys.path.append(str(REPO_PATH))

from src.tomatoes_dataset import TomatoesDataset

In [36]:
def imshow_tensor(tensor):
  """Display an image tensor with matplotlib.

  Args:
    tensor: 3-D torch tensor whose first axis is moved to the last position
      before plotting (presumably a channels-first image, i.e.
      (channels, height, width) -> (height, width, channels)).
  """
  # detach() + cpu() so that tensors tracking gradients or stored on the GPU
  # can be converted to numpy as well; a plain .numpy() raises for those.
  image = tensor.detach().cpu().numpy().transpose(1, 2, 0)
  plt.imshow(image)

In [None]:
# Seed for the train/test split so that runs are comparable with each other
SPLIT_SEED = 42

# Prefer the local copy of the images when it exists; otherwise fall back to
# the Drive folder so the cell also works on a fresh runtime where the copy
# step has not been run.
local_img_dir = Path('.') / 'assignment_imgs'
img_dir = local_img_dir if local_img_dir.is_dir() else IMG_DIR_PATH

dataset = TomatoesDataset(img_dir, ANNOTATIONS_PATH, LABEL_MAPPING_PATH, sampling='under')

# 80% of the samples for training, the remainder for testing
train_ratio = 0.8
train_size = int(len(dataset) * train_ratio)
test_size = len(dataset) - train_size

train_set, test_set = random_split(
  dataset,
  [train_size, test_size],
  generator=torch.Generator().manual_seed(SPLIT_SEED),  # reproducible split
)

train_batch_size = 16
train_loader = DataLoader(train_set, shuffle=True, batch_size=train_batch_size)
# One sample per batch at test time, so the mean batch loss is the mean sample loss
test_loader = DataLoader(test_set, shuffle=False, batch_size=1)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# AlexNet with randomly initialised weights (no pretrained checkpoint)
model = models.alexnet(pretrained=False)

# Replace the last classifier layer with a two-unit head because we only have
# two classes; the input width is read from the layer being replaced.
head_in_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(in_features=head_in_features, out_features=2, bias=True)

model = model.to(device)

# The loss is applied to raw logits against two-column one-hot float targets
criterion = BCEWithLogitsLoss().to(device)
# Alternative tried previously: optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
num_epochs = 10


def _one_hot_targets(classes_ids):
  """Encode a batch of class ids as two-column float targets.

  Class id 0 becomes [1., 0.]; any other id becomes [0., 1.].

  Args:
    classes_ids: batch of class ids as yielded by the data loader.

  Returns:
    Float tensor of shape (batch, 2).
  """
  ids = torch.as_tensor(classes_ids).reshape(-1)
  is_class_zero = ids == 0
  return torch.stack((is_class_zero, ~is_class_zero), dim=1).float()


def _run_epoch(loader, train):
  """Run one pass over `loader` and return the mean per-batch loss.

  Args:
    loader: iterable yielding (inputs, classes_ids) batches.
    train: when True the model is put in training mode and the optimizer is
      stepped on every batch; when False the model is put in eval mode and
      gradient tracking is disabled.

  Returns:
    Mean of the batch losses, or NaN if the loader yields no batch.
  """
  model.train(train)
  # Follow whichever device the model lives on instead of hard-coding CUDA
  device = next(model.parameters()).device

  total_loss = 0.0
  n_batches = 0
  # No autograd graph is needed for evaluation; skipping it saves memory and time
  with torch.set_grad_enabled(train):
    for inputs, classes_ids in loader:
      inputs = inputs.to(device)
      labels = _one_hot_targets(classes_ids).to(device)

      outputs = model(inputs)
      loss = criterion(outputs, labels)

      if train:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

      total_loss += loss.item()
      n_batches += 1

  return total_loss / n_batches if n_batches else float('nan')


# NOTE(review): the recorded run shows losses of exactly 0.0 from the first
# epoch on, which is suspicious; possibly the targets are degenerate (e.g. a
# single class after sampling). Verify the class balance of the dataset.
for epoch in range(num_epochs):
  print(f'Epoch {epoch}')

  # Mean training loss, then mean test loss, for this epoch
  print(_run_epoch(train_loader, train=True))
  print(_run_epoch(test_loader, train=False))

  print('---')

Epoch 0
0.0025226586926592525
0.0
---
Epoch 1
0.0
0.0
---
Epoch 2
0.0
0.0
---
Epoch 3
0.0
0.0
---
Epoch 4
0.0
0.0
---
Epoch 5
0.0
0.0
---
Epoch 6
0.0
0.0
---
Epoch 7
0.0
0.0
---
Epoch 8
0.0
0.0
---
Epoch 9
0.0
0.0
---
