In [4]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from zipfile import ZipFile
from pathlib import Path
import cv2
from PIL import Image
import random
import wandb
import carvana_utils as utils
import torch
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from skimage.transform import resize

In [5]:
# Uncomment to log in to Weights & Biases so the training process can be logged
# wandb.login()

In [6]:
# build folders and unzip
# exist_ok=True replaces the check-then-create pattern: the cell can be re-run
# safely and no longer races between the existence check and the creation.
for folder in ("./train_images", "./train_masks"):
    os.makedirs(folder, exist_ok=True)
# uncomment below to create test directory
# (named test_images to match the extraction target used further down)
# os.makedirs("./test_images", exist_ok=True)

# Each archive is extracted into its own folder; the zip's internal top-level
# directory ends up nested inside that folder.
for archive, target in (
    ("../input/carvana-image-masking-challenge/train.zip", "./train_images"),
    ("../input/carvana-image-masking-challenge/train_masks.zip", "./train_masks"),
):
    with ZipFile(archive, "r") as zip_:
        zip_.extractall(path=target)

# uncomment below to unzip test images (many test images make it slow)
# with ZipFile("../input/carvana-image-masking-challenge/test.zip", "r") as zip_:
#     zip_.extractall(path="./test_images")

In [7]:
# Seed shared by every random number generator used in the notebook.
SEED = 42
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA instead of failing at model.to(DEVICE).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Folders created by extracting the training archives.
IMG_DIR = "./train_images/train"
MASK_DIR = "./train_masks/train_masks"

In [8]:
# Seed every RNG the notebook relies on, not only Python's `random`:
# NumPy and PyTorch (weight initialisation, DataLoader shuffling) keep
# their own generators and would otherwise differ from run to run.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [9]:
# filename is e.g. '0d3adbbc9a8b_02.jpg'
# os.listdir returns entries in arbitrary (filesystem-dependent) order, so the
# ids are sorted before the seeded shuffle; otherwise the same seed could
# produce a different train/validation split on another machine.
img_ids = sorted(os.path.splitext(filename)[0] for filename in os.listdir(IMG_DIR))
random.shuffle(img_ids)
# split validation and training dataset: the first fifth is held out for validation
n_val = len(img_ids) // 5
val_ids = img_ids[:n_val]
train_ids = img_ids[n_val:]

In [10]:
# basic preprocessing with albumentations (resize + conversion to tensor)
def get_albums(img_shape, mask_shape):
    """Build the albumentations pipelines applied to images and to masks.

    Each pipeline resizes its input and then applies ``ToTensorV2``.

    Args:
        img_shape: ``(height, width)`` the images are resized to.
        mask_shape: ``(height, width)`` the masks are resized to.

    Returns:
        A tuple ``(img_albums, mask_albums)`` of ``A.Compose`` pipelines.
    """
    img_albums = A.Compose([
        A.Resize(img_shape[0], img_shape[1]),
        ToTensorV2()])
    # The mask pipeline must honour mask_shape; it previously reused img_shape,
    # which silently ignored the mask_shape argument.
    # NOTE(review): Resize uses its default interpolation here; if the dataset
    # passes masks through the `image=` target, nearest-neighbour may be
    # preferable to keep masks binary -- confirm against CarvanaDataset.
    mask_albums = A.Compose([
        A.Resize(mask_shape[0], mask_shape[1]),
        ToTensorV2()])
    return img_albums, mask_albums

In [11]:
def run_training_unet(save_model=False):
    """Train the U-Net on the train/validation split and log to Weights & Biases.

    A W&B run is started with the default hyper-parameters below (values
    supplied through the W&B config, e.g. by a sweep, are read from
    ``wandb.config``). After every epoch the training loss, validation loss
    and Dice coefficient are printed and logged. The run is always finished
    before the function returns or re-raises.

    Args:
        save_model: When True, the whole model object is written to
            ``model.pth`` every time the validation loss improves.

    Returns:
        The lowest validation loss observed (``np.inf`` if no epoch ran).
    """
    # wandb preferences (configurations and initialization)
    config_defaults = {
        "lr": 1e-4,
        "epochs": 20,
        "img_shape": (512, 512),
        "mask_shape": (512, 512),
        # previously hard-coded in the DataLoader call; now tunable via config
        "batch_size": 8,
        "num_workers": 8,
    }
    wandb.init(project="carvana", config=config_defaults)
    config = wandb.config

    try:
        # get albumentations
        img_albums, mask_albums = get_albums(config.img_shape, config.mask_shape)

        split_ids = {"train": train_ids, "val": val_ids}
        datasets = {
            split: utils.CarvanaDataset(ids, IMG_DIR, MASK_DIR, img_albums, mask_albums)
            for split, ids in split_ids.items()
        }
        # only the training data is shuffled; validation order stays fixed
        dataloaders = {
            split: DataLoader(datasets[split],
                              batch_size=config.batch_size,
                              num_workers=config.num_workers,
                              shuffle=(split == "train"))
            for split in ["train", "val"]
        }

        model = utils.UNET()
        model.to(DEVICE)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
        eng = utils.Engine(model, optimizer, device=DEVICE)

        best_loss = np.inf

        for epoch in range(config.epochs):
            train_loss = eng.train(dataloaders["train"])
            val_loss, val_dicecoeff = eng.evaluate(dataloaders["val"])
            print("Epoch [{}/{}] Train Loss: {:.4f} Valid Loss: {:.4f} Dice Coeff.: {:.4f}".format(
                epoch+1, config.epochs, train_loss, val_loss, val_dicecoeff))
            if val_loss < best_loss:
                best_loss = val_loss
                if save_model:
                    # NOTE(review): this pickles the full model object (not a
                    # state_dict), so loading requires the UNET class to be
                    # importable; kept as-is so existing loaders keep working.
                    torch.save(model, "model.pth")

            wandb.log({
                "Training loss": train_loss,
                "Validation loss": val_loss,
                "Dice Coefficient": val_dicecoeff
            })
    except BaseException:
        # mark the run as failed instead of leaving it open in the kernel
        wandb.finish(exit_code=1)
        raise

    # close the run so a later call in the same kernel starts a clean one
    wandb.finish()
    return best_loss

In [12]:
# run_training_unet(save_model=True)