In [None]:
import wandb
import dsb18_utility as utils

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from zipfile import ZipFile

import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# login for weights and biases for logging the training process
# wandb.login()

In [None]:
# Random seed passed as random_state to the train/validation split below.
SEED = 42

# Unzip Data

In [None]:
# build folders and unzip
# Create the target folders; exist_ok=True keeps the cell safe to re-run.
os.makedirs("./train", exist_ok=True)
os.makedirs("./labels", exist_ok=True)

# The archives are bound to `archive` rather than `zip`: a top-level
# `with ... as zip` in a notebook rebinds the built-in zip() for every
# cell executed afterwards.
with ZipFile("../input/data-science-bowl-2018/stage1_train.zip", "r") as archive:
    archive.extractall(path="./train")

with ZipFile("../input/data-science-bowl-2018/stage1_train_labels.csv.zip", "r") as archive:
    archive.extractall(path="./labels")

# Train/Val Split

In [None]:
# Load the training labels from the CSV extracted into ./labels above.
label_df = pd.read_csv("./labels/stage1_train_labels.csv")

In [None]:
# Split by image instead of by row. The Data Science Bowl 2018 label file is
# expected to hold one row per nucleus mask, so a plain row-wise split would
# put rows of the same image into both subsets and leak validation images
# into the training set.
IMAGE_ID_COL = "ImageId"  # assumed image-identifier column -- TODO confirm against the CSV header
if IMAGE_ID_COL in label_df.columns:
    train_ids, val_ids = train_test_split(
        label_df[IMAGE_ID_COL].unique(), test_size=0.2, random_state=SEED)
    train_df = label_df[label_df[IMAGE_ID_COL].isin(train_ids)]
    val_df = label_df[label_df[IMAGE_ID_COL].isin(val_ids)]
else:
    # No image-id column available: keep the plain row-wise split.
    train_df, val_df = train_test_split(label_df, test_size=0.2, random_state=SEED)

# save dataframes as csv
# train_df.to_csv("./labels/train_df.csv", index=False)
# val_df.to_csv("./labels/val_df.csv", index=False)

In [None]:
# Report the number of rows in each split.
for split_name, split_df in (("training", train_df), ("validation", val_df)):
    print(f"{len(split_df)} images for {split_name} set")

# Simple Model Baseline
### Model with several Convolutional Layers

In [None]:
def _resize_to_tensor():
    """Build a fresh pipeline: resize to 256x256, then convert to a tensor."""
    steps = [A.Resize(256, 256), ToTensorV2()]
    return A.Compose(steps)


# Images and masks go through the same steps but get separate pipeline objects.
input_albums = _resize_to_tensor()
mask_albums = _resize_to_tensor()

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
def run_training_simplenet(epochs, save_model=False, batch_size=8,
                           num_workers=8, lr=1e-4, model_path="model.pth"):
    """Train ``utils.SimpleNet`` and track the best validation loss.

    Datasets are built from the notebook-level ``train_df`` / ``val_df`` and
    the ``input_albums`` / ``mask_albums`` pipelines; the model runs on
    ``DEVICE``.

    Args:
        epochs: Number of train/evaluate rounds to run.
        save_model: If True, the whole model object is written with
            ``torch.save`` every time the validation loss improves.
        batch_size: Batch size for both DataLoaders.
        num_workers: Number of worker processes for each DataLoader.
        lr: Learning rate of the Adam optimizer.
        model_path: File the model is saved to when ``save_model`` is True.

    Returns:
        The lowest validation loss seen, or ``np.inf`` if no epoch ran.
    """
    datasets = {
        "train": utils.NucleiDataset("./train", train_df, input_albums, mask_albums),
        "val": utils.NucleiDataset("./train", val_df, input_albums, mask_albums),
    }
    dataloaders = {
        split: DataLoader(
            datasets[split],
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=(split == "train"),  # only the training split is shuffled
        )
        for split in ("train", "val")
    }

    model = utils.SimpleNet()
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf

    for epoch in range(epochs):
        train_loss, train_iou = eng.train(dataloaders["train"])
        val_loss, val_iou = eng.evaluate(dataloaders["val"])
        print("epoch [{}/{}] train loss: {:.4f} train IOU: {:.4f} "
              "valid loss: {:.4f} valid IOU: {:.4f}".format(
                      epoch+1, epochs, train_loss, train_iou, val_loss, val_iou))
        if val_loss < best_loss:
            best_loss = val_loss
            if save_model:
                # Overwrites the previous file, so only the best model is kept.
                torch.save(model, model_path)
    return best_loss

In [None]:
# uncomment to start training
# run_training_simplenet(5, save_model=True)

In [None]:
# runs training with simple net and prints inputs, masks and predictions
def run_training_simplenet_print(epochs, batch_size=8, num_workers=8, lr=1e-4):
    """Train ``utils.SimpleNet`` and inspect a fixed batch after every epoch.

    One batch is drawn from each DataLoader before training starts via
    ``utils.get_one_batch``; after every epoch ``utils.check_output`` is called
    on both batches with the current model.

    Args:
        epochs: Number of train/evaluate rounds to run.
        batch_size: Batch size for both DataLoaders.
        num_workers: Number of worker processes for each DataLoader.
        lr: Learning rate of the Adam optimizer.

    Returns:
        The lowest validation loss seen, or ``np.inf`` if no epoch ran.
    """
    datasets = {
        "train": utils.NucleiDataset("./train", train_df, input_albums, mask_albums),
        "val": utils.NucleiDataset("./train", val_df, input_albums, mask_albums),
    }
    dataloaders = {
        split: DataLoader(
            datasets[split],
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=(split == "train"),  # only the training split is shuffled
        )
        for split in ("train", "val")
    }

    model = utils.SimpleNet()
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Fixed reference batches, reused after every epoch.
    img_types_train, img_id_train = utils.get_one_batch(dataloaders["train"])
    img_types_val, img_id_val = utils.get_one_batch(dataloaders["val"])

    best_loss = np.inf
    for epoch in range(epochs):
        train_loss, train_iou = eng.train(dataloaders["train"])
        val_loss, val_iou = eng.evaluate(dataloaders["val"])
        print("epoch [{}/{}] train loss: {:.4f} train IOU: {:.4f} "
              "valid loss: {:.4f} valid IOU: {:.4f}".format(
                      epoch+1, epochs, train_loss, train_iou, val_loss, val_iou))
        utils.check_output(img_types_train, img_id_train, model, DEVICE)
        utils.check_output(img_types_val, img_id_val, model, DEVICE)
        best_loss = min(best_loss, val_loss)
    return best_loss

In [None]:
# uncomment to start training
# run_training_simplenet_print(15)

In [None]:
# runs training with u-net
def run_training_unet(epochs, save_model=False, batch_size=8,
                      num_workers=8, lr=1e-4, model_path="model.pth"):
    """Train ``utils.UNET`` and track the best validation loss.

    Datasets are built from the notebook-level ``train_df`` / ``val_df`` and
    the ``input_albums`` / ``mask_albums`` pipelines; the model runs on
    ``DEVICE``.

    Args:
        epochs: Number of train/evaluate rounds to run.
        save_model: If True, the whole model object is written with
            ``torch.save`` every time the validation loss improves.
        batch_size: Batch size for both DataLoaders.
        num_workers: Number of worker processes for each DataLoader.
        lr: Learning rate of the Adam optimizer.
        model_path: File the model is saved to when ``save_model`` is True.
            The default matches the path used by ``run_training_simplenet``,
            so pass a different path to keep both models.

    Returns:
        The lowest validation loss seen, or ``np.inf`` if no epoch ran.
    """
    datasets = {
        "train": utils.NucleiDataset("./train", train_df, input_albums, mask_albums),
        "val": utils.NucleiDataset("./train", val_df, input_albums, mask_albums),
    }
    dataloaders = {
        split: DataLoader(
            datasets[split],
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=(split == "train"),  # only the training split is shuffled
        )
        for split in ("train", "val")
    }

    model = utils.UNET()
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf

    for epoch in range(epochs):
        train_loss, train_iou = eng.train(dataloaders["train"])
        val_loss, val_iou = eng.evaluate(dataloaders["val"])
        print("epoch [{}/{}] train loss: {:.4f} train IOU: {:.4f} "
              "valid loss: {:.4f} valid IOU: {:.4f}".format(
                      epoch+1, epochs, train_loss, train_iou, val_loss, val_iou))
        if val_loss < best_loss:
            best_loss = val_loss
            if save_model:
                # Overwrites the previous file, so only the best model is kept.
                torch.save(model, model_path)
    return best_loss

In [None]:
# runs training with u-net and prints inputs, masks and predictions
def run_training_unet_print(epochs, batch_size=8, num_workers=8, lr=1e-4):
    """Train ``utils.UNET`` and inspect a fixed batch after every epoch.

    One batch is drawn from each DataLoader before training starts via
    ``utils.get_one_batch``; after every epoch ``utils.check_output`` is called
    on both batches with the current model.

    Args:
        epochs: Number of train/evaluate rounds to run.
        batch_size: Batch size for both DataLoaders.
        num_workers: Number of worker processes for each DataLoader.
        lr: Learning rate of the Adam optimizer.

    Returns:
        The lowest validation loss seen, or ``np.inf`` if no epoch ran.
    """
    datasets = {
        "train": utils.NucleiDataset("./train", train_df, input_albums, mask_albums),
        "val": utils.NucleiDataset("./train", val_df, input_albums, mask_albums),
    }
    dataloaders = {
        split: DataLoader(
            datasets[split],
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=(split == "train"),  # only the training split is shuffled
        )
        for split in ("train", "val")
    }

    model = utils.UNET()
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Fixed reference batches, reused after every epoch.
    img_types_train, img_id_train = utils.get_one_batch(dataloaders["train"])
    img_types_val, img_id_val = utils.get_one_batch(dataloaders["val"])

    best_loss = np.inf
    for epoch in range(epochs):
        train_loss, train_iou = eng.train(dataloaders["train"])
        val_loss, val_iou = eng.evaluate(dataloaders["val"])
        print("epoch [{}/{}] train loss: {:.4f} train IOU: {:.4f} "
              "valid loss: {:.4f} valid IOU: {:.4f}".format(
                      epoch+1, epochs, train_loss, train_iou, val_loss, val_iou))
        utils.check_output(img_types_train, img_id_train, model, DEVICE)
        utils.check_output(img_types_val, img_id_val, model, DEVICE)
        best_loss = min(best_loss, val_loss)
    return best_loss

In [None]:
# uncomment to start training
# run_training_unet(10, save_model=True)