In [None]:
import os 
import datetime
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as trv
from PIL import Image
import random
import utils
import logging

In [None]:
# Run configuration: everything a reader might want to tune lives here.
VOC_DIR = "dataset/VOCdevkit/VOC2012/"  # dataset root handed to utils.VOC_dataset
BATCH_SIZE = 16  # batch size for both the training and validation loaders
NUM_CLASSES = 21  # number of output classes passed to utils.ResNet18_FCN
NUM_EPOCHS = 30  # number of passes over the training loader
Learning_Rate = 0.001  # SGD learning rate
Weight_Decay = 1e-3  # SGD weight decay

In [None]:
# Take the timestamp (month, day, hour, minute) once so the log file and the
# weights file of a single run always share the same stem; calling now() twice
# could straddle a minute boundary and produce mismatched names.
run_stamp = datetime.datetime.now().strftime("%m%d%H%M")
log_name = run_stamp + ".log"
log_path = os.path.join("logs", log_name)
weights_name = run_stamp + ".pth"
save_path = os.path.join("weights", weights_name)

# Create the output directories up front: opening the log file fails when
# "logs/" is missing, and a missing "weights/" would only surface when the
# weights are written, i.e. after the whole training run.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
os.makedirs(os.path.dirname(save_path), exist_ok=True)

logging.basicConfig(level=logging.DEBUG, filename=log_path, filemode='w', format="")

# is_available must be *called*: the bare function object is always truthy,
# which selected "cuda" even on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training and validation splits of the dataset rooted at VOC_DIR.
Train_dataset = utils.VOC_dataset(VOC_DIR)
Valid_dataset = utils.VOC_dataset(VOC_DIR, train=False)

# Training batches are shuffled and the incomplete last batch is dropped so
# every optimisation step sees exactly BATCH_SIZE samples.
Train_loader = torch.utils.data.DataLoader(Train_dataset, BATCH_SIZE, shuffle=True,
                                           drop_last=True, num_workers=4)
# Validation keeps the final partial batch: dropping it would silently exclude
# up to BATCH_SIZE - 1 samples from the reported validation loss. The loss is
# averaged over the number of samples actually seen, so a smaller last batch
# is handled correctly.
Valid_loader = torch.utils.data.DataLoader(Valid_dataset, BATCH_SIZE, shuffle=False,
                                           drop_last=False, num_workers=4)

# Segmentation network with one output channel per class.
model = utils.ResNet18_FCN(NUM_CLASSES)

def loss_fun(inputs, targets):
    """Return the cross-entropy loss averaged separately for each sample.

    The unreduced cross-entropy is averaged over dimension 1 twice, so every
    dimension after the batch dimension is collapsed in turn. For logits of
    shape (N, C, H, W) and targets of shape (N, H, W) the result has shape (N,).

    Args:
        inputs: Raw class scores (logits) from the model.
        targets: Integer class indices matching the layout of ``inputs``.

    Returns:
        Tensor holding one mean loss value per sample.
    """
    per_element = F.cross_entropy(inputs, targets, reduction="none")
    # First mean removes dim 1; the following dim then becomes dim 1 and is
    # averaged away by the second mean.
    reduced_once = per_element.mean(dim=1)
    return reduced_once.mean(dim=1)

optimizer = torch.optim.SGD(model.parameters(), lr=Learning_Rate, weight_decay=Weight_Decay)

model.to(device)


def _report(message):
    """Echo one progress line to the notebook output and to the log file."""
    print(message)
    logging.info(message)


def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return the mean per-sample loss.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        training: When True the model is put in train mode and the optimizer
            is stepped on every batch; when False the model is put in eval
            mode and no gradients are tracked.

    Returns:
        Sum of the per-sample losses divided by the number of samples seen,
        or NaN when the loader yields no batches.
    """
    model.train(training)
    total_loss = 0.0
    sample_num = 0
    # Autograd is only enabled for the training pass.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            # loss_fun yields one value per sample; they are summed here and
            # normalised by the sample count once the pass is complete.
            loss = loss_fun(outputs, labels).sum()
            if training:
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
            sample_num += labels.shape[0]
    if sample_num == 0:
        # An empty loader (e.g. fewer samples than one batch with
        # drop_last=True) would otherwise raise ZeroDivisionError.
        return float("nan")
    return total_loss / sample_num


for epoch in range(NUM_EPOCHS):
    _report(f"Epoch: {epoch+1} ")

    train_loss = _run_epoch(Train_loader, training=True)
    _report(f'train Loss: {train_loss:.4f}')

    valid_loss = _run_epoch(Valid_loader, training=False)
    _report(f'valid Loss: {valid_loss:.4f}')

# Move the model to CPU before saving so the stored state dict holds CPU
# tensors. The model stays on the CPU afterwards.
model.to("cpu")

# Make sure the target directory exists so a finished training run is not
# lost to a missing folder at save time.
os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
torch.save(model.state_dict(), save_path)

In [None]:
def predict(img):
    """Predict a per-pixel class-index map for a single image.

    Args:
        img: Image in whatever form the dataset's ``normalize_image`` accepts
            (defined in ``utils``; not visible here).

    Returns:
        2-D tensor of predicted class indices (argmax over dimension 1 of the
        model output), located on the model's device.
    """
    # `Test_loader` is never defined in this notebook, so the original lookup
    # raised NameError. The validation loader's dataset is used instead;
    # presumably it applies the same preprocessing (confirm in utils).
    X = Valid_loader.dataset.normalize_image(img).unsqueeze(0)
    # Follow the model's current device rather than the global `device`: the
    # model is moved to the CPU after training, so the two can differ.
    model_device = next(model.parameters()).device
    model.eval()
    with torch.no_grad():  # inference only, no autograd graph needed
        pred = model(X.to(model_device)).argmax(dim=1)
    # Drop the leading batch dimension of size 1.
    return pred.reshape(pred.shape[1], pred.shape[2])

In [None]:
def label2image(pred):
    """Turn a tensor of class indices into colors via ``VOC_COLORMAP``.

    Args:
        pred: Tensor of class indices; it is cast to ``long`` before indexing.

    Returns:
        Tensor with the shape of ``pred`` plus a trailing dimension of size 3
        holding the color-table row selected for each element.
    """
    palette = torch.tensor(VOC_COLORMAP, device=device)
    class_ids = pred.long()
    # Index the color table with the class ids: one row per element.
    return palette[class_ids, :]

In [None]:
# Grab the first validation batch; the cells below run the model on it and plot the result.
inputs, labels = next(iter(Valid_loader))

In [None]:
# Predicted class-index map for the first image of the batch. This is pure
# inference, so put the model in eval mode and skip building an autograd
# graph for the whole batch.
model.eval()
with torch.no_grad():
    output = model(inputs).argmax(dim=1)[0]

In [None]:
# Color lookup table used by label2image: row i is the 3-component color drawn
# for class index i. It has 21 rows, matching NUM_CLASSES = 21.
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
                [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
                [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
                [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
                [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]]

In [None]:
# Distinct label values present in the first sample's ground-truth mask.
torch.unique(labels[0])

In [None]:
# Color the predicted class map and hand it to matplotlib as a NumPy array.
predicted_colors = label2image(output).to("cpu").numpy()
plt.imshow(predicted_colors)

In [None]:
# Per-channel scale and offset applied to the network input before display;
# presumably the std/mean used by the dataset's normalization (confirm in utils).
channel_std = np.array([0.229, 0.224, 0.225])
channel_mean = np.array([0.485, 0.456, 0.406])

# Channels-first tensor -> channels-last NumPy array, as imshow expects.
image_hwc = inputs[0].permute(1, 2, 0).to("cpu").numpy()

# De-normalized values can land slightly outside [0, 1]; clip explicitly so
# imshow receives a valid float RGB image instead of clipping with a warning.
plt.imshow(np.clip(image_hwc * channel_std + channel_mean, 0.0, 1.0))