In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import glob
import os
import numpy as np
from PIL import Image
import torchvision.models as models
import mean_iou_evaluate
import viz_mask
import imageio
from tqdm import tqdm


In [2]:
class HW2IMGS(Dataset):
    def __init__(self, root, transform=None):
        " initial the dataset "
        self.X_image = None
        self.y_label = None
        self.X_filenames = []
        self.y_filenames = []
        self.root = root
        self.transform = transform

        # read filenames
        X_filenames = glob.glob(root+'/*.jpg')
        y_filenames = glob.glob(root+'/*.png')
        for i in range(len(X_filenames)):
            self.X_filenames.append(os.path.splitext(
                os.path.basename(X_filenames[i]))[0])
            self.y_filenames.append(os.path.splitext(
                os.path.basename(y_filenames[i]))[0])

        self.len = len(self.X_filenames)

    def __getitem__(self, index):
        X_filename, y_filename = self.X_filenames[index], self.y_filenames[index]

        X_image = Image.open(self.root+X_filename+'.jpg')
        X_shape = imageio.imread(self.root+X_filename+'.jpg').shape

        y_image = imageio.imread(self.root+y_filename+'.png')
        y_label = viz_mask.read_masks(y_image, X_shape)

        if self.transform is not None:
            X_image = self.transform(X_image)

        " get a sample from the dataset "
        # if torch.cuda.is_available():
        #   X_image, y_label = X_image.cuda(), y_label.cuda()

        return X_image, y_label

    def __len__(self):
        " Total number of sampler in the dataset "
        return self.len


In [3]:
from torchvision import transforms
trainset = HW2IMGS(root='p2_data/train/', transform=transforms.Compose([
    # transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]))

valset = HW2IMGS(root='p2_data/validation/', transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]))
print(len(trainset))
print(len(valset))


2000
257


In [4]:
trainset_loader = DataLoader(trainset, batch_size=2, shuffle=True)
valset_loader = DataLoader(valset, batch_size=2, shuffle=False)


In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cuda


In [6]:
from torchsummary import summary
# summary(vgg, (3, 512, 512))


class FCN32(nn.Module):
    def __init__(self):
        super(FCN32, self).__init__()
        self.vgg_feature = models.vgg16(pretrained=True).features

        self.vgg_fc = nn.Sequential(
            nn.Conv2d(512, 4096, 1),
            nn.ReLU(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(),
            nn.Conv2d(4096, 7, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(7, 7, 32, 32),
        )

    def forward(self, x):
        x = self.vgg_feature(x)
        x = self.vgg_fc(x)
        return x


model = FCN32().to(device)  # Remember to move the model to "device"


In [7]:
#summary(model, (3, 512, 512))

In [8]:
import matplotlib.pyplot as plt
train_acc_his, train_loss_his = [], []
val_acc_his, val_loss_his = [], []

In [9]:
def train(model, epoch, log_interval=200):
    optimizer = optim.AdamW(model.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    criterion = nn.CrossEntropyLoss()
    model.train()  # Important: set training mode
    for ep in range(epoch):
        iteration = 0
        correct = 0
        total_train = 0
        for batch_idx, (data, target) in enumerate(tqdm(trainset_loader)):

            loss = 0.0
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            output = model(data)

            output = torch.nn.functional.log_softmax(output, dim=1)
            target = torch.tensor(target, dtype=torch.long, device=device)
            loss = criterion(output, target)

            loss.backward()
            optimizer.step()



            # get the index of the max log-probability

            total_train += target.nelement()

            pred = output.max(1, keepdim=True)[1]
            pred = pred.eq(target.view_as(pred))
            # print(pred)
            correct += pred.eq(target.view_as(pred)).sum().item()
            iteration+=1



            if (iteration % log_interval == 0):
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {}/{} ({:.0f}%)'.format(
                    ep+1, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item(),
                    correct, total_train,
                    100. * correct / total_train))
                

        train_acc_his.append(100. * correct / len(trainset_loader.dataset))
        train_loss_his.append(loss.item())
        val(model)  # Evaluate at the end of each epoch
        scheduler.step()

    # summarize history for accuracy
    plt.plot(train_acc_his)
    plt.plot(val_acc_his)
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for loss plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('model loss')
    plt.plot(train_loss_his)
    plt.plot(val_loss_his)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


In [21]:
def val(model):
    criterion = nn.CrossEntropyLoss()
    model.eval()  # Important: set evaluation mode
    val_loss = 0
    correct = 0
    total_val = 0
    outs = np.empty((0,512,512))
    tars = []

    with torch.no_grad():  # This will free the GPU memory used for back-prop
        for data, target in valset_loader:
            
            data, target = data.to(device), target.to(device)
            output = model(data)
            tars.append(target)

            output = torch.nn.functional.log_softmax(output, dim=1)
            # print(output.shape, target.shape)
            target = torch.tensor(target, dtype=torch.long, device=device)
            # target.clone().detach()
            val_loss += criterion(output, target)
            total_val += target.nelement()
            # get the index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

            pred = output.max(1, keepdim=False)[1].cpu().numpy()
            labels = target.cpu().numpy()
            # print(pred.shape)
            # print(labels.shape)
            mean_iou_evaluate.mean_iou_score(pred,labels)
            # outs.append(output.max(1)[1].numpy())
            
    for i in range(len(outs)):
        pass

    val_loss /= len(valset_loader.dataset)
    print('\nVal set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, total_val,
        100. * correct / total_val))

    val_acc_his.append(100. * correct / len(valset_loader.dataset))
    val_loss_his.append(val_loss)


In [22]:
# train(model, epoch=10)
val(model)


  target = torch.tensor(target, dtype=torch.long, device=device)
  iou = tp / (tp_fp + tp_fn - tp)


class #0 : 0.00000
class #1 : 0.00000
class #2 : 0.00000
class #3 : nan
class #4 : 0.00000
class #5 : 0.50351

mean_iou: nan

class #0 : 0.00877
class #1 : 0.00000
class #2 : 0.00000
class #3 : nan
class #4 : 0.00000
class #5 : 0.00000

mean_iou: nan

class #0 : 0.00000
class #1 : 0.00000
class #2 : 0.00000
class #3 : nan
class #4 : 0.00000
class #5 : 0.00000

mean_iou: nan

class #0 : 0.01014
class #1 : 0.00000
class #2 : 0.00000
class #3 : nan
class #4 : 0.00000
class #5 : 0.00664

mean_iou: nan

class #0 : 0.00818
class #1 : 0.00000
class #2 : 0.00000
class #3 : nan
class #4 : 0.00000
class #5 : 0.00000

mean_iou: nan

class #0 : 0.02088
class #1 : 0.00000
class #2 : 0.00000
class #3 : 0.00000
class #4 : 0.00008
class #5 : 0.00066

mean_iou: 0.003602

class #0 : 0.00139
class #1 : 0.00000
class #2 : 0.00001
class #3 : nan
class #4 : 0.00001
class #5 : 0.13579

mean_iou: nan

class #0 : 0.02748
class #1 : 0.00000
class #2 : 0.00000
class #3 : nan
class #4 : 0.00031
class #5 : 0.00000

KeyboardInterrupt: 

In [36]:
# test code for Mean iou
# a = torch.randn(1, 7, 3, 3)
# # print(a)
# print(a)
# a = torch.torch.nn.functional.log_softmax(a, dim=1)
# print(a.shape)
# print(a)
# a = a.numpy()
# print(a.shape)
b = torch.randn(7,512,512).numpy()
b= np.concatenate(b,b)
b.shape
# b = torch.argmax(b,dim = 0)
# b = b.numpy()
# mean_iou_evaluate.mean_iou_score(a,b)


(7, 512, 512)


TypeError: only integer scalar arrays can be converted to a scalar index

https://zhuanlan.zhihu.com/p/32506912

In [28]:
outs = np.empty((0,512,512))
outs.shape
print(outs)

[]


https://medium.com/ching-i/fully-convolutional-networks-%E8%AB%96%E6%96%87%E9%96%B1%E8%AE%80-246aa68ce4ad

https://github.com/JustinHeaton/fully-convolutional-networks/blob/master/FCN32.ipynb

https://www.malaoshi.top/show_1EF53OZZLiWQ.html

In [None]:
import torch
a = torch.randn(30,7, 512, 512)
# c = a.max(1, keepdim=True)[1]
c = a.max(1)[1].numpy()
b = torch.argmax(a, dim=1).numpy()
# print(c)
print(c.shape)
# print(b.shape)
mean_iou_evaluate.mean_iou_score(c,c)

# print(np.concatenate((b, b), axis=0).shape)



In [None]:
print(a)
print(b)


In [None]:
a = 