In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import glob
import os
import numpy as np
from PIL import Image
import torchvision.models as models
import mean_iou_evaluate
import viz_mask
import imageio
from tqdm import tqdm


In [2]:
class HW2IMGS(Dataset):
    def __init__(self, root, transform=None):
        " initial the dataset "
        self.X_image = None
        self.y_label = None
        self.X_filenames = []
        self.y_filenames = []
        self.root = root
        self.transform = transform

        # read filenames
        X_filenames = glob.glob(root+'/*.jpg')
        y_filenames = glob.glob(root+'/*.png')
        for i in range(len(X_filenames)):
            self.X_filenames.append(os.path.splitext(
                os.path.basename(X_filenames[i]))[0])
            self.y_filenames.append(os.path.splitext(
                os.path.basename(y_filenames[i]))[0])

        self.len = len(self.X_filenames)

    def __getitem__(self, index):
        X_filename, y_filename = self.X_filenames[index], self.y_filenames[index]

        X_image = Image.open(self.root+X_filename+'.jpg')
        X_shape = imageio.imread(self.root+X_filename+'.jpg').shape

        y_image = imageio.imread(self.root+y_filename+'.png')
        y_label = viz_mask.read_masks(y_image, X_shape)

        if self.transform is not None:
            X_image = self.transform(X_image)

        " get a sample from the dataset "
        # if torch.cuda.is_available():
        #   X_image, y_label = X_image.cuda(), y_label.cuda()

        return X_image, y_label

    def __len__(self):
        " Total number of sampler in the dataset "
        return self.len


In [3]:
from torchvision import transforms
trainset = HW2IMGS(root='p2_data/train/', transform=transforms.Compose([
    # transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]))

valset = HW2IMGS(root='p2_data/validation/', transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]))
print(len(trainset))
print(len(valset))


2000
257


In [4]:
trainset_loader = DataLoader(trainset, batch_size=1, shuffle=True)
valset_loader = DataLoader(valset, batch_size=1, shuffle=False)


In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cuda


In [6]:
from torchsummary import summary
# summary(vgg, (3, 512, 512))


class FCN32(nn.Module):
    def __init__(self):
        super(FCN32, self).__init__()
        self.vgg_feature = models.vgg16(pretrained=True).features

        self.vgg_fc = nn.Sequential(
            nn.Conv2d(512, 4096, 1),
            nn.ReLU(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(),
            nn.Conv2d(4096, 7, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(7, 7, 32, 32),
        )

    def forward(self, x):
        x = self.vgg_feature(x)
        x = self.vgg_fc(x)
        return x


model = FCN32().to(device)  # Remember to move the model to "device"
print(model)


FCN32(
  (vgg_feature): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilat

In [7]:
summary(model, (3, 512, 512))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 512, 512]           1,792
              ReLU-2         [-1, 64, 512, 512]               0
            Conv2d-3         [-1, 64, 512, 512]          36,928
              ReLU-4         [-1, 64, 512, 512]               0
         MaxPool2d-5         [-1, 64, 256, 256]               0
            Conv2d-6        [-1, 128, 256, 256]          73,856
              ReLU-7        [-1, 128, 256, 256]               0
            Conv2d-8        [-1, 128, 256, 256]         147,584
              ReLU-9        [-1, 128, 256, 256]               0
        MaxPool2d-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 256, 128, 128]         295,168
             ReLU-12        [-1, 256, 128, 128]               0
           Conv2d-13        [-1, 256, 128, 128]         590,080
             ReLU-14        [-1, 256, 1

In [8]:
import matplotlib.pyplot as plt

train_acc_his, train_loss_his = [], []
val_acc_his, val_loss_his = [], []


In [9]:
def train(model, epoch, log_interval=100):
    optimizer = optim.AdamW(model.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    criterion = nn.CrossEntropyLoss()
    model.train()  # Important: set training mode
    for ep in range(epoch):
        iteration = 0
        correct = 0
        total_train = 0
        for batch_idx, (data, target) in enumerate(tqdm(trainset_loader)):

            loss = 0.0
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            output = model(data)
            # print(output.shape)

            output = torch.nn.functional.log_softmax(output, dim=1)
            target = torch.tensor(target, dtype=torch.long, device=device)
            loss = criterion(output, target)
            # print(mean_iou_evaluate.mean_iou_score(output,target))
    #         # print(loss)
            loss.backward()

            optimizer.step()

            # get the index of the max log-probability

            total_train += target.nelement()

            pred = output.max(1, keepdim=True)[1]
            pred = pred.eq(target.view_as(pred))
            # print(pred)
            correct += pred.eq(target.view_as(pred)).sum().item()
            iteration+=1

            if (iteration % log_interval == 0):
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {}/{} ({:.0f}%)'.format(
                    ep+1, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item(),
                    correct, total_train,
                    100. * correct / total_train))
                

        train_acc_his.append(100. * correct / len(trainset_loader.dataset))
        train_loss_his.append(loss.item())
        val(model)  # Evaluate at the end of each epoch
        scheduler.step()

    # summarize history for accuracy
    plt.plot(train_acc_his)
    plt.plot(val_acc_his)
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for loss plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('model loss')
    plt.plot(train_loss_his)
    plt.plot(val_loss_his)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


In [10]:
def val(model):
    criterion = nn.CrossEntropyLoss()
    model.eval()  # Important: set evaluation mode
    val_loss = 0
    correct = 0
    total_val = 0
    with torch.no_grad():  # This will free the GPU memory used for back-prop
        for data, target in valset_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            output = torch.nn.functional.log_softmax(output, dim=1)
            # print(output.shape, target.shape)
            target = torch.tensor(target, dtype=torch.long, device=device)
            # target.clone().detach()
            val_loss += criterion(output, target)
            total_val += target.nelement()
            # get the index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    val_loss /= len(valset_loader.dataset)
    print('\nVal set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, total_val,
        100. * correct / total_val))

    val_acc_his.append(100. * correct / len(valset_loader.dataset))
    val_loss_his.append(val_loss)


In [11]:
train(model, epoch=10)


  target = torch.tensor(target, dtype=torch.long, device=device)
  5%|▌         | 100/2000 [00:29<09:24,  3.37it/s]



 10%|█         | 200/2000 [00:59<09:00,  3.33it/s]



 15%|█▌        | 300/2000 [01:30<08:45,  3.23it/s]



 20%|██        | 400/2000 [02:01<08:20,  3.20it/s]



 25%|██▌       | 500/2000 [02:33<07:54,  3.16it/s]



 30%|███       | 600/2000 [03:04<07:24,  3.15it/s]



 35%|███▌      | 700/2000 [03:36<06:52,  3.15it/s]



 40%|████      | 800/2000 [04:08<06:20,  3.15it/s]



 45%|████▌     | 900/2000 [04:39<05:47,  3.16it/s]



 50%|█████     | 1000/2000 [05:11<05:16,  3.15it/s]



 55%|█████▌    | 1100/2000 [05:43<04:45,  3.15it/s]



 60%|██████    | 1200/2000 [06:14<04:13,  3.16it/s]



 65%|██████▌   | 1300/2000 [06:46<03:41,  3.16it/s]



 70%|███████   | 1400/2000 [07:18<03:10,  3.15it/s]



 75%|███████▌  | 1500/2000 [07:49<02:37,  3.17it/s]



 80%|████████  | 1600/2000 [08:21<02:06,  3.17it/s]



 85%|████████▌ | 1700/2000 [08:53<01:34,  3.16it/s]



 90%|█████████ | 1800/2000 [09:24<01:03,  3.16it/s]



 95%|█████████▌| 1900/2000 [09:56<00:31,  3.15it/s]



100%|██████████| 2000/2000 [10:27<00:00,  3.19it/s]




  target = torch.tensor(target, dtype=torch.long, device=device)
  0%|          | 0/2000 [00:00<?, ?it/s]


Val set: Average loss: 0.9776, Accuracy: 42040401/67371008 (62%)



  5%|▌         | 100/2000 [00:31<09:45,  3.25it/s]



 10%|█         | 200/2000 [01:02<09:14,  3.24it/s]



 15%|█▌        | 300/2000 [01:33<08:48,  3.22it/s]



 20%|██        | 400/2000 [02:05<09:59,  2.67it/s]



 25%|██▌       | 500/2000 [02:36<07:44,  3.23it/s]



 30%|███       | 600/2000 [03:07<07:29,  3.12it/s]



 35%|███▌      | 700/2000 [03:38<06:47,  3.19it/s]



 40%|████      | 800/2000 [04:10<06:17,  3.18it/s]



 45%|████▌     | 900/2000 [04:41<05:38,  3.25it/s]



 50%|█████     | 1000/2000 [05:12<05:09,  3.23it/s]



 55%|█████▌    | 1100/2000 [05:43<04:41,  3.20it/s]



 60%|██████    | 1200/2000 [06:15<04:11,  3.18it/s]



 65%|██████▌   | 1300/2000 [06:46<03:37,  3.21it/s]



 70%|███████   | 1400/2000 [07:17<03:07,  3.20it/s]



 75%|███████▌  | 1500/2000 [07:48<02:38,  3.16it/s]



 80%|████████  | 1600/2000 [08:19<02:04,  3.22it/s]



 85%|████████▌ | 1700/2000 [08:50<01:34,  3.19it/s]



 90%|████████▉ | 1799/2000 [09:21<01:01,  3.26it/s]

In [None]:
# test code for Mean iou
a = torch.randn(1, 7, 3, 3)
# print(a)
print(a)
a = torch.torch.nn.functional.log_softmax(a, dim=1)
print(a.shape)
print(a)
a = a.numpy()
# print(a.shape)
# b = torch.randn(7,512,512)
# b = torch.argmax(b,dim = 0)
# b = b.numpy()
# mean_iou_evaluate.mean_iou_score(a,b)


tensor([[[[ 1.3770e-01, -4.3821e-01,  2.1104e-01],
          [ 6.3527e-01, -4.1212e-01,  4.8293e-01],
          [ 1.3069e+00,  1.1946e+00,  1.0628e-01]],

         [[ 2.9848e-01, -9.3795e-01, -2.0160e+00],
          [-2.3025e-01,  1.0400e+00, -9.9195e-02],
          [-9.2293e-01,  7.3687e-02,  1.2225e+00]],

         [[ 1.0634e+00, -5.5362e-03, -8.3723e-01],
          [-4.3611e-01,  9.8528e-02, -9.3485e-01],
          [ 1.0458e-03,  4.4920e-01, -1.5266e+00]],

         [[-1.1031e+00,  2.3266e-01,  2.2437e+00],
          [-5.2398e-01, -1.2317e+00,  1.5176e+00],
          [ 3.6768e-03,  1.6619e+00, -2.6280e-01]],

         [[-1.2646e+00, -4.5298e-01,  8.7688e-02],
          [-1.1462e-01,  1.1270e-01, -1.3377e+00],
          [ 2.1916e+00,  4.2946e-01,  5.7133e-01]],

         [[-4.7802e-01, -1.6848e+00,  2.5766e-01],
          [ 4.0986e-01, -7.2000e-01,  5.5814e-01],
          [-1.5496e+00, -4.0597e-01, -6.4538e-01]],

         [[ 8.2795e-02,  8.6383e-01, -6.0936e-01],
          [ 3.3994e

https://zhuanlan.zhihu.com/p/32506912

In [None]:
a.shape


(512, 512)

https://medium.com/ching-i/fully-convolutional-networks-%E8%AB%96%E6%96%87%E9%96%B1%E8%AE%80-246aa68ce4ad

In [None]:

# class VGG16FCN32(nn.Module):
#     def __init__(self, num_classes):
#         super().__init__()

#         self.feats = models.vgg16(pretrained=True).features
#         self.fconn = nn.Sequential(
#             nn.Conv2d(512, 4096, 7),
#             nn.ReLU(inplace=True),
#             nn.Dropout(),
#             nn.Conv2d(4096, 4096, 1),
#             nn.ReLU(inplace=True),
#             nn.Dropout(),
#             )
#         self.score = nn.Conv2d(4096, num_classes, 1)

#     def forward(self, x):
#         feats = self.feats(x)
#         fconn = self.fconn(feats)
#         score = self.score(fconn)

#         return F.upsample_bilinear(score, x.size()[2:])


#     class UNetEnc(nn.Module):

#         def __init__(self, in_channels, features, out_channels):
#             super().__init__()

#             self.up = nn.Sequential(
#                 nn.Conv2d(in_channels, features, 3),
#                 nn.ReLU(inplace=True),
#                 nn.Conv2d(features, features, 3),
#                 nn.ReLU(inplace=True),
#                 nn.ConvTranspose2d(features, out_channels, 2, stride=2),
#                 nn.ReLU(inplace=True),
#                 )

#     def forward(self, x):
#         return self.up(x)


https://github.com/JustinHeaton/fully-convolutional-networks/blob/master/FCN32.ipynb

In [None]:

# viz_mask.py
'''

img = imageio.imread('.\p2_data\\train\\0000_mask.png')
seg = imageio.imread('.\p2_data\\train\\0000_sat.jpg')
masks=viz_mask.read_masks(seg, img.shape)

print(masks)
cs = np.unique(masks)
print(cs)
cmap = viz_mask.cls_color
for c in cs:
    mask = np.zeros((img.shape[0], img.shape[1]))
    ind = np.where(masks==c)
    mask[ind[0], ind[1]] = 1
    img = viz_mask.viz_data(img, mask, color=cmap[c])
    # print(img[0].shape)
    # imageio.imsave('./exp.png', np.uint8(img))

'''


https://www.malaoshi.top/show_1EF53OZZLiWQ.html

In [None]:
import torch
a = torch.randn(7, 512, 512)
b = torch.argmax(a, dim=0)


In [None]:
print(a)
print(b)
