In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import glob
import os
import numpy as np
from PIL import Image
import torchvision.models as models
import mean_iou_evaluate
import viz_mask
import imageio
from tqdm import tqdm
from torchsummary import summary


In [None]:
class HW2IMGS(Dataset):
    def __init__(self, root, transform=None):
        " initial the dataset "
        self.X_image = None
        self.y_label = None
        self.X_filenames = []
        self.y_filenames = []
        self.root = root
        self.transform = transform

        # read filenames
        X_filenames = glob.glob(root+'/*.jpg')
        y_filenames = glob.glob(root+'/*.png')
        for i in range(len(X_filenames)):
            self.X_filenames.append(os.path.splitext(os.path.basename(X_filenames[i]))[0])
            self.y_filenames.append(os.path.splitext(os.path.basename(y_filenames[i]))[0])

        self.len = len(self.X_filenames)

    def __getitem__(self, index):
        X_filename, y_filename = self.X_filenames[index], self.y_filenames[index]

        X_image = Image.open(self.root+X_filename+'.jpg')
        X_shape = imageio.imread(self.root+X_filename+'.jpg').shape

        y_image = imageio.imread(self.root+y_filename+'.png')
        y_label = viz_mask.read_masks(y_image, X_shape)

        if self.transform is not None:
            X_image = self.transform(X_image)
        return X_image, y_label

    def __len__(self):
        " Total number of sampler in the dataset "
        return self.len


In [None]:
from torchvision import transforms
trainset = HW2IMGS(root='p2_data/train/', transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]))

valset = HW2IMGS(root='p2_data/validation/', transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]))
print(len(trainset))
print(len(valset))


2000
257


In [None]:
trainset_loader = DataLoader(trainset, batch_size=4, shuffle=True)
valset_loader = DataLoader(valset, batch_size=4, shuffle=False)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cuda


In [None]:
# from torchsummary import summary
# # summary(vgg, (3, 512, 512))

# class FCN32(nn.Module):
#     def __init__(self):
#         super(FCN32, self).__init__()
#         self.vgg_feature = models.vgg16(pretrained=True).features

#         self.vgg_fc = nn.Sequential(
#             nn.Conv2d(512, 4096, 1),
#             nn.ReLU(),
#             nn.Conv2d(4096, 4096, 1),
#             nn.ReLU(),
#             nn.Conv2d(4096, 7, 1),
#             nn.ReLU(),
#             nn.ConvTranspose2d(7, 7, 32, 32),
#         )

#     def forward(self, x):
#         x = self.vgg_feature(x)
#         x = self.vgg_fc(x)
#         return x


# model = FCN32().to(device)  # Remember to move the model to "device"
# #summary(model, (3, 512, 512))

In [7]:
class FCN8(nn.Module):
    def __init__(self):
        super(FCN8, self).__init__()
        self.vgg_feature = models.vgg16(pretrained=True).features

        self.p0_p3 = self.vgg_feature[:17]
        self.p3_p4 = self.vgg_feature[17:24]
        self.p4_p5 = self.vgg_feature[24:31]
        
        #upsample 2x
        self.up2x = nn.Upsample(scale_factor= 2 , mode='bilinear', align_corners=True)
        #upsample 8x
        self.ct8x = nn.Sequential(
            nn.ConvTranspose2d(256,7,8,8),
            nn.ReLU()
        )
        #shape channel from 512 to 256
        self.ct = nn.Sequential(
            nn.ConvTranspose2d(512,256,2,stride=2),
            nn.ReLU()
        )

    def forward(self, x):
        p3 = self.p0_p3(x)
        p4 = self.p3_p4(p3)
        p5 = self.p4_p5(p4)

        p4p5 = self.up2x(p5) + p4
        p4p5 = self.ct(p4p5)
        p3p4p5 = p4p5+p3

        x = self.ct8x(p3p4p5)
        return x


model = FCN8().to(device)  # Remember to move the model to "device"
summary(model, (3, 512, 512))
print(model)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 512, 512]           1,792
            Conv2d-2         [-1, 64, 512, 512]           1,792
              ReLU-3         [-1, 64, 512, 512]               0
              ReLU-4         [-1, 64, 512, 512]               0
            Conv2d-5         [-1, 64, 512, 512]          36,928
            Conv2d-6         [-1, 64, 512, 512]          36,928
              ReLU-7         [-1, 64, 512, 512]               0
              ReLU-8         [-1, 64, 512, 512]               0
         MaxPool2d-9         [-1, 64, 256, 256]               0
        MaxPool2d-10         [-1, 64, 256, 256]               0
           Conv2d-11        [-1, 128, 256, 256]          73,856
           Conv2d-12        [-1, 128, 256, 256]          73,856
             ReLU-13        [-1, 128, 256, 256]               0
             ReLU-14        [-1, 128, 2

In [8]:
# a = torch.randn((1,512,16,16))  #P5
# b = torch.randn((1,512,32,32))  #P4
# c = torch.randn((1,256,64,64))  #P3

# up2times = nn.Upsample(scale_factor= 2 , mode='bilinear', align_corners=True)
# cc = nn.ConvTranspose2d(512,256,2,stride=2)
# convtranspose8x = nn.ConvTranspose2d(256,7,8,8)


# ab =up2times(a)+b
# # print(ab.shape)
# ab = cc(ab)
# abc = ab+c
# # print(abc.shape)
# abc = convtranspose8x(abc)
# # print(abc.shape)


In [9]:
# a = models.vgg16(pretrained=True).features
# b = a[:17]
# c = a[17:24]
# d = a[24:31]

# print(b)
# print(c)
# print(d)



In [10]:

# summary(model, (3,512,512))

In [11]:
import matplotlib.pyplot as plt
train_acc_his, train_loss_his = [], []
val_acc_his, val_loss_his = [], []

In [12]:
def train(model, epoch, log_interval=200):
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    criterion = nn.CrossEntropyLoss()
    model.train()  # Important: set training mode

    for ep in range(epoch):
        outs = np.empty((0,512,512))
        tars = np.empty((0,512,512))
        iteration = 0
        correct = 0
        total_train = 0
        for batch_idx, (data, target) in enumerate(tqdm(trainset_loader)):

            loss = 0.0
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            output = model(data)

            output = torch.nn.functional.log_softmax(output, dim=1)
            target = torch.tensor(target, dtype=torch.long, device=device)
            loss = criterion(output, target)

            loss.backward()
            optimizer.step()



            # get the index of the max log-probability
            total_train += target.nelement()
            pred = output.max(1, keepdim=True)[1]
            pred = pred.eq(target.view_as(pred))



            # print(pred)
            correct += pred.eq(target.view_as(pred)).sum().item()
            iteration+=1
            

            #prepare data for mean-IOU
            pred = output.max(1, keepdim=False)[1].cpu().numpy()
            labels = target.cpu().numpy()
            outs = np.concatenate((outs,pred))
            tars = np.concatenate((tars,labels))


            if (iteration % log_interval == 0):
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {}/{} ({:.0f}%)'.format(
                    ep+1, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item(),
                    correct, total_train,
                    100. * correct / total_train))
                
        mean_iou_evaluate.mean_iou_score(outs,tars)
        train_acc_his.append(100. * correct / len(trainset_loader.dataset))
        train_loss_his.append(loss.item())
        val(model)  # Evaluate at the end of each epoch
        scheduler.step()

    # summarize history for accuracy
    plt.plot(train_acc_his)
    plt.plot(val_acc_his)
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for loss plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('model loss')
    plt.plot(train_loss_his)
    plt.plot(val_loss_his)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


In [13]:
def val(model):
    criterion = nn.CrossEntropyLoss()
    model.eval()  # Important: set evaluation mode
    val_loss = 0
    correct = 0
    total_val = 0
    outs = np.empty((0,512,512))
    tars = np.empty((0,512,512))

    with torch.no_grad():  # This will free the GPU memory used for back-prop
        for data, target in valset_loader:
            
            data, target = data.to(device), target.to(device)
            output = model(data)

            output = torch.nn.functional.log_softmax(output, dim=1)
            # print(output.shape, target.shape)
            target = torch.tensor(target, dtype=torch.long, device=device)
            # target.clone().detach()
            val_loss += criterion(output, target).item()
            total_val += target.nelement()
            # get the index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

            pred = output.max(1, keepdim=False)[1].cpu().numpy()
            labels = target.cpu().numpy()

            outs = np.concatenate((outs,pred))
            tars = np.concatenate((tars,labels))
            
    miou = mean_iou_evaluate.mean_iou_score(outs,tars)
    if(miou>0.63):
        torch.save(model, ('./done.pth'),_use_new_zipfile_serialization=False)

    val_loss /= len(valset_loader.dataset)
    print('\nVal set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, total_val,
        100. * correct / total_val))

    val_acc_his.append(100. * correct / len(valset_loader.dataset))
    val_loss_his.append(val_loss)


In [14]:
# def outputpred(model,filename):
#     X = Image.open(filename)
#     X = X.to(device)
#     masks = model(X)
#     print(masks)
#     # cs = np.unique(masks)

#     # for c in cs:
#     #     mask = np.zeros((img.shape[0], img.shape[1]))
#     #     ind = np.where(masks==c)
#     #     mask[ind[0], ind[1]] = 1
#     #     img = viz_mask.viz_data(img, mask, color=viz_mask.cmap[c])
#     #     imageio.imsave('./exp.png', np.uint8(img))


In [15]:
# outputpred(model,'p2_data\\validation\\0236_sat.jpg')

In [16]:
train(model, epoch=20)
# val(model)


  target = torch.tensor(target, dtype=torch.long, device=device)
 40%|████      | 200/500 [05:25<10:23,  2.08s/it]



 80%|████████  | 400/500 [13:57<05:00,  3.00s/it]



100%|██████████| 500/500 [19:26<00:00,  2.33s/it]


class #0 : 0.39494
class #1 : 0.67618
class #2 : 0.01168
class #3 : 0.34094
class #4 : 0.18117
class #5 : 0.05814

mean_iou: 0.277174



  target = torch.tensor(target, dtype=torch.long, device=device)


class #0 : 0.62341
class #1 : 0.75551
class #2 : 0.00727
class #3 : 0.43449
class #4 : 0.46274
class #5 : 0.26940

mean_iou: 0.425471


Val set: Average loss: 0.1912, Accuracy: 51543914/67371008 (77%)



 40%|████      | 200/500 [05:19<10:09,  2.03s/it]



 80%|████████  | 400/500 [13:46<05:01,  3.01s/it]



100%|██████████| 500/500 [19:10<00:00,  2.30s/it]


class #0 : 0.62911
class #1 : 0.78609
class #2 : 0.01236
class #3 : 0.62929
class #4 : 0.51653
class #5 : 0.36513

mean_iou: 0.489753

class #0 : 0.64527
class #1 : 0.83178
class #2 : 0.04407
class #3 : 0.69975
class #4 : 0.49315
class #5 : 0.52045

mean_iou: 0.539079


Val set: Average loss: 0.1398, Accuracy: 55594367/67371008 (83%)



 40%|████      | 200/500 [05:19<10:17,  2.06s/it]



 80%|████████  | 400/500 [13:41<04:53,  2.93s/it]



100%|██████████| 500/500 [19:02<00:00,  2.28s/it]


class #0 : 0.66193
class #1 : 0.81390
class #2 : 0.04147
class #3 : 0.67437
class #4 : 0.54643
class #5 : 0.45081

mean_iou: 0.531484

class #0 : 0.66189
class #1 : 0.84214
class #2 : 0.08092
class #3 : 0.69907
class #4 : 0.54025
class #5 : 0.53970

mean_iou: 0.560662


Val set: Average loss: 0.1303, Accuracy: 55945525/67371008 (83%)



 40%|████      | 200/500 [05:20<10:10,  2.03s/it]



 52%|█████▏    | 261/500 [07:33<09:15,  2.32s/it]

In [None]:
# test code for Mean iou
# a = torch.randn(1, 7, 3, 3)
# # print(a)
# print(a)
# a = torch.torch.nn.functional.log_softmax(a, dim=1)
# print(a.shape)
# print(a)
# a = a.numpy()
# print(a.shape)
outs = np.empty((0,512,512))
b = torch.randn(7,512,512).numpy()
b= np.concatenate((outs,b))
b.shape
# b = torch.argmax(b,dim = 0)
# b = b.numpy()
# mean_iou_evaluate.mean_iou_score(a,b)


(7, 512, 512)

https://zhuanlan.zhihu.com/p/32506912

In [None]:
outs = np.empty((0,512,512))
outs.shape
print(outs)

[]


https://medium.com/ching-i/fully-convolutional-networks-%E8%AB%96%E6%96%87%E9%96%B1%E8%AE%80-246aa68ce4ad

https://github.com/JustinHeaton/fully-convolutional-networks/blob/master/FCN32.ipynb

https://www.malaoshi.top/show_1EF53OZZLiWQ.html

In [None]:
import torch
a = torch.randn(30,7, 512, 512)
# c = a.max(1, keepdim=True)[1]
# c = a.max(1)[1].numpy()
b = torch.argmax(a, dim=1).numpy()
# print(c)
# print(c.shape)
# print(b.shape)
# mean_iou_evaluate.mean_iou_score(c,c)

print(np.concatenate((b, b), axis=0).shape)



(60, 512, 512)


In [None]:
class FCN8(nn.Module):

    def __init__(self, num_classes):
        super().__init__()

        # feats = list(models.vgg16(pretrained=True).features.children())

        # self.feats = nn.Sequential(*feats[0:9])
        # self.feat3 = nn.Sequential(*feats[10:16])
        # self.feat4 = nn.Sequential(*feats[17:23])
        # self.feat5 = nn.Sequential(*feats[24:30])

        # for m in self.modules():
        #     if isinstance(m, nn.Conv2d):
        #         m.requires_grad = False

        self.fconn = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
        )
        self.score_feat3 = nn.Conv2d(256, num_classes, 1)
        self.score_feat4 = nn.Conv2d(512, num_classes, 1)
        self.score_fconn = nn.Conv2d(4096, num_classes, 1)

    def forward(self, x):
        feats = self.feats(x)
        feat3 = self.feat3(feats)
        feat4 = self.feat4(feat3)
        feat5 = self.feat5(feat4)
        fconn = self.fconn(feat5)

        score_feat3 = self.score_feat3(feat3)
        score_feat4 = self.score_feat4(feat4)
        score_fconn = self.score_fconn(fconn)

        score = F.upsample_bilinear(score_fconn, score_feat4.size()[2:])
        score += score_feat4
        score = F.upsample_bilinear(score, score_feat3.size()[2:])
        score += score_feat3

        return F.upsample_bilinear(score, x.size()[2:])
model = FCN8(7).to(device,)

In [None]:
class UNET(nn.Module):
    def __init__(self):
        super(UNET, self).__init__()

        self.vgg_feature = models.vgg16(pretrained=True).features[:24]
        # print(self.vgg_feature)
        self.vgg_u1 = nn.Sequential(
            nn.Conv2d(512, 1024, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(1024, 1024, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, 2, 2),
        )
        # print(self.vgg_u1)
        self.vgg_u2 = nn.Sequential(
            nn.Conv2d(1024, 512, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.ConvTranspose2d(512, 256,2,2),
        )
        # print(self.vgg_u2)
        self.vgg_u3 = nn.Sequential(
            nn.Conv2d(512, 256, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128,2,2),
        )
        # print(self.vgg_u3)
        self.vgg_u4 = nn.Sequential(
            nn.Conv2d(256, 128, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64,2,2),
        )
        # print(self.vgg_u4)
        self.vgg_u5 = nn.Sequential(
            nn.Conv2d(128, 64, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(64, 7, 1),
        )
        # print(self.vgg_u5)

    def forward(self, x):
        
        f1 = self.vgg_feature[:3]
        f2 = self.vgg_feature[4:8]
        f3 = self.vgg_feature[9:15]
        f4 = self.vgg_feature[16:22]
        
        out1 = self.vgg_feature(x)
        out2 = self.vgg_feature(f1)
        out3 = self.vgg_feature(f2)
        out4 = self.vgg_feature(f3)
        x = self.vgg_feature(f4)

        x = self.vgg_u1(x)
        x = torch.cat((out4,x),dim=1)
        x = self.vgg_u2(x)
        x = torch.cat((out3,x),dim=1)
        x = self.vgg_u3(x)
        x = torch.cat((out2,x),dim=1)
        x = self.vgg_u4(x)
        x = torch.cat((out1,x),dim=1)
        x = self.vgg_u5(x)

        return x


model = UNET().to(device) 
    