<a href="https://colab.research.google.com/github/kmc3661/Basic-machine-Learning/blob/master/U_Net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pwd

In [None]:
#%cd content/drive/MyDrive

In [None]:
#cd ..

In [None]:
#!ls

In [None]:
# !unzip -qq "/content/drive/My Drive/VOCdevkit.zip"

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

U-Net Network

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

# Hyperparameters
lr = 1e-3
batch_size = 4
num_epoch = 40

# Output locations (TensorBoard logs, checkpoints, result dumps).
log_dir = '/content/drive/MyDrive/tensorboard/UNet/log'
ckpt_dir = '/content/drive/MyDrive/tensorboard/UNet/checkpoint'
result_dir = '/content/drive/MyDrive/tensorboard/UNet/results'

# Create each result sub-directory on its own. Guarding both calls on
# "result_dir does not exist" would skip them whenever the parent is already
# present, leaving 'png' / 'numpy' missing.
os.makedirs(os.path.join(result_dir, 'png'), exist_ok=True)
os.makedirs(os.path.join(result_dir, 'numpy'), exist_ok=True)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class UNet(nn.Module):
    """U-Net encoder/decoder built from unpadded conv + batch-norm + ReLU stages.

    Every 3x3 convolution uses ``padding=0``, so each one trims two pixels
    from the height and width. The output map is therefore smaller than the
    input; callers that need a full-resolution prediction must resize it.

    Args:
        in_channels: Number of channels of the input image (default 3).
        num_classes: Number of output channels of the final 1x1 convolution
            (default 22).
        base_channels: Width of the first encoder stage; every deeper stage
            doubles it (default 64, giving 64-128-256-512-1024).
    """

    def __init__(self, in_channels=3, num_classes=22, base_channels=64):
        super(UNet, self).__init__()

        def CBR2d(in_channels, out_channels, kernel_size=3, stride=1, padding=0, bias=True):
            """Return a Conv2d -> BatchNorm2d -> ReLU stage as one Sequential."""
            return nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=kernel_size, stride=stride, padding=padding,
                          bias=bias),
                nn.BatchNorm2d(num_features=out_channels),
                nn.ReLU(),
            )

        # Channel widths of the five resolution levels.
        c1 = base_channels
        c2, c3, c4, c5 = 2 * c1, 4 * c1, 8 * c1, 16 * c1

        # Contracting path
        self.cp1_1 = CBR2d(in_channels, c1)
        self.cp1_2 = CBR2d(c1, c1)
        self.pool1 = nn.MaxPool2d(2)

        self.cp2_1 = CBR2d(c1, c2)
        self.cp2_2 = CBR2d(c2, c2)
        self.pool2 = nn.MaxPool2d(2)

        self.cp3_1 = CBR2d(c2, c3)
        self.cp3_2 = CBR2d(c3, c3)
        self.pool3 = nn.MaxPool2d(2)

        self.cp4_1 = CBR2d(c3, c4)
        self.cp4_2 = CBR2d(c4, c4)
        self.pool4 = nn.MaxPool2d(2)

        self.cp5_1 = CBR2d(c4, c5)

        # Expanding path. The first conv of each level takes twice the level
        # width because the upsampled map is concatenated with the skip map.
        self.ep5_1 = CBR2d(c5, c4)
        self.unpool4 = nn.ConvTranspose2d(c4, c4, kernel_size=2,
                                          stride=2, padding=0, bias=True)

        self.ep4_2 = CBR2d(2 * c4, c4)
        self.ep4_1 = CBR2d(c4, c3)
        self.unpool3 = nn.ConvTranspose2d(c3, c3, kernel_size=2,
                                          stride=2, padding=0, bias=True)

        self.ep3_2 = CBR2d(2 * c3, c3)
        self.ep3_1 = CBR2d(c3, c2)
        self.unpool2 = nn.ConvTranspose2d(c2, c2, kernel_size=2,
                                          stride=2, padding=0, bias=True)

        self.ep2_2 = CBR2d(2 * c2, c2)
        self.ep2_1 = CBR2d(c2, c1)
        self.unpool1 = nn.ConvTranspose2d(c1, c1, kernel_size=2,
                                          stride=2, padding=0, bias=True)

        self.ep1_2 = CBR2d(2 * c1, c1)
        self.ep1_1 = CBR2d(c1, c1)
        self.final_conv = nn.Conv2d(c1, num_classes, kernel_size=1, stride=1,
                                    padding=0, bias=True)

    @staticmethod
    def _center_crop(skip, height, width):
        """Return the central ``height`` x ``width`` window of ``skip``.

        Implemented with plain slicing so the model does not depend on the
        ``torchvision`` name being imported by some other cell. The offsets
        use ``int(round(diff / 2.0))`` for the top/left corner.
        """
        top = int(round((skip.shape[-2] - height) / 2.0))
        left = int(round((skip.shape[-1] - width) / 2.0))
        return skip[..., top:top + height, left:left + width]

    def _merge(self, upsampled, skip):
        """Crop ``skip`` to ``upsampled``'s spatial size and concatenate on channels."""
        height, width = upsampled.shape[-2:]
        return torch.cat([upsampled, self._center_crop(skip, height, width)], dim=1)

    def forward(self, x):
        """Map an ``(N, in_channels, H, W)`` batch to per-class score maps.

        Returns a tensor of shape ``(N, num_classes, H', W')`` where ``H'`` and
        ``W'`` are smaller than the input size because no convolution pads.
        """
        # Contracting path: two convs per level, keep the second for the skip.
        cp1_2 = self.cp1_2(self.cp1_1(x))
        cp2_2 = self.cp2_2(self.cp2_1(self.pool1(cp1_2)))
        cp3_2 = self.cp3_2(self.cp3_1(self.pool2(cp2_2)))
        cp4_2 = self.cp4_2(self.cp4_1(self.pool3(cp3_2)))

        # Bottleneck
        cp5_1 = self.cp5_1(self.pool4(cp4_2))
        ep5_1 = self.ep5_1(cp5_1)

        # Expanding path: upsample, merge with the cropped skip, two convs.
        cat4 = self._merge(self.unpool4(ep5_1), cp4_2)
        ep4_1 = self.ep4_1(self.ep4_2(cat4))

        cat3 = self._merge(self.unpool3(ep4_1), cp3_2)
        ep3_1 = self.ep3_1(self.ep3_2(cat3))

        cat2 = self._merge(self.unpool2(ep3_1), cp2_2)
        ep2_1 = self.ep2_1(self.ep2_2(cat2))

        cat1 = self._merge(self.unpool1(ep2_1), cp1_2)
        ep1_1 = self.ep1_1(self.ep1_2(cat1))

        return self.final_conv(ep1_1)
        

Download and load Datasets

In [None]:
import torch
import torchvision
import numpy as np
import torchvision.transforms as transforms

img_size = 512

# Image-only training augmentation. Kept so the name stays available, but it
# must NOT be used as `transform=` for segmentation: its random rotation/flip
# would be applied to the image and not to the mask, misaligning the labels.
transform_train = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(),
    transforms.PILToTensor(),
])

# Deterministic label pipeline: resize and convert to an integer tensor.
transform_target = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.PILToTensor(),
])

# Deterministic image pipeline for validation / test.
transform_test = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.PILToTensor(),
])


def paired_transform_train(image, target):
    """Augment a training (image, mask) pair with identical geometry.

    Both inputs are resized to ``img_size`` x ``img_size``; then one rotation
    angle drawn uniformly from [-10, 10] degrees and one horizontal-flip
    decision (probability 0.5) are applied to BOTH, so every pixel keeps its
    label. Returns the pair as tensors via ``PILToTensor``.
    """
    resize = transforms.Resize((img_size, img_size))
    image, target = resize(image), resize(target)

    # Draw the random parameters once and reuse them for image and mask.
    angle = float(torch.empty(1).uniform_(-10.0, 10.0))
    image = transforms.functional.rotate(image, angle)
    target = transforms.functional.rotate(target, angle)

    if torch.rand(1).item() < 0.5:
        image = transforms.functional.hflip(image)
        target = transforms.functional.hflip(target)

    to_tensor = transforms.PILToTensor()
    return to_tensor(image), to_tensor(target)


# `transforms=` receives (image, target) together; it cannot be combined with
# `transform=` / `target_transform=` on the same dataset.
train_data = torchvision.datasets.VOCSegmentation(root='/content/drive/MyDrive/VOC_2011', year='2011', image_set='train',
                                                  download=False, transforms=paired_transform_train)
val_data = torchvision.datasets.VOCSegmentation(root='/content/drive/MyDrive/VOC_2011', year='2011', image_set='val',
                                                download=False, transform=transform_test, target_transform=transform_target)
test_data = torchvision.datasets.VOCSegmentation(root='/content/drive/MyDrive/VOC_2011', year='2007', image_set='test',
                                                 download=False, transform=transform_test, target_transform=transform_target)

# Only the training set is shuffled; validation and test keep dataset order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=4, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=4, shuffle=False, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=4, shuffle=False, num_workers=2)


In [None]:

# Indexing layout: train_data[sample_index][0 = image, 1 = label] -> tensor of shape (C, 512, 512)
# Training image:     train_data[i][0] has shape (3, 512, 512)
# Segmentation label: train_data[i][1] has shape (1, 512, 512)

Training

In [None]:
%load_ext tensorboard

In [None]:
tensorboard --logdir='/content/drive/MyDrive/tensorboard/UNet/log'

In [None]:
from torch.nn.modules.linear import Bilinear
import enum
from torch.optim.optimizer import Optimizer
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import torch.backends.cudnn as cudnn
import torch.optim as optim


# One TensorBoard writer per split, each in its own sub-directory of log_dir.
writer_train = SummaryWriter(log_dir=os.path.join(log_dir, 'train'))
writer_val = SummaryWriter(log_dir=os.path.join(log_dir, 'val'))

# Build the model on the selected device, then wrap it in DataParallel.
net = torch.nn.DataParallel(UNet().to(device))
cudnn.benchmark = True

lr = 1e-3

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)


def fn_tonumpy(x):
    """Return ``x`` as a detached CPU NumPy array with axes reordered (0, 2, 3, 1)."""
    return x.to('cpu').detach().numpy().transpose(0, 2, 3, 1)


# save the network
def save(ckpt_dir, net, optimizer, epoch):
    """Write the model and optimizer state for ``epoch`` into ``ckpt_dir``.

    Args:
        ckpt_dir: Directory for checkpoints; created (with parents) if missing.
        net: Module whose ``state_dict()`` is stored under the key ``'net'``.
        optimizer: Optimizer whose ``state_dict()`` is stored under ``'optim'``.
        epoch: Integer used in the file name ``model_epoch<epoch>.pth``.
    """
    os.makedirs(ckpt_dir, exist_ok=True)

    # Use the optimizer *instance* (not the Optimizer class), and join paths
    # properly so that absolute checkpoint directories work.
    torch.save({'net': net.state_dict(), 'optim': optimizer.state_dict()},
               os.path.join(ckpt_dir, 'model_epoch%d.pth' % epoch))
    
# Load the network
def load(ckpt_dir, net, optimizer):
    """Restore ``net`` and ``optimizer`` from the newest checkpoint in ``ckpt_dir``.

    Checkpoints are files named ``model_epoch<N>.pth`` holding a dict with the
    keys ``'net'`` and ``'optim'``; the one with the largest ``N`` is loaded.

    Args:
        ckpt_dir: Directory to search.
        net: Module that receives the stored ``'net'`` state dict.
        optimizer: Optimizer that receives the stored ``'optim'`` state dict.

    Returns:
        ``(net, optimizer, epoch)``. ``epoch`` is the number parsed from the
        loaded file name, or 0 (objects untouched) when the directory is
        missing or contains no checkpoint.
    """
    if not os.path.exists(ckpt_dir):
        return net, optimizer, 0

    prefix, suffix = 'model_epoch', '.pth'
    found = []
    for file_name in os.listdir(ckpt_dir):
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue  # ignore unrelated files instead of failing on them
        number = file_name[len(prefix):-len(suffix)]
        if number.isdigit():
            found.append((int(number), file_name))

    if not found:
        return net, optimizer, 0

    # Compare numerically so that epoch 10 ranks above epoch 9.
    epoch, newest = max(found)
    dict_model = torch.load(os.path.join(ckpt_dir, newest))

    net.load_state_dict(dict_model['net'])
    optimizer.load_state_dict(dict_model['optim'])

    return net, optimizer, epoch

def Change_value(targets):
    """Relabel every element equal to 255 as 21, modifying ``targets`` in place.

    Nothing is returned; the caller's tensor is updated directly.
    (255 is presumably the VOC void/boundary label - confirm against the data.)
    """
    targets[targets == 255] = 21

# Number of samples in each split.
num_data_train = len(train_data)
num_data_val = len(val_data)
num_data_test = len(test_data)

# Batches per epoch (the last batch may be partial). Stored as plain ints
# because these values are used to build TensorBoard global steps, and
# np.ceil alone would make them floats.
num_batch_train = int(np.ceil(num_data_train / batch_size))
num_batch_val = int(np.ceil(num_data_val / batch_size))
num_batch_test = int(np.ceil(num_data_test / batch_size))
    
# training network

def train(epoch):
    """Run one training epoch over ``train_loader`` and log it to TensorBoard.

    For each batch the network output is bilinearly resized to the label
    resolution, the cross-entropy loss is back-propagated and the optimizer
    takes one step. Every 100th batch the running mean loss is printed and the
    current inputs / labels / predictions are written as image grids. The mean
    loss of the whole epoch is written once, after the last batch.

    Args:
        epoch: Zero-based epoch index (printed and logged as ``epoch + 1``).
    """
    net.train()
    train_loss = []

    print('\n[ train_epoch: %d ]' % (epoch+1))

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.float()
        targets = targets.long()
        Change_value(targets)  # in-place relabel of 255 -> 21
        inputs, targets = inputs.to(device), targets.to(device)
        targets = targets.squeeze(1)  # (N, 1, H, W) -> (N, H, W)

        optimizer.zero_grad()

        # The network output is smaller than the labels, so resize the class
        # scores to the label resolution before computing the loss.
        logits = net(inputs)
        logits = torch.nn.functional.interpolate(
            logits, size=targets.shape[-2:], mode='bilinear', align_corners=True)

        # `criterion` already averages over the batch (default reduction), so
        # the loss is not divided by the batch size a second time.
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

        if batch_idx % 100 == 0:
            print("Train: Epoch %03d | Batch: %03d | Loss: %.4f"%
                (epoch+1, batch_idx,np.mean(train_loss)))

            # TensorBoard rescales non-uint8 images by 255, so log uint8
            # arrays: raw 0..255 inputs as they are, class indices spread over
            # the gray range so that different classes are distinguishable.
            step = int(num_batch_train * epoch + batch_idx)
            gray = 255 // max(logits.shape[1] - 1, 1)
            label_img = fn_tonumpy(targets.unsqueeze(1) * gray).astype(np.uint8)
            input_img = fn_tonumpy(inputs).astype(np.uint8)
            output_img = fn_tonumpy(
                logits.argmax(dim=1, keepdim=True) * gray).astype(np.uint8)

            writer_train.add_image('label', label_img, step, dataformats='NHWC')
            writer_train.add_image('input', input_img, step, dataformats='NHWC')
            writer_train.add_image('output', output_img, step, dataformats='NHWC')

    # One scalar per epoch, written after the loop instead of once per batch.
    if train_loss:
        writer_train.add_scalar('train loss', np.mean(train_loss), epoch+1)
            
def validate(epoch):
    """Evaluate the network on ``val_loader`` and log the result to TensorBoard.

    Runs in eval mode with gradient tracking disabled. Every 100th batch the
    running mean loss is printed and the current inputs / labels / predictions
    are written as image grids to the validation writer. The mean loss over
    the whole split is printed and logged once at the end.

    Args:
        epoch: Zero-based epoch index (printed and logged as ``epoch + 1``).
    """
    print('\n[ val_epoch: %d ]' % (epoch+1))
    net.eval()
    val_loss = []

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            inputs = inputs.float()
            targets = targets.long()
            Change_value(targets)  # in-place relabel of 255 -> 21
            inputs, targets = inputs.to(device), targets.to(device)
            targets = targets.squeeze(1)  # (N, 1, H, W) -> (N, H, W)

            # Resize the class scores to the label resolution.
            logits = net(inputs)
            logits = torch.nn.functional.interpolate(
                logits, size=targets.shape[-2:], mode='bilinear', align_corners=True)

            # `criterion` already averages over the batch (default
            # reduction), so no extra division by the batch size.
            loss = criterion(logits, targets)
            val_loss.append(loss.item())

            if batch_idx % 100 == 0:
                print("Validate: Epoch %03d | Batch: %03d | Loss: %.4f"%
                    (epoch+1, batch_idx,np.mean(val_loss)))

                # TensorBoard rescales non-uint8 images by 255, so log uint8
                # arrays; class indices are spread over the gray range.
                step = int(num_batch_val * epoch + batch_idx)
                gray = 255 // max(logits.shape[1] - 1, 1)
                label_img = fn_tonumpy(targets.unsqueeze(1) * gray).astype(np.uint8)
                input_img = fn_tonumpy(inputs).astype(np.uint8)
                output_img = fn_tonumpy(
                    logits.argmax(dim=1, keepdim=True) * gray).astype(np.uint8)

                # Validation images belong to the validation writer.
                writer_val.add_image('label', label_img, step, dataformats='NHWC')
                writer_val.add_image('input', input_img, step, dataformats='NHWC')
                writer_val.add_image('output', output_img, step, dataformats='NHWC')

    if val_loss:
        print('\nval_loss:%.4f'%(np.mean(val_loss)))
        writer_val.add_scalar('val_loss', np.mean(val_loss), epoch+1)


def adjust_learning_rate(optimizer, epoch):
    """Set the step-decayed learning rate for ``epoch`` on every parameter group.

    The rate starts at 1e-3 and is divided by 10 once ``epoch`` reaches 20 and
    again once it reaches 30. The value is written to the ``'lr'`` entry of
    each dict in ``optimizer.param_groups``; nothing is returned.
    """
    new_lr = 1e-3
    for milestone in (20, 30):
        if epoch >= milestone:
            new_lr /= 10

    for group in optimizer.param_groups:
        group['lr'] = new_lr

# Main loop: set the scheduled learning rate, train one epoch, then validate.
try:
    for epoch in range(num_epoch):
        adjust_learning_rate(optimizer, epoch)
        train(epoch)
        validate(epoch)
finally:
    # Flush and close the TensorBoard writers even when training is
    # interrupted or raises, so events logged so far are not lost.
    writer_train.flush()
    writer_val.flush()
    writer_train.close()
    writer_val.close()

