In [1]:
import torch 
import torch.nn as nn
import pandas as pd
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from torchvision.io import read_image, ImageReadMode
from torchvision import datasets
from torchsummary import summary
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import cv2 
from skimage import measure
%matplotlib inline



# Encode segmentation masks to classes #

CMP_facade_DB_base

In [2]:
TARGET_IMAGE_WIDTH = 512
TARGET_IMAGE_HEIGHT = 512
NUM_CHANNELS = 2
BATCH_SIZE = 4
EXECUTION_IMAGE_DATA = True
EXECUTION_MODEL_TRAIN = True

colormap12 = {(0,0,170) : 0, #background
            (0,0,255) : 1, #facade
            (0,85,255): 2, #window
            (0,170,255): 3, #door
            (255,85,0): 4, #cornice
            (85,255,170): 5, #sill
            (170,255,85): 6, #balcony
            (255,255,0): 7, #blind
            (255,170,0): 8, #deco
            (0,255,255): 9, #molding
            (255,0,0): 10, #pillar
            (170,0,0): 11 #shop 
                            }
colormap2 = {   (0,0,170) : 0, #background
                (0,85,255): 1 #window    
                           }     
cwd = os.getcwd()
img_trainA_path = os.path.join(cwd, r'dataset\trainA')
img_testA_path = os.path.join(cwd, r'dataset\testA')
img_trainB_path = os.path.join(cwd, r'dataset\trainB')
img_testB_path = os.path.join(cwd, r'dataset\testB')
npy_path = os.path.join(cwd, r'dataset\np.array_targets')
img_test_trans = cwd

def encode_to_classes2(img_array, colormap):

    """
    prepare data for 2 class segmentation
    """
    class_array = np.zeros((img_array.shape[0], img_array.shape[1], 2))
    image_trans_array = np.zeros((img_array.shape[0], img_array.shape[1], 3))

    for W in range(img_array.shape[0]):
        for H in range(img_array.shape[1]):
            pixel_color = (img_array[W,H,0], img_array[W,H,1], img_array[W,H,2])
            if pixel_color == (0,85,255):
                class_array[W,H,1] = 1
                image_trans_array[W,H,0] = 255
                image_trans_array[W,H,1] = 0
                image_trans_array[W,H,2] = 0
            else:
                class_array[W,H,0] = 1
                image_trans_array[W,H,0] = 0
                image_trans_array[W,H,1] = 0
                image_trans_array[W,H,2] = 255
    return class_array.astype(np.uint8), image_trans_array.astype(np.uint8)


def encode_to_classes12(img_array, colormap):

    """
    prepare data for 12 class segmentation
    """
    class_array = np.zeros((img_array.shape[0], img_array.shape[1], 12))
    image_trans_array = np.zeros((img_array.shape[0], img_array.shape[1], 3))

    for W in range(img_array.shape[0]):
        for H in range(img_array.shape[1]):
            pixel_color = (img_array[W,H,0], img_array[W,H,1], img_array[W,H,2])
            if pixel_color in colormap.keys():
                class_array[W,H,colormap[pixel_color]] = 1
            else:
                class_array[W,H,1] = 1

            image_trans_array[W,H,0] = img_array[W,H,0]
            image_trans_array[W,H,1] = img_array[W,H,1]
            image_trans_array[W,H,2] = img_array[W,H,2]
    return class_array.astype(np.uint8), image_trans_array.astype(np.uint8)
    

def create_image_data(execute):
    
    if execute:
        for i in range(303):
            image = Image.open(os.path.join(img_trainB_path, "cmp_b" + f"{i+1}".zfill(4) + '.png')).convert('RGB')
            image = np.array(image.resize((TARGET_IMAGE_WIDTH,TARGET_IMAGE_WIDTH)))
            if NUM_CHANNELS == 2:
                class_array, img_trans_array = encode_to_classes2(image, colormap2)
            else:
                class_array, img_trans_array = encode_to_classes12(image, colormap12)
            np.save(os.path.join(npy_path, r'npy_trainB', "cmp_b" +  f"{i+1}".zfill(4)+ '.npy'), class_array)


        for i in range(75):
            image = Image.open(os.path.join(img_testB_path, "cmp_b" + f"{i+304}".zfill(4) + '.png')).convert('RGB')
            image = np.array(image.resize((TARGET_IMAGE_WIDTH,TARGET_IMAGE_WIDTH)))
            if NUM_CHANNELS == 2:
                class_array, img_trans_array = encode_to_classes2(image, colormap2)
            else:
                class_array, img_trans_array = encode_to_classes12(image, colormap12)
            np.save(os.path.join(npy_path, r'npy_testB', "cmp_b" +  f"{i+304}".zfill(4)+ '.npy'), class_array)

create_image_data(EXECUTION_IMAGE_DATA)

# Custom Dataset class and instances #

In [3]:

transform_image = transforms.Compose(
    [
        transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))]
)

transform_label = transforms.Compose(
    [   
        transforms.ToTensor()])


class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform_image, transform_label, data_index):
        self.img_labels = pd.read_csv(annotations_file, sep=';')
        self.img_dir = img_dir
        self.transform_image = transform_image
        self.transform_label = transform_label
        self.data_index = data_index

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        img_label_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, self.data_index])
        image = Image.open(img_path).convert('RGB')
        image = image.resize((TARGET_IMAGE_WIDTH,TARGET_IMAGE_WIDTH))
        image_label = np.load(img_label_path)
        text_label = self.img_labels.iloc[idx,3]
        if self.transform_image:
            image = self.transform_image(image)
        if self.transform_label:
            image_label = self.transform_label(image_label)
        return image, image_label, text_label


training_dataset = CustomImageDataset(annotations_file=os.path.join(cwd, r'dataset\annotation_train.csv'),
                                   img_dir=os.path.join(cwd, r'dataset'),
                                   transform_image=transform_image,
                                   transform_label=transform_label,
                                   data_index=2,
                                   )

validation_dataset = CustomImageDataset(annotations_file=os.path.join(cwd, r'dataset\annotation_validation.csv'),
                                   img_dir=os.path.join(cwd, r'dataset'),
                                   transform_image=transform_image,
                                   transform_label=transform_label,
                                   data_index=2,
                                   )


# DataLoader instances #

In [4]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=False)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)


# Unet Model implementation #

In [5]:
class conv_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1) #1
        self.bn1 = nn.BatchNorm2d(out_c) #2 reduces internal covariance shift
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_c)
        self.relu = nn.ReLU()

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        return x


class encoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = conv_block(in_c, out_c)
        self.pool = nn.MaxPool2d((2, 2))
    def forward(self, inputs):
        x = self.conv(inputs)
        p = self.pool(x)
        return x, p


class decoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=0)
        self.conv = conv_block(out_c+out_c, out_c)
    def forward(self, inputs, skip):
        x = self.up(inputs)
        x = torch.cat([x, skip], axis=1)
        x = self.conv(x)
        return x

class build_unet(nn.Module):
    def __init__(self, num_channels):
        super().__init__()
        """ Encoder """
        self.e1 = encoder_block(3, 64)
        self.e2 = encoder_block(64, 128)
        self.e3 = encoder_block(128, 256)
        self.e4 = encoder_block(256, 512)
        """ Bottleneck """
        self.b = conv_block(512, 1024)
        """ Decoder """
        self.d1 = decoder_block(1024, 512)
        self.d2 = decoder_block(512, 256)
        self.d3 = decoder_block(256, 128)
        self.d4 = decoder_block(128, 64)
        self.d4 = decoder_block(128, 64)
        """ Classifier """
        self.outputs = nn.Conv2d(64, num_channels, kernel_size=1, padding=0)
    def forward(self, inputs):
        """ Encoder """
        s1, p1 = self.e1(inputs)
        s2, p2 = self.e2(p1)
        s3, p3 = self.e3(p2)
        s4, p4 = self.e4(p3)
        """ Bottleneck """
        b = self.b(p4)
        """ Decoder """
        d1 = self.d1(b, s4)
        d2 = self.d2(d1, s3)
        d3 = self.d3(d2, s2)
        d4 = self.d4(d3, s1)
        """ Classifier """
        outputs = self.outputs(d4)
        return outputs


# Prepare and run model #

In [6]:
device = 'cuda'


LEARNING_RATE = 0.001
EPOCHS = 10
writer_step = 2


def main(execute):
    
    if execute:
        model = build_unet(num_channels=NUM_CHANNELS).to(device)
        #summary(model, (3,256,256))
        loss_fn = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


        def train_one_epoch(epoch_index, tb_writer):
            
            running_loss = 0
            last_loss = 0

            for i, batch in enumerate(train_dataloader):
                inputs, labels, _ = batch
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                preds = model(inputs)
                loss = loss_fn(preds, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                if i % writer_step == 1:
                    last_loss = running_loss / writer_step # loss per batch
                    print('  batch {} loss: {}'.format(i + 1, last_loss))
                    tb_x = epoch_index * len(train_dataloader) + i + 1
                    tb_writer.add_scalar('Loss/train', last_loss, tb_x)
                    running_loss = 0.
            return last_loss



        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
        epoch_number = 0


        for epoch in range(EPOCHS):
            print('EPOCH {}:'.format(epoch_number + 1))

            model.train(True)
            avg_loss = train_one_epoch(epoch_number, writer)


            model.train(False)
            with torch.no_grad():
                running_vloss = 0.0
                for i, vdata in enumerate(validation_dataloader):
                    vinputs, vlabels, _ = vdata
                    vinputs, vlabels = vinputs.to(device), vlabels.to(device)
                    voutputs = model(vinputs)
                    vloss = loss_fn(voutputs, vlabels)
                    running_vloss += vloss

                    avg_vloss = running_vloss / (i + 1)
                    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

                    # Log the running loss averaged per batch
                    # for both training and validation
                    writer.add_scalars('Training vs. Validation Loss',
                                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                                    epoch_number + 1)
                    writer.flush()
            epoch_number += 1


        writer.close()

        tree = os.walk(cwd, topdown=True)

        for root, dirs, files in tree:
            for name in files:
                if '.pth' in os.path.join(cwd, name):
                    os.remove(os.path.join(cwd, name))
                    break
            break

        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path + '.pth')

main(EXECUTION_MODEL_TRAIN)

EPOCH 1:
  batch 2 loss: 0.0025511536514386535
  batch 4 loss: 0.002240907517261803
  batch 6 loss: 0.0018963073380291462
  batch 8 loss: 0.0017554272199049592
  batch 10 loss: 0.0017239355947822332
  batch 12 loss: 0.0017004050314426422
  batch 14 loss: 0.0016208774177357554
  batch 16 loss: 0.0018541037570685148
  batch 18 loss: 0.0019820391898974776
  batch 20 loss: 0.001980458211619407
  batch 22 loss: 0.0014002337702549994
  batch 24 loss: 0.0013130876468494534
  batch 26 loss: 0.0012819321127608418
  batch 28 loss: 0.0011595396790653467
  batch 30 loss: 0.0011166086187586188
  batch 32 loss: 0.0013962205848656595
  batch 34 loss: 0.0010292159277014434
  batch 36 loss: 0.0010258627007715404
  batch 38 loss: 0.0011938430252484977
  batch 40 loss: 0.00104225252289325
  batch 42 loss: 0.001208837958984077
  batch 44 loss: 0.001160282059572637
  batch 46 loss: 0.0010373763798270375
  batch 48 loss: 0.0013159841764718294
  batch 50 loss: 0.0012945111375302076
  batch 52 loss: 0.0012039

# Make Predictions #

In [7]:
cwd = os.getcwd()

colormap12 = {(0,0,170) : 0, #background
            (0,0,255) : 1, #facade
            (0,85,255): 2, #window
            (0,170,255): 3, #door
            (255,85,0): 4, #cornice
            (85,255,170): 5, #sill
            (170,255,85): 6, #balcony
            (255,255,0): 7, #blind
            (255,170,0): 8, #deco
            (0,255,255): 9, #molding
            (255,0,0): 10, #pillar
            (170,0,0): 11 #shop 
                            }
colormap2 = {   (0,0,170) : 0, #background
                (0,85,255): 1 #window    
                            }

tree = os.walk(cwd, topdown=True)
for root, dirs, files in tree:
    for name in files:
        if '.pth' in os.path.join(cwd, name):
            model_path = os.path.join(cwd, name)
            break
    break

model = build_unet(num_channels=NUM_CHANNELS).to('cuda')
model.load_state_dict(torch.load(model_path))


def decode_to_RGB2(model, image_path):
    image_test_path = os.path.join(cwd, image_path) 
    my_transforms = transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
                                        ])
    image_test = Image.open(image_test_path).convert('RGB')
    image_test = np.array(image_test.resize((TARGET_IMAGE_WIDTH,TARGET_IMAGE_HEIGHT)))
    image_test  = my_transforms(image_test)
    image_test  = image_test.clone().detach().cuda()

    image_test .to(device)

    output = model(image_test .unsqueeze(0))

    sm = nn.Softmax2d()
    output = sm(output).squeeze(0)
    #output = torch.argmax(output, dim=0)
    output = torch.permute(output, (1,2,0)) #swap axes of tensor
    image_classes = output.cpu().detach().numpy()

    image_output = np.zeros(shape=(TARGET_IMAGE_WIDTH,TARGET_IMAGE_HEIGHT,3)).astype(np.uint8)
    for W in range(image_classes.shape[0]):
        for H in range(image_classes.shape[1]):
            if image_classes[W,H,0] >= 0.9:
                image_output[W,H,0] = 0
                image_output[W,H,1] = 0
                image_output[W,H,2] = 255
                #image[W,H,0] = [k for k, v in colormap.items() if v == pixel_class+1][0][0]
            else:
                image_output[W,H,0] = 255
                image_output[W,H,1] = 0
                image_output[W,H,2] = 0

    image_output = Image.fromarray(image_output)
    image_output.save('test_predicted_mask.png')


def decode_to_RGB12(model, image_path, colormap):
    image_test_path = os.path.join(cwd, image_path) 
    my_transforms = transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
                                        ])

    image_test = Image.open(image_test_path).convert('RGB')
    image_test = np.array(image_test.resize((TARGET_IMAGE_WIDTH,TARGET_IMAGE_HEIGHT)))
    image_test  = my_transforms(image_test)
    image_test  = image_test.clone().detach().cuda()

    image_test .to(device)

    output = model(image_test .unsqueeze(0))

    sm = nn.Softmax2d()
    output = sm(output).squeeze(0)
    #output = torch.argmax(output, dim=0)
    output = torch.permute(output, (1,2,0)) #swap axes of tensor
    image_classes = output.cpu().detach().numpy()

    image_output = np.zeros(shape=(TARGET_IMAGE_WIDTH,TARGET_IMAGE_HEIGHT,3)).astype(np.uint8)
    for W in range(image_classes.shape[0]):
        for H in range(image_classes.shape[1]):
            for C in range(image_classes.shape[2]):
                if image_classes[H,W,C] >= 0.9:
                    image_output[H,W,0] = [k[0] for k, v in colormap.items() if v == C][0]
                    image_output[H,W,1] = [k[1] for k, v in colormap.items() if v == C][0]
                    image_output[H,W,2] = [k[2] for k, v in colormap.items() if v == C][0]

    image_output = Image.fromarray(image_output)
    image_output.save('test_predicted_mask.png')

if NUM_CHANNELS == 2:
    decode_to_RGB2(model, 'test_image.jpg')
else:
    decode_to_RGB12(model, 'test_image.jpg', colormap12)


# Count number of windows #

In [17]:
test_image_path = os.path.join(cwd, 'test_predicted_mask.png')


def count_windows(test_img_path):
    if os.path.exists(test_image_path):
        imgray = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
        imgrgb = cv2.imread(test_image_path)

        # get contours
        contours = measure.find_contours(imgray, fully_connected='high')

        # get contours length
        contour_length_list = []
        for contour in contours:
            contour_length = 0
            for i in range(len(contour)-1):
                contour_length += cv2.norm(contour[i], contour[i+1], cv2.NORM_L2)
            contour_length_list.append(contour_length)

        max_countour_length = max(contour_length_list)
        # remove countours with length less than 20% of mean length of all countours
        windows_count = len([length for length in contour_length_list if length > max_countour_length/2])
        return windows_count

windows = count_windows(test_image_path)
print(windows)


24


In [9]:
# f, axarr = plt.subplots(2, sharex=True)
# axarr[0].imshow(np.array(cv2.imread(os.path.join(cwd, 'test_image.jpg'))))
# axarr[1].imshow(np.array(cv2.imread(os.path.join(cwd, 'test_predicted_mask.jpg'))))