In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

from PIL import Image

import json
import os
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset , DataLoader
from torchvision import transforms
from torch.optim import Adam
from torch.autograd import Variable

import albumentations as A
from albumentations.pytorch import ToTensorV2 

# Dataset root directory; the image sub-folder is joined onto it when the
# dataset objects are built.
path = "../input/imaterialist-fashion-2020-fgvc7"

In [None]:
# Sub-folder names under `path`; `train_path` is passed as `subpath` to the
# dataset classes.
train_path = "train" 
test_path = "test"

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Target size handed to the resize step of the training transform.
IMAGE_HEIGHT = 512
IMAGE_WIDTH = 512
# DataLoader settings.
BATCH_SIZE = 1
NUM_WORKERS = 0
# Learning rate passed to the Adam optimizer.
LR = 0.0001
# Number of passes over the training dataloader.
EPOCHS = 1

In [None]:
# Training annotations; the dataset classes read the ImageId, EncodedPixels,
# ClassId, Height and Width columns from this frame.
df_train = pd.read_csv(os.path.join(path, "train.csv"))

# Label metadata shipped next to the annotations. Both files are located via
# `path` so the dataset root is configured in exactly one place, and the JSON
# is read as UTF-8 regardless of the platform's default encoding.
with open(os.path.join(path, "label_descriptions.json"), 'r', encoding="utf-8") as file:
    label_desc = json.load(file)

In [None]:
class fashion_set(Dataset):
    """Segmentation dataset pairing every image in a folder with an RLE-decoded mask.

    Args:
        path: Root directory of the dataset.
        subpath: Sub-folder of ``path`` that holds the image files.
        df_train: Annotation frame; the ``ImageId``, ``EncodedPixels``,
            ``ClassId``, ``Height`` and ``Width`` columns are read.
        transforms: Optional callable invoked as
            ``transforms(image=..., masks=...)``. Its return value becomes the
            first element of each sample. When ``None``, the untransformed
            ``{"image": ..., "masks": ...}`` dict is returned instead.
    """

    def __init__(self, path, subpath, df_train, transforms=None):
        self.path = os.path.join(path, subpath)
        self.img_id = os.listdir(self.path)
        self.transforms = transforms
        self.df_train = df_train

    def df2mask(self, df, image_path):
        """Decode the run-length annotations of one image into a single mask.

        Args:
            df: Rows of the annotation frame that belong to the image.
            image_path: Path of the image file; the decoded image size defines
                the mask size.

        Returns:
            A ``(masks, images_meta)`` tuple. ``masks`` is a one-element list
            holding a float array of shape ``(height, width)`` in which every
            annotated pixel is set to ``255 - class_id * 4`` and all other
            pixels are 0 (later segments overwrite earlier ones where they
            overlap). ``images_meta`` is a one-element list with a dict
            containing the RGB image, its shape, the raw encoded strings, the
            class ids and the mean ``Height`` / ``Width`` of the rows.
        """
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(image_path) as pil_image:
            img = np.array(pil_image.convert("RGB"))

        meta = {
            'image': img,
            'shape': img.shape,
            'encoded_pixels': list(df['EncodedPixels']),
            'class_ids': list(df['ClassId']),
            'height': int(df["Height"].mean()),
            'width': int(df["Width"].mean())
        }

        # Work on a flat buffer; it is folded back to 2-D at the end.
        height, width = img.shape[:2]
        flat_mask = np.zeros(height * width)

        for pixel_str, class_id in zip(meta['encoded_pixels'], meta['class_ids']):
            # The string alternates "start length start length ...".
            values = list(map(int, pixel_str.split()))
            starts, lengths = values[::2], values[1::2]
            assert max(starts) < flat_mask.shape[0]
            for start, length in zip(starts, lengths):
                begin = start - 1  # run starts are 1-based
                flat_mask[begin:begin + length] = 255 - class_id * 4

        # order='F': the flat indices run down the columns first.
        masks = [flat_mask.reshape((height, width), order='F')]
        return masks, [meta]

    def __len__(self):
        """Return the number of files found in the image folder."""
        return len(self.img_id)

    def __getitem__(self, idx):
        """Return ``(sample, meta)`` for the ``idx``-th file of the image folder.

        ``sample`` is the output of ``transforms`` when one was given, otherwise
        the plain ``{"image": ..., "masks": ...}`` dict. ``meta`` is the metadata
        dict produced by ``df2mask``.
        """
        file_name = self.img_id[idx]  # file name of the idx-th image (str)
        image_path = os.path.join(self.path, file_name)

        # The image id is the file name without its extension.
        image_id = os.path.splitext(file_name)[0]
        # All annotation rows that belong to this image id.
        rows = self.df_train[self.df_train["ImageId"] == image_id]

        masks, images_meta = self.df2mask(rows, image_path)
        meta = images_meta[0]
        # Reuse the array decoded by df2mask instead of reading the file twice.
        img = meta['image']

        if self.transforms is not None:
            sample = self.transforms(image=img, masks=masks)
        else:
            # Without transforms, hand back the raw data in the same dict
            # layout instead of failing on an unassigned variable.
            sample = {"image": img, "masks": masks}

        return sample, meta

In [None]:
class img_show_set(fashion_set):
    """Untransformed view of ``fashion_set`` used for visual inspection.

    Items are ``(image, masks)`` pairs: the RGB image as an array and the mask
    list produced by ``df2mask``. ``__len__`` and ``df2mask`` are inherited
    from ``fashion_set`` unchanged.

    Args:
        path: Root directory of the dataset.
        subpath: Sub-folder of ``path`` that holds the image files.
        df_train: Annotation frame forwarded to ``fashion_set``.
    """

    def __init__(self, path, subpath, df_train):
        # Delegate to the base initializer so both classes stay in sync.
        super().__init__(path, subpath, df_train, transforms=None)

    def __getitem__(self, idx):
        """Return the raw RGB image and its mask list for the ``idx``-th file."""
        file_name = self.img_id[idx]  # file name of the idx-th image (str)
        image_path = os.path.join(self.path, file_name)

        image_id = file_name.split(".")[0]  # image id = file name up to the first dot
        # All annotation rows that belong to this image id.
        rows = self.df_train[self.df_train["ImageId"] == image_id]

        masks, images_meta = self.df2mask(rows, image_path)
        # df2mask already decoded the image; reuse it instead of reading the file again.
        return images_meta[0]['image'], masks

In [None]:
def double_conv(in_channels, out_channels):
    """Build two stacked 3x3 convolutions (padding 1), each followed by an in-place ReLU.

    Args:
        in_channels: Channel count expected by the first convolution.
        out_channels: Channel count produced by both convolutions.

    Returns:
        An ``nn.Sequential`` with the layers conv, ReLU, conv, ReLU.
    """
    layers = []
    channels = in_channels
    for _ in range(2):
        layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU(inplace=True))
        # The second convolution consumes the output of the first one.
        channels = out_channels
    return nn.Sequential(*layers)

class UNet(nn.Module):
    """Three-level U-Net mapping a 3-channel image to per-pixel sigmoid scores.

    The encoder halves the resolution three times with 2x2 max-pooling; the
    decoder restores it with stride-2 transposed convolutions and concatenates
    the matching encoder feature map at every level. Max-pooling floors odd
    sizes, so the input height and width should be divisible by 8 for the skip
    connections to line up.

    Args:
        n_class: Number of output channels.
    """

    def __init__(self, n_class):
        super().__init__()

        # Encoder blocks.
        self.dconv_down1 = double_conv(3, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down4 = double_conv(256, 512)

        self.maxpool = nn.MaxPool2d(2)

        # Decoder blocks; each input is the upsampled map concatenated with
        # the skip connection, hence twice the output width.
        self.dconv_up3 = double_conv(512, 256)
        self.dconv_up2 = double_conv(256, 128)
        self.dconv_up1 = double_conv(128, 64)

        # Learned 2x upsampling layers.
        self.TConv3 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.TConv2 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.TConv1 = nn.ConvTranspose2d(128, 64, 2, stride=2)

        # 1x1 convolution projecting the features to the output channels.
        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, x):
        """Run the network.

        Args:
            x: Image batch of shape ``(N, 3, H, W)``.

        Returns:
            Tensor of shape ``(N, n_class, H, W)`` with values in (0, 1).
        """
        # Encoder: keep each pre-pooling feature map for the skip connections.
        conv1 = self.dconv_down1(x)
        x = self.maxpool(conv1)

        conv2 = self.dconv_down2(x)
        x = self.maxpool(conv2)

        conv3 = self.dconv_down3(x)
        x = self.maxpool(conv3)

        # Bottleneck.
        x = self.dconv_down4(x)

        # Decoder: upsample, concatenate the skip connection, then convolve.
        x = self.TConv3(x)
        x = torch.cat([x, conv3], dim=1)
        x = self.dconv_up3(x)

        x = self.TConv2(x)
        x = torch.cat([x, conv2], dim=1)
        x = self.dconv_up2(x)

        x = self.TConv1(x)
        x = torch.cat([x, conv1], dim=1)
        x = self.dconv_up1(x)

        out = self.conv_last(x)
        # torch.sigmoid replaces the deprecated F.sigmoid.
        out = torch.sigmoid(out)

        return out

In [None]:
# Training-time pipeline applied jointly to the image and its masks:
# resize to the configured size, always rotate by a random angle within
# +/-35 degrees, flip at random, rescale the pixels and convert to tensors.
train_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        # Zero mean and unit std with max_pixel_value=255 simply divides the
        # pixel values by 255.
        A.Normalize(
            mean=(0.0, 0.0, 0.0),
            std=(1.0, 1.0, 1.0),
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ]
)

# One {id: name} lookup per record section of the label description file
# (class id -> class name, attribute id -> attribute name).
# Sections that are not lists of records (for example a metadata dict) are
# skipped instead of raising a TypeError when indexed with 'id'.
# NOTE(review): confirm which top-level sections the JSON actually contains.
label_desc_map = []
for section in label_desc.values():
    if not isinstance(section, list):
        continue
    label_desc_map.append({entry['id']: entry['name'] for entry in section})

In [None]:
def plot_individual_segment(images, masks):
    """Show an image with the first mask drawn on top at 75% opacity.

    Args:
        images: Image array to display.
        masks: Sequence of masks; only ``masks[0]`` is drawn.
    """
    axes = plt.gca()
    # Draw the picture first (default opacity), then the translucent mask.
    for layer, opacity in ((images, None), (masks[0], 0.75)):
        axes.imshow(layer, alpha=opacity)
    axes.axis('off')
    plt.show()
    
img_show = img_show_set(path, train_path, df_train)
# Pick a random sample. randint's upper bound is exclusive, so passing
# len(img_show) covers exactly the valid indices 0 .. len(img_show) - 1.
img_, masks_ = img_show[np.random.randint(0, len(img_show))]
plt.imshow(img_)

In [None]:
# Overlay the decoded mask on the randomly chosen sample image.
plot_individual_segment(img_, masks_)

In [None]:
# Augmented training dataset, served in shuffled batches.
train_dataset = fashion_set(
    path,
    train_path,
    df_train,
    transforms=train_transform,
)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

In [None]:
# Single-channel U-Net trained with binary cross-entropy.
# The model is moved to DEVICE (rather than calling .cuda()) so the notebook
# also runs on CPU-only machines and matches the device used for the batches.
model = UNet(n_class=1).to(DEVICE)
optimizer = Adam(model.parameters(), lr=LR)
criterion = nn.BCELoss()
model.train()

In [None]:
for epoch in range(EPOCHS):
    running_loss = 0.0
    progress = tqdm(train_dataloader, position=0, leave=True)
    for image, images_meta in progress:
        images = image["image"].to(DEVICE)

        # The decoded mask stores 255 - class_id * 4 for annotated pixels and
        # 0 elsewhere, but BCELoss needs targets in [0, 1]; collapse it to a
        # binary foreground mask.
        masks = image["masks"][0].to(DEVICE)
        masks = (masks > 0).float()
        # Add the channel axis, taking the batch and spatial sizes from the
        # images instead of hard-coding [1, 1, 512, 512].
        masks = masks.reshape(images.shape[0], 1, *images.shape[-2:])

        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, masks)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Show the current loss in the progress bar instead of printing one
        # line per step.
        progress.set_postfix(loss=loss.item())

    # Mean loss per batch for this epoch.
    print("loss for epoch " + str(epoch) + ":  " + str(running_loss / max(len(train_dataloader), 1)))