<a href="https://colab.research.google.com/github/panzershracker/Deep-learning-in-comp.-vision/blob/master/HW4_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Домашнее задание к уроку №4. Евдокимов Алексей

# Задание:

Обучить модель семантической сегментации (человек-vs-фон) на подмножестве датасета MS COCO.
Библиотеки: [Python, TensorFlow]

In [0]:
%tensorflow_version 2.x

In [0]:
import os

import matplotlib.pyplot as plt
import numpy as np
import skimage.io as io
import tensorflow as tf

# Загрузка датасета COCO

In [0]:
# !rm -rf data

In [0]:
# Create the local working directory that the download/unzip/clone cells cd into.
!mkdir -p data

In [0]:
# Switch for the data-preparation cells below: a truthy value runs the
# download / unzip / cocoapi build steps, 0 skips them.
load = 0

In [0]:
if load:
  # Download the COCO 2017 train images, val images and annotation archives into ./data.
  !cd data && wget http://images.cocodataset.org/zips/train2017.zip 
  !cd data && wget http://images.cocodataset.org/zips/val2017.zip 
  !cd data && wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 

In [0]:
if load:
  # Unpack the three downloaded archives in place (-q suppresses the per-file listing).
  !cd data && unzip -q train2017.zip
  !cd data && unzip -q val2017.zip
  !cd data && unzip -q annotations_trainval2017.zip

In [0]:
if load:
  # Clone the COCO API sources into ./data/cocoapi and build the Python API with make.
  !cd data && git clone https://github.com/cocodataset/cocoapi
  !cd data/cocoapi/PythonAPI && make

In [0]:
# !pip install --upgrade pycocotools

In [0]:
# Root directory of the local COCO data; annotation and image paths are joined onto it.
COCO_ROOT = './data/'

import sys
# Put the cocoapi PythonAPI directory first on the module search path.
# This must run before the pycocotools import below - presumably so the import
# resolves to the checkout built under COCO_ROOT.
sys.path.insert(0, os.path.join(COCO_ROOT, 'cocoapi/PythonAPI'))

from pycocotools.coco import COCO

# Класс Dataset для сегментации

In [0]:
class Dataset:
  """Base class that turns a list of items into tf.data segmentation pipelines.

  Subclasses are expected to provide:
    * ``self.img_list`` - a sequence of item identifiers, and
    * ``self.read_images(item)`` - a callable returning a uint8 array of rank 3
      whose first three channels are the image and whose fourth channel is
      the mask.
  """

  def crop_image(self, img, inp_size, random_crop=False):
    """Pad and crop a rank-3 tensor to ``inp_size`` x ``inp_size``.

    Args:
      img: rank-3 tensor (height, width, channels).
      inp_size: side length of the square output.
      random_crop: if True take a random window, otherwise the central one.

    Returns:
      Tensor with spatial size ``inp_size`` x ``inp_size`` and the same
      channels as ``img``.
    """
    shape = tf.shape(img)
    # Zero-pad at the bottom/right so both spatial dimensions are at least
    # inp_size; the channel axis is never padded.
    pad = (
        [0, tf.maximum(inp_size - shape[0], 0)],
        [0, tf.maximum(inp_size - shape[1], 0)],
        [0, 0]
    )
    img = tf.pad(img, pad)

    if random_crop:
      img = tf.image.random_crop(img, (inp_size, inp_size, shape[2]))
    else:
      # Central crop: offsets are computed from the padded shape.
      shape = tf.shape(img)
      ho = (shape[0] - inp_size) // 2
      wo = (shape[1] - inp_size) // 2
      img = img[ho : ho+inp_size, wo : wo+inp_size, :]

    return img

  def _item_to_sample(self, item, inp_size, random_crop):
    """Load one item and split it into an (image, mask) pair of float32 tensors."""
    # read_images runs as eager Python, so it has to be wrapped in py_function.
    img_combined = tf.py_function(self.read_images, [item], tf.uint8)
    img_combined = self.crop_image(img_combined, inp_size, random_crop)

    # Channels 0..2 are the image (scaled to [0, 1]); channel 3 is the mask.
    img = tf.cast(img_combined[..., :3], tf.float32) / np.float32(255)
    mask_class = tf.cast(img_combined[..., 3:4], tf.float32)

    return img, mask_class

  def train_dataset(self, batch_size, epochs, inp_size):
    """Build the training pipeline: shuffle, random crop, repeat, batch.

    Args:
      batch_size: number of samples per batch (incomplete batches are dropped).
      epochs: how many times the whole item list is repeated.
      inp_size: side length of the square crops.

    Returns:
      A batched ``tf.data.Dataset`` yielding ``(image, mask)`` pairs.
    """
    dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
    dataset = dataset.shuffle(buffer_size=len(self.img_list))
    dataset = dataset.map(
        lambda item: self._item_to_sample(item, inp_size, random_crop=True))
    dataset = dataset.repeat(epochs)
    dataset = dataset.batch(batch_size, drop_remainder=True)

    return dataset

  def val_dataset(self, batch_size, inp_size):
    """Build the validation pipeline: fixed order, central crop, single pass.

    Args:
      batch_size: number of samples per batch (incomplete batches are dropped).
      inp_size: side length of the square crops.

    Returns:
      A batched ``tf.data.Dataset`` yielding ``(image, mask)`` pairs.
    """
    dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
    dataset = dataset.map(
        lambda item: self._item_to_sample(item, inp_size, random_crop=False))
    dataset = dataset.batch(batch_size, drop_remainder=True)

    return dataset

# Класс для датасета COCO

Класс наследуется от Dataset

In [0]:
class COCO_dataset(Dataset):
  """Dataset of COCO images that contain the 'person' category."""

  def __init__(self, sublist):
    """Load the annotation index for one split.

    Args:
      sublist: split name used in the annotation file name
        ``instances_<sublist>2017.json`` (the notebook uses 'train' and 'val').
    """
    ann_file_fpath = os.path.join(COCO_ROOT, 'annotations', 'instances_' + sublist + '2017.json')
    self.coco = COCO(ann_file_fpath)
    self.cat_ids = self.coco.getCatIds(catNms=['person'])
    self.img_list = self.coco.getImgIds(catIds=self.cat_ids)

  def read_images(self, img_id):
    """Read one image and its person mask.

    Args:
      img_id: eager tensor holding a COCO image id (as passed by
        ``tf.py_function``).

    Returns:
      Array with the image channels followed by one binary mask channel
      (1 where any 'person' annotation covers the pixel, 0 elsewhere).
    """
    img_id = int(img_id.numpy())
    img_data = self.coco.loadImgs(img_id)[0]
    # Keep the last two components of the URL path as the path relative to
    # COCO_ROOT (presumably '<split>2017/<file>.jpg' - confirm against the data).
    img_fname = '/'.join(img_data['coco_url'].split('/')[-2:])

    img = io.imread(os.path.join(COCO_ROOT, img_fname))
    if len(img.shape) == 2:
      # Single-channel image: replicate it into three channels.
      img = np.tile(img[..., None], (1,1,3))

    ann_ids = self.coco.getAnnIds(imgIds=img_data['id'], catIds=self.cat_ids, iscrowd=None)
    anns = self.coco.loadAnns(ann_ids)
    mask_class = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)

    for ann in anns:
      # Union of the instance masks. OR-ing (rather than adding) cannot wrap
      # around in uint8 however many instances overlap on a pixel.
      mask_class |= (self.coco.annToMask(ann) > 0).astype(np.uint8)

    img_combined = np.concatenate([img, mask_class[..., None]], axis=2)

    return img_combined

In [13]:
# Build the train and val wrappers; each one loads its instances_<split>2017.json
# annotation file and keeps the ids of images that contain the 'person' category.
coco_train = COCO_dataset('train')
coco_valid = COCO_dataset('val')

loading annotations into memory...
Done (t=23.84s)
creating index...
index created!
loading annotations into memory...
Done (t=0.82s)
creating index...
index created!


In [0]:
# Side length (pixels) of the square crops produced by the dataset pipelines.
INP_SIZE = 256
# Number of epochs; passed both to the dataset builders and to model.fit.
EPOCHS = 1
# Samples per batch.
BATCH = 128
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001

In [0]:
# Batched tf.data pipelines for the two splits.
# NOTE(review): the validation split is also built with train_dataset, i.e. it
# is shuffled, randomly cropped and repeated EPOCHS times - confirm this is
# intended rather than a deterministic validation pipeline.
train = coco_train.train_dataset(batch_size=BATCH, epochs=EPOCHS, inp_size=INP_SIZE)
test = coco_valid.train_dataset(batch_size=BATCH, epochs=EPOCHS, inp_size=INP_SIZE)

In [16]:
# Sanity check: display the types of the two pipeline objects.
type(train), type(test)

(tensorflow.python.data.ops.dataset_ops.BatchDataset,
 tensorflow.python.data.ops.dataset_ops.BatchDataset)

# Создание модели U-Net

In [0]:
def model(inp_size=256):
  """Build a reduced U-Net for binary (person vs. background) segmentation.

  The encoder has four stages (64, 128, 256, 512 filters), each followed by
  2x2 max pooling. The decoder mirrors it with stride-2 transposed
  convolutions whose outputs are concatenated with the matching encoder
  feature map. There are no extra convolutions at the bottleneck.

  Args:
    inp_size: side length of the square RGB input. It must be divisible by 16
      so that the upsampled maps line up with the skip connections.

  Returns:
    A ``tf.keras.Model`` mapping ``(inp_size, inp_size, 3)`` images to a
    ``(inp_size, inp_size, 1)`` sigmoid probability map.
  """

  def conv(filters, tensor):
    # 3x3 same-padded ReLU convolution: the basic building block.
    return tf.keras.layers.Conv2D(
        filters, (3,3), padding='same', activation='relu')(tensor)

  def down(tensor):
    # Halve the spatial resolution.
    return tf.keras.layers.MaxPool2D((2,2))(tensor)

  def up(filters, tensor, skip):
    # Double the spatial resolution, then attach the encoder features.
    tensor = tf.keras.layers.Conv2DTranspose(
        filters, (3,3), strides=(2,2), padding='same', activation='relu')(tensor)
    return tf.keras.layers.Concatenate(axis=3)([skip, tensor])

  x = tf.keras.layers.Input((inp_size, inp_size, 3))

  # Encoder: keep each stage's output for the skip connections.
  out1 = conv(64, conv(64, x))
  out2 = conv(128, down(out1))
  out3 = conv(256, down(out2))
  out4 = conv(512, down(out3))
  out = down(out4)

  # Decoder: upsample, concatenate with the skip, then convolve.
  out = conv(512, up(512, out, out4))
  out = conv(256, up(256, out, out3))
  out = conv(128, up(128, out, out2))
  out = conv(64, up(64, out, out1))

  out = conv(64, out)
  out = tf.keras.layers.Conv2D(1, (3,3), padding='same', activation='sigmoid')(out)

  return tf.keras.Model(inputs=x, outputs=out)

# Note: this rebinds the name `model` from the builder function to the built
# network; the training cell uses `model` as the network.
model = model(INP_SIZE)

# Схематическая визуализация сети

In [0]:
# tf.keras.utils.plot_model(model, show_shapes=1, dpi=72)

# Обучение

In [0]:
%%time 

optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics='accuracy'
)

hist = model.fit(train, epochs=EPOCHS)

plt.plot(hist.history['loss'])
plt.show()

 13/500 [..............................] - ETA: 7:11:24 - loss: 0.5545 - accuracy: 0.7661