In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Working folders for the raw dataset and for model artefacts.
# The paths are relative; later cells address the same folders through
# /content/removal_bg, so this presumably assumes the working directory
# is /content — TODO confirm.
paths = {
    'DATASET_PATH': os.path.join('removal_bg', 'dataset'),
    'MODEL_PATH': os.path.join('removal_bg', 'model'),
}

for path in paths.values():
    # exist_ok=True creates missing parents and is a no-op when the folder
    # already exists, so the cell can be re-run safely without shelling out.
    os.makedirs(path, exist_ok=True)

Скачать датасет из Kaggle

In [None]:
!pip install -q kaggle
! mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d nikhilroxtomar/person-segmentation

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading person-segmentation.zip to /content
 96% 441M/461M [00:03<00:00, 166MB/s]
100% 461M/461M [00:03<00:00, 137MB/s]


In [None]:
# Move the downloaded archive into the dataset folder, where the next cell extracts it.
!mv "/content/person-segmentation.zip" "/content/removal_bg/dataset"

In [None]:
from zipfile import ZipFile

# Unpack the archive in place: it is read from and extracted into the dataset folder.
dataset_dir = paths['DATASET_PATH']
archive_path = os.path.join(dataset_dir, 'person-segmentation.zip')
with ZipFile(archive_path, 'r') as archive:
    archive.extractall(dataset_dir)

Обработка данных

In [None]:
from data_processing import create_dir, load_data, augment_data

In [None]:
# Load the train/test split of the extracted dataset and report how many
# items each part contains (first count: x, second count: y).
RAW_DATASET_DIR = '/content/removal_bg/dataset/people_segmentation'
(train_x, train_y), (test_x, test_y) = load_data(RAW_DATASET_DIR)

for split_name, xs, ys in (("Train", train_x, train_y), ("Test", test_x, test_y)):
    print(f"{split_name}:\t {len(xs)} - {len(ys)}")


Train:	 5111 - 5111
Test:	 567 - 567


In [None]:
""" Создать папки для сохранения данных после аугментации """
# One folder per split (train/test) and per content type (image/mask).
for split_name in ("train", "test"):
    for content_kind in ("image", "mask"):
        create_dir(f"/content/removal_bg/dataset/new_data/{split_name}/{content_kind}/")


In [None]:
""" Аугментация """
from albumentations import HorizontalFlip, ChannelShuffle, Blur, CenterCrop, Rotate

# The training split is processed with augment=True, the test split with
# augment=False; each split is written to its own folder under new_data.
for xs, ys, split_name, do_augment in (
    (train_x, train_y, "train", True),
    (test_x, test_y, "test", False),
):
    augment_data(xs, ys, f"/content/removal_bg/dataset/new_data/{split_name}/", augment=do_augment)

100%|██████████| 5111/5111 [13:38<00:00,  6.25it/s]
100%|██████████| 567/567 [00:19<00:00, 29.01it/s]


In [None]:
from glob import glob


def load_data(path):
    """Return the sorted PNG paths found in ``path/image`` and ``path/mask``.

    Note: this definition replaces the ``load_data`` imported from
    ``data_processing`` earlier in the notebook.

    Args:
        path: folder that contains the ``image`` and ``mask`` sub-folders.

    Returns:
        Tuple ``(x, y)``: sorted list of image paths and sorted list of mask paths.
    """
    # "*.png" (with the dot) matches the extension only; the previous "*png"
    # pattern also matched any file whose name merely ended in "png".
    x = sorted(glob(os.path.join(path, "image", "*.png")))
    y = sorted(glob(os.path.join(path, "mask", "*.png")))
    return x, y
# Re-read the file lists from the folders written by the augmentation step
# and print the number of images and masks in each split.
AUGMENTED_TRAIN_DIR = '/content/removal_bg/dataset/new_data/train'
AUGMENTED_TEST_DIR = '/content/removal_bg/dataset/new_data/test'

train_x, train_y = load_data(AUGMENTED_TRAIN_DIR)
test_x, test_y = load_data(AUGMENTED_TEST_DIR)

for split_name, xs, ys in (("Train", train_x, train_y), ("Test", test_x, test_y)):
    print(f"{split_name}:\t {len(xs)} - {len(ys)}")

Обучение модели

In [None]:
import os

import numpy as np
import cv2
from glob import glob
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall, Precision

from model import deeplabv3_plus
from metrics import dice_loss, dice_coef, iou
# Seed NumPy's and TensorFlow's global random generators with the same value.
SEED = 11
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [None]:
# Folder on the mounted Google Drive that receives the checkpoints and the training log.
MODEL_DIR = "/content/drive/My Drive/temp"
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, replacing the separate existence check.
os.makedirs(MODEL_DIR, exist_ok=True)

In [None]:

# Fixed checkpoint file name. The ModelCheckpoint callback is created with
# save_best_only=True, so this file always holds the best weights seen so far,
# and the evaluation cells load exactly "model.h5". The previous per-epoch
# pattern "model-{epoch:02d}.h5" never produced a file with that name.
model_path = os.path.join(MODEL_DIR, "model.h5")
# Per-epoch training log written by the CSVLogger callback.
csv_path = os.path.join(MODEL_DIR, "data.csv")

In [None]:
""" Гиперпараметры """
# Training hyperparameters.
num_epochs = 20   # number of epochs passed to model.fit
lr = 1e-4         # learning rate handed to the Adam optimizer
batch_size = 2    # batch size used for both the training and validation datasets

In [None]:
""" Загрузка датасета """
from train import shuffling, load_data, read_image, read_mask, tf_parse, tf_dataset

# The augmented "train" folder is used for training; the "test" folder serves
# as the validation set.
AUGMENTED_DIR = '/content/removal_bg/dataset/new_data'
train_path = os.path.join(AUGMENTED_DIR, "train")
valid_path = os.path.join(AUGMENTED_DIR, "test")

# Only the training pairs go through shuffling(); the validation lists are
# used as load_data returns them.
train_x, train_y = shuffling(*load_data(train_path))
valid_x, valid_y = load_data(valid_path)

for split_name, xs, ys in (("Train", train_x, train_y), ("Valid", valid_x, valid_y)):
    print(f"{split_name}: {len(xs)} - {len(ys)}")


In [None]:
# Build the training and validation input pipelines with tf_dataset (imported
# from train); both are created with the same batch size.
train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)

In [None]:
""" Модель """
# Input resolution; H is also read by save_results further down.
H = W = 512

# Build the network for H x W three-channel inputs.
model = deeplabv3_plus((H, W, 3))

# Train on the Dice loss and track Dice, IoU, recall and precision.
model.compile(
    optimizer=Adam(lr),
    loss=dice_loss,
    metrics=[dice_coef, iou, Recall(), Precision()],
)

In [None]:
# Save the model to model_path only when it is the best one seen so far.
checkpoint_cb = ModelCheckpoint(model_path, verbose=1, save_best_only=True)

# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement,
# never going below 1e-7.
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1)

# Write the per-epoch metrics to csv_path.
csv_logger_cb = CSVLogger(csv_path)

# TensorBoard logging with the callback's default settings.
tensorboard_cb = TensorBoard()

# NOTE(review): patience=20 equals num_epochs=20 from the hyperparameter cell,
# so early stopping cannot trigger within a single run — confirm this is intended.
early_stopping_cb = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False)

callbacks = [checkpoint_cb, reduce_lr_cb, csv_logger_cb, tensorboard_cb, early_stopping_cb]

In [None]:
# Train for num_epochs epochs on train_dataset, using valid_dataset as the
# validation data; checkpointing, LR scheduling, logging and early stopping
# are driven by the callbacks list.
model.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=valid_dataset,
    callbacks=callbacks
)

Оценка модели

In [None]:
# Load the trained weights into the in-memory model.
# NOTE(review): this expects a file named exactly "model.h5" inside MODEL_DIR —
# make sure the ModelCheckpoint filepath actually produces that name.
model.load_weights(os.path.join(MODEL_DIR, "model.h5"))

In [None]:
from tensorflow.keras.utils import CustomObjectScope
from sklearn.metrics import accuracy_score, f1_score, jaccard_score, precision_score, recall_score
from metrics import dice_loss, dice_coef, iou
from train import load_data

In [None]:
def save_results(image, mask, y_pred, save_image_path):
    """Write a side-by-side comparison strip to ``save_image_path``.

    Panels, left to right: input image | ground-truth mask | predicted mask |
    input image multiplied by the predicted mask. A 10-pixel wide grey
    (value 128) column separates neighbouring panels.

    Args:
        image: colour image array of shape (height, width, 3).
        mask: 2-D ground-truth mask; it is multiplied by 255 for display
            (presumably holds 0/1 values — TODO confirm).
        y_pred: 2-D predicted mask; it is used as a per-pixel multiplier for
            ``image`` and multiplied by 255 for display.
        save_image_path: destination path handed to ``cv2.imwrite``.
    """
    # Take the separator height from the image itself rather than from the
    # notebook-level constant H, so the function works for any image height
    # and does not depend on a variable defined in another cell.
    height = image.shape[0]
    line = np.ones((height, 10, 3)) * 128

    # Ground truth: (h, w) -> (h, w, 1) -> (h, w, 3), then scaled for display.
    mask = np.expand_dims(mask, axis=-1)
    mask = np.concatenate([mask, mask, mask], axis=-1)
    mask = mask * 255

    # Prediction: same channel expansion as the ground truth.
    y_pred = np.expand_dims(y_pred, axis=-1)
    y_pred = np.concatenate([y_pred, y_pred, y_pred], axis=-1)

    # Apply the unscaled prediction to the image before scaling it for display.
    masked_image = image * y_pred
    y_pred = y_pred * 255

    # Join all panels horizontally (axis=1) and write the strip to disk.
    cat_images = np.concatenate([image, line, mask, line, y_pred, line, masked_image], axis=1)
    cv2.imwrite(save_image_path, cat_images)

In [None]:
# tqdm drives the progress bar of the evaluation loop; it is not imported in
# any other visible cell, so import it here to keep the cell runnable on a fresh kernel.
from tqdm import tqdm

create_dir("results")

# Load the saved model. The custom loss and metrics are registered in the
# scope so that load_model can resolve them by name.
with CustomObjectScope({'iou': iou, 'dice_coef': dice_coef, 'dice_loss': dice_loss}):
    # Use MODEL_DIR so the checkpoint location is defined in one place only.
    model = tf.keras.models.load_model(os.path.join(MODEL_DIR, "model.h5"))

# Load the test split.
dataset_path = "/content/removal_bg/dataset/new_data"
valid_path = os.path.join(dataset_path, "test")
test_x, test_y = load_data(valid_path)
print(f"Test: {len(test_x)} - {len(test_y)}")

# Predict every test image, save a visual comparison and collect per-image metrics.
SCORE = []
for image_path, mask_path in tqdm(zip(test_x, test_y), total=len(test_x)):
    # File name without directory and extension; unlike splitting on "/" and
    # ".", this keeps names that contain additional dots intact.
    name = os.path.splitext(os.path.basename(image_path))[0]

    # Read the image, scale it to [0, 1] and add the batch dimension.
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    batch = np.expand_dims(image / 255.0, axis=0)

    # Read the ground-truth mask as a single-channel image.
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    # Predict and binarise at 0.5; verbose=0 silences the per-call progress output.
    y_pred = model.predict(batch, verbose=0)[0]
    y_pred = np.squeeze(y_pred, axis=-1)
    y_pred = (y_pred > 0.5).astype(np.int32)

    # Save the side-by-side visualisation.
    save_image_path = f"results/{name}.png"
    save_results(image, mask, y_pred, save_image_path)

    # Pixel-wise metrics on the flattened masks.
    mask = mask.flatten()
    y_pred = y_pred.flatten()
    acc_value = accuracy_score(mask, y_pred)
    f1_value = f1_score(mask, y_pred, labels=[0, 1], average="binary")
    jac_value = jaccard_score(mask, y_pred, labels=[0, 1], average="binary")
    recall_value = recall_score(mask, y_pred, labels=[0, 1], average="binary")
    precision_value = precision_score(mask, y_pred, labels=[0, 1], average="binary")
    SCORE.append([name, acc_value, f1_value, jac_value, recall_value, precision_value])

In [None]:
# pandas is needed for the CSV export below and is not imported in any other
# visible cell; importing it here keeps the cell runnable on a fresh kernel.
import pandas as pd

METRIC_NAMES = ["Accuracy", "F1", "Jaccard", "Recall", "Precision"]

# Average every metric over all evaluated images (column 0 of each SCORE row
# is the image name and is skipped).
score = [s[1:] for s in SCORE]
score = np.mean(score, axis=0)
for metric_name, metric_value in zip(METRIC_NAMES, score):
    print(f"{metric_name}: {metric_value:0.5f}")

# Store the per-image scores next to the model, using MODEL_DIR instead of a
# second hard-coded path (the old literal also contained a doubled slash).
df = pd.DataFrame(SCORE, columns=["Image"] + METRIC_NAMES)
df.to_csv(os.path.join(MODEL_DIR, "score.csv"))