In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, fname) for fname in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imread

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, Sequential

# Data Preparation

In [None]:
from pathlib import Path
import zipfile

# Archives shipped with the competition (read-only Kaggle input directory).
train_zip_path = '../input/carvana-image-masking-challenge/train.zip'
masks_zip_path = '../input/carvana-image-masking-challenge/train_masks.zip'
test_zip_path = '../input/carvana-image-masking-challenge/test.zip'


def extract_if_missing(zip_path, extracted_dir, dest_dir='/kaggle/working'):
    """Unpack ``zip_path`` into ``dest_dir`` unless ``extracted_dir`` already exists.

    Args:
        zip_path: Path of the zip archive to unpack.
        extracted_dir: Directory whose presence means the archive was
            already unpacked; extraction is skipped when it exists.
        dest_dir: Directory the archive members are extracted into.
    """
    if Path(extracted_dir).exists():
        return
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(dest_dir)


# Skipping already-extracted folders keeps this cell cheap to re-run.
for archive_path, extracted_dir in (
    (train_zip_path, '/kaggle/working/train'),
    (masks_zip_path, '/kaggle/working/train_masks'),
):
    extract_if_missing(archive_path, extracted_dir)

# The test archive (test_zip_path) is not extracted: its extraction code was
# already disabled. To enable it, add (test_zip_path, '/kaggle/working/test')
# to the tuple above.

In [None]:
# Sanity check: number of entries in each extracted folder.
for label, folder in (("train set:  ", "/kaggle/working/train"),
                      ("train masks:", "/kaggle/working/train_masks")):
    print(label, len(os.listdir(folder)))

In [None]:
from glob import glob

# Folders that hold the extracted training images and their masks.
root_dir = "/kaggle/working"
train_path = os.path.join(root_dir, "train")
train_masks_path = os.path.join(root_dir, "train_masks")
#test_path = os.path.join(root_dir, "test")

# glob() returns matches in arbitrary order; sorting keeps the file lists
# (and everything derived from them) in the same order on every run.
train_filepaths = sorted(glob(os.path.join(train_path, "*.jpg")))
train_masks_filepaths = sorted(glob(os.path.join(train_masks_path, "*.gif")))
#test_filepaths = sorted(glob(os.path.join(test_path, "*.jpg")))

# Get unique ids of images
def get_root_name(filepaths):
    """Return, for each path, the part of its file name before the first underscore.

    Args:
        filepaths: Iterable of file paths.

    Returns:
        A list with one entry per path, in the same order. A file name
        without an underscore is returned whole.
    """
    return [os.path.basename(path).split("_", 1)[0] for path in filepaths]

# Distinct file-name prefixes (the part before the first "_") of images and masks.
all_train_ids = {*get_root_name(train_filepaths)}
all_train_masks_ids = {*get_root_name(train_masks_filepaths)}
#all_test_ids = set(get_root_name(test_filepaths))

In [None]:
def display_images():
    """Plot two training images, each next to its mask.

    Reads the module-level ``root_dir`` and ``all_train_ids``. The ids at
    positions 0 and 2 of the sorted id list are shown, using the ``_01`` and
    ``_03`` files respectively. The figure is laid out as a 5x2 grid of which
    only the first two rows are filled.
    """
    plt.figure(figsize=(15, 25))
    title = ['Input Image', 'Mask']

    # Sort once up front: set iteration order is arbitrary, and converting the
    # set to a list on every access is loop-invariant work.
    car_ids = sorted(all_train_ids)

    for i in range(0, 4, 2):
        stem = f"{car_ids[i]}_0{i+1}"

        plt.subplot(5, 2, i+1)
        plt.title(title[0])
        path_img = os.path.join(root_dir, "train", stem + ".jpg")
        plt.imshow(imread(path_img))
        plt.axis("off")

        plt.subplot(5, 2, i+2)
        plt.title(title[1])
        path_mask_img = os.path.join(root_dir, "train_masks", stem + "_mask.gif")
        plt.imshow(imread(path_mask_img))
        plt.axis("off")
    plt.show()

display_images()

In [None]:
def get_image_id(path):
    """Return the file name of ``path`` without its directory and final extension."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem

# One row per training image: its path, image id, matching mask path and car id.
df = pd.DataFrame(dict(image_path=train_filepaths))
df['image_id'] = df['image_path'].map(get_image_id)
# Build the mask path from the masks folder and the image id. The previous
# str.replace('train', 'train_masks') rewrote every "train" substring in the
# path, which breaks as soon as any other path component contains "train".
df['mask_path'] = df['image_id'].map(
    lambda img_id: os.path.join(train_masks_path, img_id + '_mask.gif'))
# The car id is the part of the image id before the first underscore.
df['car_id'] = df['image_id'].map(lambda img_id: img_id.split('_')[0])

In [None]:
df

In [None]:
from sklearn.model_selection import train_test_split

def split_data(ids, col="car_id"):
    """Split ``ids`` into train/validation/test groups and select the matching rows.

    20% of the ids are held out and that hold-out is halved into validation
    and test ids, so the ids end up split roughly 80/10/10. Rows are taken
    from the module-level ``df``.

    Args:
        ids: Collection of id values found in column ``col`` of ``df``.
        col: Name of the ``df`` column the ids are matched against.

    Returns:
        Tuple ``(train_df, valid_df, test_df)`` of DataFrames.
    """
    # Callers pass ids that come from a set, whose order can change between
    # interpreter runs. Sorting first makes the fixed random_state produce the
    # same split every time.
    ordered_ids = sorted(ids)
    train_ids, holdout_ids = train_test_split(ordered_ids, random_state=42, test_size=.2)
    valid_ids, test_ids = train_test_split(holdout_ids, random_state=42, test_size=.5)
    train_df = df[df[col].isin(train_ids)]
    valid_df = df[df[col].isin(valid_ids)]
    test_df = df[df[col].isin(test_ids)]
    return train_df, valid_df, test_df

# Split by car id and report how many image rows each subset received.
train_df, valid_df, test_df = split_data(list(all_train_ids))
for label, frame in (("train_df: ", train_df),
                     ("valid_df: ", valid_df),
                     ("test_df:  ", test_df)):
    print(label, frame.shape[0])

In [None]:
from tensorflow.image import stateless_random_crop, stateless_random_brightness

# Size passed to tf.image.resize for every loaded image and mask.
IMG_SIZE = [512, 512]
# Seeded generator used by data_augmentation for the flip decision and the brightness seed.
rng = tf.random.Generator.from_seed(1)

def decode(path):
    """Read an image file, resize it to ``IMG_SIZE`` and divide its values by 255.

    Args:
        path: Path of the file to load.

    Returns:
        The resized 3-channel image tensor after division by 255.
    """
    # NOTE(review): preprocess() also calls this on the .gif mask files, so it
    # relies on decode_jpeg accepting them; confirm against the TF docs.
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, IMG_SIZE)
    return resized / 255.0

@tf.function
def preprocess(image_path, mask_path):
    """Load one image and its mask as model-ready tensors.

    Args:
        image_path: Path of the input image file.
        mask_path: Path of the corresponding mask file.

    Returns:
        Tuple ``(image, mask)``: the 3-channel image from ``decode`` and a
        single-channel mask containing only 0.0 and 1.0.
    """
    image = decode(image_path)
    mask = decode(mask_path)
    mask = mask[:, :, :1] # take one channel
    # decode() resizes with interpolation, which leaves fractional values along
    # the mask outline. Threshold back to {0, 1} so the labels stay binary for
    # the loss and for the equality-based accuracy metric.
    mask = tf.cast(mask > 0.5, tf.float32)
    return image, mask

@tf.function
def data_augmentation(image, mask):
    """Randomly flip an image/mask pair and jitter the image brightness.

    Args:
        image: Image tensor scaled to [0, 1], as produced by ``preprocess``.
        mask: Mask tensor aligned with ``image``.

    Returns:
        Tuple ``(image, mask)`` after augmentation. The mask is only ever
        flipped, never brightness-shifted.
    """
    # Flip both tensors together so the mask stays aligned with the image.
    if rng.uniform(()) > 0.5: 
        image = tf.image.flip_left_right(image)
        mask = tf.image.flip_left_right(mask)

    seed = rng.make_seeds(2)[0]
    image = stateless_random_brightness(image, max_delta=0.1, seed=seed)
    # The brightness shift can move pixels outside [0, 1]; clip so augmented
    # images keep the same value range as the non-augmented ones.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image, mask

def make_dataset(df, shuffle=False, augment=False, batch_size=16, buffer_size=1000):
    """Build a batched ``tf.data`` pipeline of (image, mask) pairs.

    Args:
        df: DataFrame with ``image_path`` and ``mask_path`` columns.
        shuffle: Whether to shuffle the samples.
        augment: Whether to apply ``data_augmentation`` to each sample.
        batch_size: Number of samples per batch.
        buffer_size: Size of the shuffle buffer (used only when ``shuffle``).

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(image, mask)`` tensors.
    """
    ds = tf.data.Dataset.from_tensor_slices((df["image_path"].values, df["mask_path"].values))
    if shuffle:
        # Shuffle the path strings before decoding: the buffer then holds
        # small strings instead of up to buffer_size decoded image/mask
        # tensors, and the first batch no longer waits for them to be decoded.
        ds = ds.shuffle(buffer_size)
    ds = ds.map(preprocess, num_parallel_calls=5)
    if augment:
        ds = ds.map(data_augmentation, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    return ds.prefetch(1)

# Only the training pipeline is shuffled and augmented; validation and test use the defaults.
train_data = make_dataset(df=train_df, shuffle=True, augment=True)
valid_data = make_dataset(df=valid_df)
test_data = make_dataset(df=test_df)

In [None]:
train_data

**Deleting the unused DataFrames to free memory**

In [None]:
# The datasets already captured the file paths, so the frames are no longer needed.
del df, train_df, valid_df, test_df

In [None]:
import tensorflow as tf

In [None]:
IMG_SIZE = [512, 512]

# Building the Model

In [None]:
def upsample(filters, size, strides):
    """Build a Conv2DTranspose -> BatchNormalization -> ReLU block.

    Args:
        filters: Number of output filters of the transposed convolution.
        size: Kernel size of the transposed convolution.
        strides: Stride of the transposed convolution.

    Returns:
        A ``Sequential`` model holding the three layers in that order.
    """
    transposed_conv = tf.keras.layers.Conv2DTranspose(
        filters,
        size,
        strides=strides,
        padding="same",
        kernel_initializer=tf.random_normal_initializer(0., 0.02),
        use_bias=False,
    )
    block = Sequential()
    for layer in (transposed_conv,
                  tf.keras.layers.BatchNormalization(),
                  tf.keras.layers.ReLU()):
        block.add(layer)
    return block

In [None]:
from tensorflow.keras.applications import VGG19

# Pretrained VGG19 without its classifier head, used as a frozen encoder.
# NOTE(review): the data pipeline feeds images scaled to [0, 1]; confirm this
# matches the input preprocessing the ImageNet weights were trained with.
base_model = VGG19(weights="imagenet", include_top=False, input_shape=IMG_SIZE + [3])

# Encoder activations exposed to the decoder, ordered from high to low resolution.
layers_names = [
    "block2_conv1", "block2_conv2",    # 256x256
    "block3_conv1", "block3_conv2",    # 128x128
    "block4_conv1", "block4_conv2",    # 64x64
    "block5_conv1",                    # 32x32
]

layers = [base_model.get_layer(layer_name).output for layer_name in layers_names]
down_stack = tf.keras.Model(inputs=base_model.input, outputs=layers)
# Freeze the encoder so only the decoder layers are trained.
down_stack.trainable = False


# Decoder blocks: for each filter count, a stride-1 block (keeps the resolution)
# followed by a stride-2 block (doubles it): 32 -> 64 -> 128 -> 256.
up_stack = [
    upsample(filters, 3, strides)
    for filters in (512, 256, 128)
    for strides in (1, 2)
]

In [None]:
def unet_generator(output_channels=1):
    """Assemble the U-Net: frozen ``down_stack`` encoder plus ``up_stack`` decoder.

    Uses the module-level ``IMG_SIZE``, ``down_stack`` and ``up_stack``.

    Args:
        output_channels: Number of channels of the predicted mask.

    Returns:
        A ``tf.keras.Model`` mapping an ``IMG_SIZE + [3]`` image to a
        sigmoid-activated output with ``output_channels`` channels.
    """
    inputs = tf.keras.layers.Input(shape=IMG_SIZE + [3])

    initializer = tf.random_normal_initializer(0., 0.02)
    # Final stride-2 transposed convolution that produces the sigmoid mask.
    output = tf.keras.layers.Conv2DTranspose(
        output_channels, 3, strides=2, activation='sigmoid',
        padding="same", kernel_initializer=initializer
    )

    # Downsampling: the last encoder output is the bottleneck, and the rest
    # are skip candidates, visited from lowest to highest resolution.
    skips = down_stack(inputs)
    x = skips[-1]
    skips = reversed(skips[:-1])

    # Upsampling and establishing the skip connections. Every decoder block
    # consumes one skip from the zip, but only the stride-2 blocks concatenate
    # it; the skip paired with a stride-1 block is left unused.
    for up, skip in zip(up_stack, skips):
        x = up(x)
        if up.layers[0].strides == (2, 2):
            x = tf.keras.layers.Concatenate()([x, skip])

    x = output(x)

    return tf.keras.Model(inputs=inputs, outputs=x)

model = unet_generator()


In [None]:
tf.keras.utils.plot_model(model, show_shapes=True)


**Model output before training**

In [None]:
# Keep the first image/mask pair of one training batch as the fixed sample for
# the prediction previews. The earlier nested loop only broke out of its inner
# loop, so it processed a second batch for nothing.
for images, masks in train_data.take(1):
    sample_image, sample_mask = images[0], masks[0]

In [None]:
def visualize(display_list):
    """Show the given images side by side in a single row.

    Args:
        display_list: Up to three image arrays; they are titled, in order,
            'Input Image', 'True Mask' and 'Predicted Mask'.
    """
    title = ['Input Image', 'True Mask', 'Predicted Mask']
    n_panels = len(display_list)

    plt.figure(figsize=(15, 15))
    for position, picture in enumerate(display_list, start=1):
        plt.subplot(1, n_panels, position)
        plt.title(title[position - 1])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(picture))
        plt.axis('off')
    plt.show()

def show_predictions(sample_image, sample_mask):
    """Plot an image, its true mask and the mask predicted by the module-level ``model``.

    Args:
        sample_image: A single image tensor (no batch dimension).
        sample_mask: The true mask belonging to ``sample_image``.
    """
    # predict() expects a batch, so add a leading batch axis of size one.
    batch = sample_image[tf.newaxis, ...]
    height, width = IMG_SIZE[0], IMG_SIZE[1]
    pred_mask = model.predict(batch).reshape(height, width, 1)
    visualize([sample_image, sample_mask, pred_mask])
    
show_predictions(sample_image, sample_mask)

In [None]:
# Early stopping with a patience of 5 epochs; the best weights are restored when it stops.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# Number of training epochs. With a single epoch the patience above can never be exhausted.
epochs = 1

class DisplayCallback(tf.keras.callbacks.Callback):
    """Keras callback that previews the model's prediction on the fixed sample.

    The preview is drawn at the start of every third epoch (epoch indices
    2, 5, 8, ...), using the module-level ``sample_image`` and ``sample_mask``.
    """

    def on_epoch_begin(self, epoch, logs=None):
        epoch_number = epoch + 1
        if epoch_number % 3 != 0:
            return
        show_predictions(sample_image, sample_mask)
    
# Train with Adam on binary cross-entropy, tracking accuracy on both splits.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)
model_history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=epochs,
    callbacks=[DisplayCallback(), early_stopping_cb],
)

In [None]:
# Per-epoch curves recorded by model.fit.
history = model_history.history
loss, val_loss = history['loss'], history['val_loss']
acc, val_acc = history['accuracy'], history['val_accuracy']

# Left panel: accuracy. Right panel: loss. Training and validation share each panel.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 5))
panels = (
    (acc_ax, acc, val_acc, 'Training Accuracy', 'Validation Accuracy', 'Accuracy'),
    (loss_ax, loss, val_loss, 'Training Loss', 'Validation Loss', 'Binary Cross Entropy'),
)
for ax, train_curve, valid_curve, train_label, valid_label, y_label in panels:
    ax.plot(train_curve, label=train_label)
    ax.plot(valid_curve, label=valid_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
plt.show()

In [None]:
# Show the prediction for every image of the first test batch.
for batch_images, batch_masks in test_data.take(1):
    for test_image, test_mask in zip(batch_images, batch_masks):
        show_predictions(test_image, test_mask)