In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file under the read-only input directory, as the comment above
# describes. (/kaggle/working is empty on a fresh run, so walking it here
# would print nothing.)
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import libraries

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import keras.backend as K
from zipfile import ZipFile
from IPython.display import clear_output
from pathlib import Path
import zipfile

# Data loading and preprocessing

In [None]:
# Unpack the training archive into the writable working directory.
train_zip_path = "/kaggle/input/carvana-image-masking-challenge/train.zip"
with ZipFile(train_zip_path, mode="r") as z_:
    z_.extractall(path="/kaggle/working")

In [None]:
# Unpack the mask archive into the writable working directory.
masks_zip_path = "/kaggle/input/carvana-image-masking-challenge/train_masks.zip"
with ZipFile(masks_zip_path, mode="r") as z_:
    z_.extractall(path="/kaggle/working")


In [None]:
# Print how many entries each extracted directory contains (images first, then masks).
for extracted_dir in ("/kaggle/working/train", "/kaggle/working/train_masks"):
    print(len(os.listdir(extracted_dir)))

In [None]:
#Train dataframe

# Collect every file under the extracted train directory. os.walk yields
# entries in filesystem order, which is not guaranteed to be stable, so the
# paths are sorted: the row order feeds the seeded train/validation split and
# must be deterministic for that split to be reproducible.
paths = sorted(
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk("/kaggle/working/train")
    for filename in filenames
)

# The id is the file name up to its first "." (i.e. without the extension).
car_ids = [os.path.basename(path).split(".")[0] for path in paths]

# One row per image, indexed by id so other per-id columns can be aligned on it.
df = pd.DataFrame({"id": car_ids, "car_path": paths})
df = df.set_index("id")
df

In [None]:
#Train_mask dataframe

# Full path of every file found under the extracted masks directory.
mask_path = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk("/kaggle/working/train_masks")
    for filename in filenames
]

# Id = file name up to its first ".", with everything from "_mask" onwards removed.
car_ids = [
    os.path.basename(found).split(".")[0].split("_mask")[0]
    for found in mask_path
]

# One row per mask file, indexed by the derived id.
mask_df = pd.DataFrame({"id": car_ids, "mask_path": mask_path}).set_index("id")
mask_df

In [None]:
# Align the mask paths to the images on the shared "id" index, then drop the
# id index in favour of a plain positional one.
df = df.assign(mask_path=mask_df["mask_path"]).reset_index(drop=True)
df

In [None]:
#data augmentation function

# Target size that preprocess() resizes both the images and the masks to.
image_size = [256, 256]
# Channel count requested when decoding files in preprocess(); the same value
# is also passed to unet_model() as the model's number of output channels.
OUTPUT_CHANNELS = 3

def augmentation(input_image, mask_image):
    """Randomly mirror an image and its mask left-to-right, together.

    A single uniform sample decides the flip, so the image and the mask are
    either both flipped or both left untouched.

    Args:
        input_image: image tensor.
        mask_image: mask tensor belonging to `input_image`.

    Returns:
        The `(input_image, mask_image)` pair, flipped when the sample exceeds 0.5.
    """
    should_flip = tf.random.uniform(()) > 0.5
    if should_flip:
        input_image, mask_image = (
            tf.image.flip_left_right(input_image),
            tf.image.flip_left_right(mask_image),
        )

    return input_image, mask_image

In [None]:
#Preprocessing function

def preprocess(car_path, mask_path):
    """Load one image/mask pair from disk and turn it into model-ready tensors.

    This step is deterministic. It used to call `augmentation` itself, which
    randomly flipped validation samples too and flipped training samples a
    second time (training pipelines already map `augmentation` on top of this
    function). Random augmentation is now left entirely to the caller.

    Args:
        car_path: path of the image file.
        mask_path: path of the matching mask file.

    Returns:
        A tuple `(input_image, mask_image)`:
        - `input_image`: float32 tensor resized to `image_size`, scaled by 1/255.
        - `mask_image`: single-channel tensor resized to `image_size`, where
          every non-zero pixel becomes 1 and zero stays 0.
    """
    input_image = tf.io.read_file(car_path)
    input_image = tf.image.decode_jpeg(input_image, channels=OUTPUT_CHANNELS)
    input_image = tf.image.resize(input_image, image_size)
    input_image = tf.cast(input_image, tf.float32) / 255.0

    # NOTE(review): the mask is decoded with decode_jpeg as well - confirm the
    # mask files are in a format this op accepts.
    mask_image = tf.io.read_file(mask_path)
    mask_image = tf.image.decode_jpeg(mask_image, channels=OUTPUT_CHANNELS)
    mask_image = tf.image.resize(mask_image, image_size)
    # Keep only the first channel, then binarise: sign() maps any positive
    # value to 1 and leaves 0 as 0.
    mask_image = mask_image[:, :, :1]
    mask_image = tf.math.sign(mask_image)

    return input_image, mask_image

In [None]:
#create_dataset function

def create_dataset(df, train = False):
    """Build a tf.data pipeline of (image, mask) pairs from a dataframe of paths.

    Args:
        df: dataframe with "car_path" and "mask_path" columns.
        train: when True, `augmentation` is additionally mapped over the
            preprocessed pairs.

    Returns:
        An unbatched `tf.data.Dataset` yielding the outputs of `preprocess`
        (passed through `augmentation` when `train` is True).
    """
    path_pairs = (df["car_path"].values, df["mask_path"].values)
    ds = tf.data.Dataset.from_tensor_slices(path_pairs)
    ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)

    if train:
        ds = ds.map(augmentation, num_parallel_calls=tf.data.AUTOTUNE)

    return ds

In [None]:
#Data split

from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for validation; the fixed seed keeps the split repeatable.
train_df, valid_df = train_test_split(df, test_size=0.25, random_state=42)

# Only the training pipeline is built with train=True.
train = create_dataset(train_df, train=True)
valid = create_dataset(valid_df, train=False)

In [None]:
# Number of training rows; used later to derive the steps per epoch.
TRAIN_LENGTH = len(train_df)
# Examples per batch for both the training and the validation dataset.
BATCH_SIZE = 16
# Size of the shuffle buffer applied to the training dataset.
BUFFER_SIZE = 1000

In [None]:
#train and validation dataset

# Random augmentation has to come AFTER cache(): everything upstream of
# cache() is computed once and replayed unchanged on every later epoch, so
# caching the already-augmented `train` pipeline froze the first epoch's flips.
# Instead, cache the loaded pairs from create_dataset(train_df) and map
# `augmentation` afterwards so the flip is re-drawn each epoch.
train_dataset = (
    create_dataset(train_df)
    .cache()
    .shuffle(BUFFER_SIZE)
    .map(augmentation, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .repeat()
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Validation data is only batched; it is finite, so Keras iterates it once per evaluation.
valid_dataset = valid.batch(BATCH_SIZE)

In [None]:
#Take a look before data training

def display(display_list):
    """Show up to three tensors side by side in a single row of subplots.

    Args:
        display_list: sequence of image-like arrays, in the order input
            image, true mask, predicted mask. Panels are titled by position.
    """
    title = ("Input image", "True mask", "Predicted_mask")
    n_panels = len(display_list)

    plt.figure(figsize=(15,15))
    for idx, panel in enumerate(display_list):
        plt.subplot(1, n_panels, idx + 1)
        plt.title(title[idx])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(panel))
        plt.axis("off")

    plt.show()

In [None]:
# Take one training pair, keep it as the reference sample for later
# prediction previews, and show it.
for sample_pair in train.take(1):
    image, mask = sample_pair
    sample_image = image
    sample_mask = mask
    display([sample_image, sample_mask])

# Model building

In [None]:
#Base model

# Layers of the backbone whose activations are taken as encoder outputs,
# listed in network order; the last entry is used as the decoder's starting
# point and the others as skip connections.
layer_names = [
    "block_1_expand_relu",
    "block_3_expand_relu",
    "block_6_expand_relu",
    "block_13_expand_relu",
    "block_16_project",
]

# MobileNetV2 without its classification head, used as the encoder backbone.
base_model = tf.keras.applications.MobileNetV2(
    include_top=False,
    input_shape=[256, 256, 3],
)

In [None]:
#Encoder

# Gather the output tensor of each selected backbone layer, in order.
model_base_output = []
for skip_layer_name in layer_names:
    model_base_output.append(base_model.get_layer(skip_layer_name).output)

# Feature extractor: one input, one output per selected layer.
down_stack = tf.keras.Model(inputs=base_model.input, outputs=model_base_output)

# Freeze the encoder so its weights are not updated during training.
down_stack.trainable = False

In [None]:
#Decoder function

def upsample(filters, size, apply_dropout=False):
    """Build one decoder block: Conv2DTranspose -> BatchNorm -> (Dropout) -> ReLU.

    Args:
        filters: number of filters of the transposed convolution.
        size: kernel size of the transposed convolution.
        apply_dropout: when True, insert a Dropout(0.5) layer before the ReLU.

    Returns:
        A `tf.keras.Sequential` whose transposed convolution uses stride 2
        with "same" padding.
    """
    block_layers = [
        tf.keras.layers.Conv2DTranspose(
            filters,
            size,
            strides=2,
            padding="same",
            kernel_initializer=tf.random_normal_initializer(0., 0.02),
            use_bias=False,
        ),
        tf.keras.layers.BatchNormalization(),
    ]

    if apply_dropout:
        block_layers.append(tf.keras.layers.Dropout(0.5))

    block_layers.append(tf.keras.layers.ReLU())

    return tf.keras.Sequential(block_layers)

In [None]:
#Decoder

# Decoder blocks applied in this order, each with a 3x3 kernel and no dropout.
up_stack = [upsample(n_filters, 3) for n_filters in (512, 256, 128, 64)]

In [None]:
#unet_model function

def unet_model(output_channels):
    """Assemble the U-Net from the `down_stack` encoder and `up_stack` decoder blocks.

    Args:
        output_channels: number of channels of the final transposed convolution.

    Returns:
        A `tf.keras.Model` taking 256x256x3 inputs. The last layer has no
        activation configured.
    """
    inputs = tf.keras.layers.Input(shape=[256, 256, 3])

    # The encoder returns one tensor per selected layer: the last one starts
    # the decoder, the earlier ones become skip connections (deepest first).
    *skip_features, bottleneck = down_stack(inputs)

    x = bottleneck
    for up_block, skip in zip(up_stack, reversed(skip_features)):
        # Upsample, then concatenate with the matching encoder feature map.
        x = tf.keras.layers.Concatenate()([up_block(x), skip])

    # One more stride-2 transposed convolution producing the output channels.
    outputs = tf.keras.layers.Conv2DTranspose(
        output_channels, 3, strides=2, padding="same"
    )(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
#Create model

model = unet_model(OUTPUT_CHANNELS)

# from_logits=True: the loss applies the softmax itself to the model outputs.
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer="adam",
    loss=sparse_ce_loss,
    metrics=["accuracy"],
)

In [None]:
#Model architecture

# Render the layer graph, annotated with each layer's tensor shapes.
tf.keras.utils.plot_model(model=model, show_shapes=True)

In [None]:
#Let's test the model to see what it predicts before training

def create_mask(pred_mask):
    """Turn a batch of per-channel scores into a mask for its first element.

    Args:
        pred_mask: batched model output whose last axis holds the per-channel scores.

    Returns:
        For the first batch element only: the index of the highest-scoring
        channel at each position, with a trailing axis of size 1.
    """
    winning_channel = tf.argmax(pred_mask, axis=-1)
    return winning_channel[..., tf.newaxis][0]

In [None]:
def show_predictions(train_dataset=None, num=1):
    """Display input, true mask and predicted mask using the global `model`.

    Args:
        train_dataset: optional batched dataset of (image, mask) pairs. When
            given, the first element of each of `num` batches is shown.
            When omitted, the global `sample_image` / `sample_mask` pair is used.
        num: number of batches to take from `train_dataset`.
    """
    if not train_dataset:
        # The sample is a single image, so add a batch axis before predicting.
        sample_batch = sample_image[tf.newaxis, ...]
        display([sample_image, sample_mask, create_mask(model.predict(sample_batch))])
        return

    for image, mask in train_dataset.take(num):
        pred_mask = model.predict(image)
        display([image[0], mask[0], create_mask(pred_mask)])

show_predictions()

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
#Callback function

from IPython.display import clear_output

class DisplayCallback(tf.keras.callbacks.Callback):
    """Keras callback that shows the sample prediction after every epoch.

    The hook is `on_epoch_end` so that the printed "after epoch N" label
    matches the weights that produced the picture, and so that the final
    epoch's result is shown too. With `on_epoch_begin` the label was one
    epoch ahead of the weights and the last epoch was never displayed.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Clear the cell output, show the sample prediction and print the epoch.

        Args:
            epoch: zero-based index of the epoch that just finished.
            logs: metrics dict supplied by Keras (unused).
        """
        clear_output(wait=True)
        show_predictions()
        # `epoch` is zero-based, hence the +1 in the message.
        print("\nSample Predictions after epoch {}\n".format(epoch+1))

# Training

In [None]:
EPOCHS = 5
# The training dataset repeats indefinitely, so an epoch is defined as one
# pass over TRAIN_LENGTH examples in whole batches.
STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE

model_history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[DisplayCallback()],
)