## Functions to load and read input data

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# importing all the required libraries
import tensorflow as tf
from zipfile import ZipFile 
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os

In [None]:
# Unpack the archive holding the training images into the writable working directory.
train_zip = "/kaggle/input/carvana-image-masking-challenge/train.zip"  # location of the zipped training images
with ZipFile(train_zip, mode='r') as archive:
    archive.extractall(path='/kaggle/working')

In [None]:
# Unpack the archive holding the training masks into the writable working directory.
train_mask_zip = "/kaggle/input/carvana-image-masking-challenge/train_masks.zip"  # location of the zipped masks
with ZipFile(train_mask_zip, mode='r') as archive:
    archive.extractall(path='/kaggle/working')

In [None]:
# Report how many entries each freshly extracted folder contains.
for label, folder in (("Train set:  ", "/kaggle/working/train"),
                      ("Train masks:", "/kaggle/working/train_masks")):
    print(label, len(os.listdir(folder)))

In [None]:
# Collect (id, path) for every file found under the extracted training folder.
# The id is the file name up to its first ".".
records = [
    (name.split(".")[0], os.path.join(root, name))
    for root, _, names in os.walk('/kaggle/working/train')
    for name in names
]
car_ids = [car_id for car_id, _ in records]
paths = [path for _, path in records]

# One row per training image: the id is the index, "car_path" the file location.
d = {"id": car_ids, "car_path": paths}
df = pd.DataFrame(data=d).set_index('id')
df

In [None]:
# Locate every mask file under the extracted mask folder.
mask_path = [
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/working/train_masks')
    for name in names
]
# The id is the file name up to its first ".", with everything from "_mask" on removed,
# so that it lines up with the id used for the car images.
car_ids = [os.path.basename(p).split(".")[0].split("_mask")[0] for p in mask_path]

# One row per mask: the id is the index, "mask_path" the file location.
d = {"id": car_ids, "mask_path": mask_path}
mask_df = pd.DataFrame(data=d).set_index('id')
mask_df

In [None]:
# Peek at the first mask path. The index holds string ids, so a bare `[0]` relies on
# pandas' deprecated positional fallback; `.iloc[0]` states the positional lookup explicitly.
mask_df["mask_path"].iloc[0]

In [None]:
# Put the image path and the mask path side by side; rows are matched on the shared id index.
df = df.assign(mask_path=mask_df["mask_path"])
df

## functions for preprocessing of data to make it ready for training models

In [None]:
img_size = [64,64]  # target size handed to tf.image.resize for both images and masks

def data_augmentation(car_img, mask_img):
    """Randomly mirror an image/mask pair horizontally.

    One uniform random draw decides whether to flip; when it exceeds 0.5 both
    tensors are flipped left-to-right together so they stay aligned.

    Returns:
        The (possibly flipped) ``(car_img, mask_img)`` pair.
    """
    flip = tf.random.uniform(()) > 0.5  # single draw shared by image and mask
    if flip:
        car_img, mask_img = (
            tf.image.flip_left_right(car_img),
            tf.image.flip_left_right(mask_img),
        )
    return car_img, mask_img

def preprocessing(car_path, mask_path):
    """Load one car image and its mask from disk as model-ready tensors.

    Args:
        car_path: scalar string tensor with the path of the car photo.
        mask_path: scalar string tensor with the path of the matching mask.

    Returns:
        ``(car_img, mask_img)``. ``car_img`` is a float32 RGB image resized to
        ``img_size`` and scaled to [0, 1]. ``mask_img`` is a float32
        single-channel mask resized to ``img_size`` holding 0.0 (background)
        or 1.0 (foreground).
    """
    car_img = tf.io.read_file(car_path)                  # raw file bytes
    car_img = tf.image.decode_jpeg(car_img, channels=3)  # bytes -> uint8 RGB tensor
    car_img = tf.image.resize(car_img, img_size)         # bilinear resize (returns float32)
    car_img = tf.cast(car_img, tf.float32) / 255.0       # scale pixel values to [0, 1]

    mask_img = tf.io.read_file(mask_path)
    # NOTE(review): the mask file format is not visible here; the decoder call is
    # kept exactly as before - confirm it matches the mask files.
    mask_img = tf.image.decode_jpeg(mask_img, channels=3)
    mask_img = mask_img[:, :, :1]                        # one channel is enough for a binary mask
    # Masks are labels, not intensities: nearest-neighbour resizing picks an existing
    # pixel. The previous bilinear resize followed by sign() marked every pixel with
    # any interpolated foreground as 1, slightly inflating the mask.
    mask_img = tf.image.resize(mask_img, img_size, method="nearest")
    mask_img = tf.cast(mask_img > 0, tf.float32)         # any non-zero value -> 1.0

    return car_img, mask_img
def create_dataset(df, train = False):
    """Build a tf.data pipeline of (image, mask) pairs from a DataFrame of paths.

    Args:
        df: DataFrame with "car_path" and "mask_path" columns.
        train: when True, ``data_augmentation`` is applied after ``preprocessing``.

    Returns:
        An unbatched ``tf.data.Dataset`` yielding ``(car_img, mask_img)``.
    """
    path_pairs = (df["car_path"].values, df["mask_path"].values)
    ds = tf.data.Dataset.from_tensor_slices(path_pairs)
    ds = ds.map(preprocessing, tf.data.AUTOTUNE)  # load, resize and normalise each pair
    if train:
        ds = ds.map(data_augmentation, tf.data.AUTOTUNE)  # random flips for training only
    return ds

In [None]:
df.columns  # sanity check: list the columns of the combined path table

In [None]:
# Read one mask at its original resolution; the size is needed later to scale
# predictions back up. `.iloc[0]` is used because the index holds string ids and a
# bare `[0]` relies on pandas' deprecated positional fallback.
sh = plt.imread(df["mask_path"].iloc[0]).shape
mask_h = sh[0]  # original mask height
mask_w = sh[1]  # original mask width

In [None]:
mask_h  # display the original mask height read above

In [None]:
# Hold out a quarter of the labelled data for validation (fixed seed for repeatability).
train_df, valid_df = train_test_split(df, test_size=0.25, random_state=42)
train = create_dataset(train_df, train=True)    # training pipeline, with augmentation
valid = create_dataset(valid_df, train=False)   # validation pipeline, no augmentation

In [None]:
TRAIN_LENGTH = train_df.shape[0]  # number of training samples
BATCH_SIZE = 16                   # samples per training batch
BUFFER_SIZE = 1000                # size of the shuffle buffer

In [None]:
# Cache only the deterministic part of the pipeline (decode + resize) and apply the
# random flip afterwards. Caching the already-augmented `train` dataset would store
# the first epoch's flips and replay exactly the same images in every later epoch.
train_dataset = (
    create_dataset(train_df, train=False)   # preprocessing only, no augmentation yet
    .cache()
    .shuffle(BUFFER_SIZE)
    .map(data_augmentation, num_parallel_calls=tf.data.AUTOTUNE)  # fresh flips each epoch
    .batch(BATCH_SIZE)
    .repeat()                               # endless stream; epochs are bounded by steps_per_epoch
    .prefetch(buffer_size=tf.data.AUTOTUNE)  # overlap input preparation with training
)
valid_dataset = valid.batch(BATCH_SIZE)

In [None]:
def display(display_list):
    """Show up to three images side by side in a single row.

    The entries are titled, in order, 'Input Image', 'True Mask' and
    'Predicted Mask'. Each entry is converted with ``array_to_img`` before
    plotting and the axes are hidden.
    """
    titles = ['Input Image', 'True Mask', 'Predicted Mask']
    n_panels = len(display_list)

    plt.figure(figsize=(10, 10))
    for position, item in enumerate(display_list, start=1):
        plt.subplot(1, n_panels, position)  # one row, one column per entry
        plt.title(titles[position - 1])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(item))
        plt.axis('off')
    plt.show()

In [None]:
# Show the first five training samples. The previous nested loop called
# `train.take(i)` for i = 0..4, so it skipped the fifth sample and displayed the
# earlier ones several times.
for sample_image, sample_mask in train.take(5):
    print(sample_image.shape)
    display([sample_image, sample_mask])  # image next to its ground-truth mask

## creating model

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dropout 
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import concatenate
from tensorflow.keras.losses import binary_crossentropy
from sklearn.model_selection import train_test_split

The U-Net model is particularly well suited to image segmentation, so we are going to build it from scratch.
It is split into two parts: an encoder, which downsamples the input, and a decoder, which upsamples it again.

Each encoder block applies a convolution layer twice to its input and then uses a max-pooling layer to reduce the size of the feature maps and the computational cost.
Batch normalization can be used between layers to normalize their inputs.

In [None]:

def EncoderMiniBlock(inputs, n_filters=32, dropout_prob=0.3, max_pooling=True):
    """
    One encoder stage of the U-Net: two 3x3 convolutions, batch normalization,
    optional dropout and optional 2x2 max pooling.

    Args:
        inputs: tensor fed to the first convolution.
        n_filters: number of filters used by both convolutions.
        dropout_prob: dropout rate; no dropout layer is added when it is not positive.
        max_pooling: when True the stage output is max-pooled with a 2x2 window.

    Returns:
        ``(next_layer, skip_connection)``: ``next_layer`` feeds the following stage
        (pooled when ``max_pooling`` is True), ``skip_connection`` is the un-pooled
        activation that the matching decoder stage concatenates.
    """
    # Two conv layers with ReLU activation and HeNormal initialization.
    # 'same' padding keeps the height and width of the input unchanged.
    conv = Conv2D(n_filters,
                  3,   # Kernel size
                  activation='relu',
                  padding='same',
                  kernel_initializer='HeNormal')(inputs)
    conv = Conv2D(n_filters,
                  3,   # Kernel size
                  activation='relu',
                  padding='same',
                  kernel_initializer='HeNormal')(conv)

    # Let Keras choose the training/inference mode. The call used to pass
    # training=False, which pins the layer to inference mode: it then always uses
    # its moving statistics and never normalizes with the batch mean and variance.
    conv = BatchNormalization()(conv)

    # Optional dropout for regularization.
    if dropout_prob > 0:
        conv = tf.keras.layers.Dropout(dropout_prob)(conv)

    # Pooling is optional because the last encoder stage does not pool.
    if max_pooling:
        next_layer = tf.keras.layers.MaxPooling2D(pool_size = (2,2))(conv)
    else:
        next_layer = conv

    # The skip connection is taken before pooling so the decoder receives the
    # full-resolution features of this stage.
    skip_connection = conv

    return next_layer, skip_connection



The decoder block upsamples its input with a transposed convolution, merges the result with the corresponding downsampled encoder layer, and then applies a convolution twice.

In [None]:
def DecoderMiniBlock(prev_layer_input, skip_layer_input, n_filters=32):
    """
    One decoder stage of the U-Net.

    The previous layer is upscaled by a stride-2 transpose convolution, joined
    with the encoder's skip connection along the channel axis, and refined by
    two 3x3 'same'-padded ReLU convolutions with HeNormal initialization.

    Returns the output tensor of the second convolution.
    """
    # Upscale first: a 3x3 kernel with stride 2 and 'same' padding.
    upsampled = Conv2DTranspose(n_filters, (3,3), strides=(2,2), padding='same')(prev_layer_input)

    # Bring back the encoder features of the same stage (axis 3 is the channel axis).
    features = concatenate([upsampled, skip_layer_input], axis=3)

    # Two identical convolutions, configured like the encoder's.
    for _ in range(2):
        features = Conv2D(n_filters,
                          3,   # Kernel size
                          activation='relu',
                          padding='same',
                          kernel_initializer='HeNormal')(features)
    return features

Now that we have coded the basic blocks, we need to assemble a U-Net that is appropriate for our dataset.

In [None]:
#3.3 - Compile U-Net Blocks
def UNetCompiled(input_size=(128, 128, 3), n_filters=16, n_classes=1):
    """Assemble the U-Net from encoder and decoder mini blocks.

    Args:
        input_size: shape of one input image, ``(height, width, channels)``.
        n_filters: filters of the first encoder stage; later stages use
            2x, 4x, 8x and 16x this number.
        n_classes: number of output channels.

    Returns:
        An uncompiled ``tf.keras.Model``. With ``n_classes == 1`` the output goes
        through a sigmoid and is a per-pixel probability; for other values the
        output layer stays linear, as before.
    """
    inputs = Input(input_size)

    # Encoder: the filter count doubles at every stage. Only the two deepest
    # stages use dropout, and the last stage does not pool.
    cblock1 = EncoderMiniBlock(inputs, n_filters,dropout_prob=0, max_pooling=True)
    cblock2 = EncoderMiniBlock(cblock1[0],n_filters*2,dropout_prob=0, max_pooling=True)
    cblock3 = EncoderMiniBlock(cblock2[0], n_filters*4,dropout_prob=0, max_pooling=True)
    cblock4 = EncoderMiniBlock(cblock3[0], n_filters*8,dropout_prob=0.3, max_pooling=True)
    cblock5 = EncoderMiniBlock(cblock4[0], n_filters*16, dropout_prob=0.3, max_pooling=False)

    # Decoder: the filter count halves at every stage. cblockN[1] is the skip
    # connection returned by the matching encoder stage.
    ublock6 = DecoderMiniBlock(cblock5[0], cblock4[1],  n_filters * 8)
    ublock7 = DecoderMiniBlock(ublock6, cblock3[1],  n_filters * 4)
    ublock8 = DecoderMiniBlock(ublock7, cblock2[1],  n_filters * 2)
    ublock9 = DecoderMiniBlock(ublock8, cblock1[1],  n_filters)

    # One more 3x3 convolution, then a 1x1 convolution that maps to n_classes channels.
    conv9 = Conv2D(n_filters,
                 3,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(ublock9)

    # The notebook trains with BinaryCrossentropy() in its default from_logits=False
    # mode and thresholds predictions at 0.5, both of which expect probabilities.
    # The single-class output therefore needs a sigmoid; it used to be linear.
    output_activation = 'sigmoid' if n_classes == 1 else None
    conv10 = Conv2D(n_classes, 1, padding='same', activation=output_activation)(conv9)

    # Define the model
    model = tf.keras.Model(inputs=inputs, outputs=conv10)

    return model



In [None]:
# Build the U-Net for 64x64 RGB inputs with one output channel.
# (This only constructs the model; the optimizer and loss are attached by unet.compile.)
unet = UNetCompiled(input_size=(64,64,3), n_filters=16, n_classes=1)


In [None]:
from keras.losses import binary_crossentropy
import keras.backend as K

#metric and loss function to be used for training model
def dice_coeff(y_true, y_pred):
    """Smoothed Dice coefficient computed over all elements of both tensors.

    Both inputs are flattened, so the score is a single scalar:
    ``(2 * sum(y_true * y_pred) + 1) / (sum(y_true) + sum(y_pred) + 1)``.
    """
    smooth = 1.  # keeps the ratio defined when both tensors sum to zero
    truth_flat, pred_flat = K.flatten(y_true), K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return (2. * overlap + smooth) / denominator


def dice_loss(y_true, y_pred):
    """Dice loss: one minus the Dice coefficient of the two tensors."""
    return 1 - dice_coeff(y_true, y_pred)


def bce_dice_loss(y_true, y_pred):
    """Sum of binary cross-entropy and Dice loss for the same prediction."""
    bce_term = binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + dice_term

In [None]:
# Attach the optimizer, loss and metric used for training.
optimizer = tf.keras.optimizers.Adam()          # default Adam settings
loss_fn = tf.keras.losses.BinaryCrossentropy()  # default binary cross-entropy settings
unet.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
# Full batches per pass over the training data. Needed by fit() because the
# training dataset repeats endlessly.
STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE

In [None]:
# Callbacks applied while training the model:
# - early stopping if performance does not improve within the given patience
# - model checkpoint to save the weights of the best-performing model so far
# - learning-rate reduction on plateau to help find the optimal solution
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [None]:
# Stop training after 10 epochs without improvement.
early_stop = EarlyStopping(patience=10, verbose=1)
# Multiply the learning rate by 0.1 after 5 stagnant epochs, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=1)
# Keep only the weights of the best model seen so far.
checkpoint = ModelCheckpoint('model-tgs-salt.h5', verbose=1, save_best_only=True, save_weights_only=True)

callbacks = [early_stop, reduce_lr, checkpoint]

In [None]:
# train_dataset is already batched, so no batch_size is passed: Keras documents that
# batch_size must not be specified when the input is a tf.data.Dataset.
hist = unet.fit(
    train_dataset,
    epochs=15,
    steps_per_epoch=STEPS_PER_EPOCH,  # the training dataset repeats endlessly
    callbacks=callbacks,
    validation_data=valid_dataset,
)

In [None]:
# Restore the weights written by the ModelCheckpoint callback (best model seen during training).
unet.load_weights("./model-tgs-salt.h5")

In [None]:
# Free memory held by objects that are no longer needed. This cell only imports gc
# and runs a collection pass; the deletion of train_df is commented out.
import gc
#del train_df
gc.collect()

In [None]:
# Drop the mask path table, then run a collection pass.
del mask_df
gc.collect()

In [None]:
# Drop the combined training path table; the name `df` is reused later for the test images.
del df
gc.collect()

In [None]:
# Drop the unbatched training dataset object (train_dataset is a separate name and stays defined).
del train
gc.collect()

In [None]:
# Drop the unbatched validation dataset object (valid_dataset is a separate name and stays defined).
del valid
gc.collect()

In [None]:
# Drop the validation path table, then run a collection pass.
del valid_df
gc.collect()

In [None]:
def vis_compare(dataset=valid_dataset,num_case=1):
    """Plot model predictions next to the inputs and the ground-truth masks.

    Takes the first batch of ``dataset``, runs ``unet.predict`` on its images and
    shows either a single triplet (through ``display``) or ``num_case`` randomly
    chosen triplets from that batch.

    Args:
        dataset: batched dataset yielding ``(images, masks)``.
        num_case: number of samples to show; values above the batch size are
            clamped to the batch size.

    Raises:
        ValueError: if ``dataset`` yields no batch.
    """
    batch = next(iter(dataset.take(1)), None)
    if batch is None:
        raise ValueError("dataset is empty; nothing to visualise")
    image, label = batch[0].numpy(), batch[1].numpy()
    print(image.shape)
    print(label.shape)

    preds = unet.predict(image)  # predicted masks for the whole batch
    preds = np.squeeze(preds, axis=-1)  # drop the channel axis
    print(preds.shape)

    if num_case > 1:
        # Sampling without replacement cannot draw more cases than the batch holds.
        num_case = min(num_case, image.shape[0])
        cases = np.random.choice(image.shape[0], size=num_case, replace=False)
        for i in cases:
            print(f"case_number_{i}")
            fig, arr = plt.subplots(1, 3, figsize=(15, 15))
            arr[0].imshow(image[i])
            arr[0].set_title('Processed Image')
            # Drop singleton axes so imshow receives a plain 2-D array.
            arr[1].imshow(np.squeeze(label[i]))
            arr[1].set_title('Actual Masked Image ')
            arr[2].imshow(preds[i])
            arr[2].set_title('Predicted Masked Image ')
    else:
        # display() converts every entry with array_to_img, which requires a rank-3
        # array, so the channel axis is put back on the squeezed prediction.
        display([image[0], label[0], preds[0][..., np.newaxis]])

    plt.show()

In [None]:
vis_compare(dataset=valid_dataset,num_case=2)  # compare two random validation samples with their predictions

In [None]:
# Unpack the archive holding the test images into the writable working directory.
test_zip = "/kaggle/input/carvana-image-masking-challenge/test.zip"  # location of the zipped test images
with ZipFile(test_zip, mode='r') as archive:
    archive.extractall(path='/kaggle/working')
    

In [None]:
# Collect (id, path) for every file found under the extracted test folder.
# The id is the file name up to its first ".".
records = [
    (name.split(".")[0], os.path.join(root, name))
    for root, _, names in os.walk('/kaggle/working/test')
    for name in names
]
car_ids = [car_id for car_id, _ in records]
paths = [path for _, path in records]

# One row per test image: the id is the index, "car_path" the file location.
d = {"id": car_ids, "car_path": paths}
df = pd.DataFrame(data=d).set_index('id')
df

In [None]:
img_size = [64,64]  # target size handed to tf.image.resize; same value as used for training


def preprocessing(car_path):
    """Load one test image as a float32 tensor resized to ``img_size`` and scaled to [0, 1].

    Note: this redefines the two-argument ``preprocessing`` used for training
    earlier in the notebook; after this cell runs only the image-only version exists.
    """
    raw_bytes = tf.io.read_file(car_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)  # bytes -> uint8 RGB tensor
    resized = tf.image.resize(decoded, img_size)
    return tf.cast(resized, tf.float32) / 255.0
def create_dataset(df, train = False):
    """Build the unbatched tf.data pipeline of preprocessed test images.

    Args:
        df: DataFrame with a "car_path" column.
        train: must stay False; the test data has no masks to augment together
            with the images.

    Returns:
        A ``tf.data.Dataset`` yielding one preprocessed image per row of ``df``.

    Raises:
        ValueError: if ``train`` is True. This used to fail with an
            UnboundLocalError because ``ds`` was only assigned in the
            ``not train`` branch.
    """
    if train:
        raise ValueError("create_dataset for test data does not support train=True")
    ds = tf.data.Dataset.from_tensor_slices(df["car_path"].values)
    ds = ds.map(preprocessing, tf.data.AUTOTUNE)  # load, resize and normalise each image
    return ds
test = create_dataset(df)  # unbatched dataset of preprocessed test images




In [None]:
BATCH_SIZE = 16  # same batch size as used for training
test_dataset = test.batch(BATCH_SIZE)  # batched test images fed to unet.predict

In [None]:
len(df)  # number of test images found

In [None]:
# Visualise the first five test images. The previous nested loop called
# `test.take(i)` for i = 0..4, so it skipped the fifth image and displayed the
# earlier ones several times.
for i, image in enumerate(test.take(5)):
    print(image.shape)
    print(i)
    sample_image = image
    display([sample_image])

In [None]:
preds = unet.predict(test_dataset)  # model output for every test image, kept in memory as one array

In [None]:
 preds.shape  # shape of the prediction array returned by unet.predict

In [None]:
from tqdm import tqdm
import cv2

In [None]:
 def run_length_encode(mask):
     '''
     Run-length encode a binary mask in the submission format.

     mask: numpy array (any shape), non-zero/True = mask, 0/False = background.
           It is flattened in row-major order before encoding.
     Returns a string of space-separated "start length" pairs, where start is
     the 1-based index of the first pixel of a run. An all-background mask
     gives the empty string.
     '''
     # Pad with one background pixel on each side so that runs touching the first
     # or the last pixel still produce both a start and an end transition. Without
     # the padding such masks gave an odd, misaligned list of numbers.
     pixels = np.concatenate([[0], np.asarray(mask).flatten(), [0]])
     # Positions where the value changes, as 1-based indices into the unpadded mask:
     # even entries are run starts, odd entries are one past the run end.
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]  # turn each end position into a run length
     return ' '.join(str(r) for r in runs)


# Encode every predicted mask at the original mask resolution.
# The channel axis is indexed instead of rebinding `preds` to a squeezed copy: `preds`
# stays untouched, so re-running this cell no longer fails on a missing axis 3.
rles = []

for pred in tqdm(preds[..., 0]):
    prob = cv2.resize(pred, (mask_w, mask_h))  # cv2.resize expects (width, height)
    mask = prob > 0.5                          # binarise the upscaled prediction
    rles.append(run_length_encode(mask))

print("Generating submission file...")


In [None]:
len(rles)  # number of encoded masks; should match the number of test images

In [None]:
# Submission table: image file name (id plus ".jpg") and its run-length-encoded mask.
dfs = pd.DataFrame({'img': df.index.values + ".jpg", 'rle_mask': rles})#creating dataframe of image id and predicted masks as columns 


In [None]:
# Write the submission as a gzip-compressed CSV without the DataFrame index.
dfs.to_csv('submission_6.csv.gz', index=False, compression ='gzip' )#converting to compressed csv file 

In [None]:
pd.read_csv("./submission_6.csv.gz")  # read the file back to check what was written