In [None]:
"""# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"""

# Loading in the necessary libraries

In [None]:
# Loading the dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import os
import random
import cv2

## Loading the data

In [None]:
# Loading csv file
df = pd.read_csv("../input/understanding_cloud_organization/train.csv")

In [None]:
df.head()

# Exploring the data

In [None]:
# Exploring the data
# Note: each row is one (image, label) pair, so this is the number of
# label slots across the whole dataset, not the number of images
print(f"The number of data points {len(df)}")

In [None]:
# Null values in each of the columns
df.isna().sum().plot(kind="bar")

In [None]:
# Percentage of null values
size = [len(df)-df.EncodedPixels.count(),df.EncodedPixels.count()]
plt.figure(figsize=(8,8))
plt.pie(size, labels=["empty","Non-empty"],explode=(0,0.1), autopct="%1.1f")
plt.title("Null value percentage")  

#### Replacing the null values with -1

In [None]:
# Replacing the NaN values with the placeholder -1
df["EncodedPixels"] = df['EncodedPixels'].fillna(-1)

In [None]:
# Creating a new column with the label
df["Label"] = df["Image_Label"].apply(lambda x: x.split("_")[1])
df.head()

In [None]:
# Creating a new feature with just the image names
df["Image_name"] = df["Image_Label"].apply(lambda x: x.split("_")[0])
#df.drop("Image_Label",axis=1,inplace=True)
df.head()

In [None]:
# Lets check the number of each corresponding labels
def check_num(label):
  """Count the rows of the global `df` that belong to `label` and carry a mask.

  A row carries a mask when its EncodedPixels value is not the -1 placeholder.
  """
  is_label = df["Label"] == label
  has_mask = df["EncodedPixels"] != -1
  return df.loc[is_label & has_mask, "EncodedPixels"].count()

values = {}
for i in df.Label.unique():
  values[i] = check_num(i)

print(values)
plt.title("Number of each classes")
pd.Series(values).plot(kind="bar")

In [None]:
# Creating a dataframe of images and classes in each images
def dummy_var(label):
  """Return a 0/1 list flagging which rows of `label` have a mask.

  Reads the global `df`. The list has one entry per row whose Label equals
  `label`, in `df` row order: 1 when EncodedPixels is not the -1 placeholder,
  0 otherwise.

  The flags are computed straight from the selected column instead of adding
  a "Dummy" column to a filtered slice, which triggered pandas'
  SettingWithCopyWarning.
  """
  label_pixels = df.loc[df["Label"] == label, "EncodedPixels"]
  return label_pixels.ne(-1).astype(int).tolist()

df_images = pd.DataFrame()
df_images["Image"] = df["Image_name"].unique()
for i in df["Label"].unique():
  df_images[i] = dummy_var(i)

df_images.head()

In [None]:
# Number of images available for us
print(f"Number of images: {len(df_images)}")

## Number of maps per image

In [None]:
# Number of detections per image
# Summing a fixed column list keeps the cell idempotent when it is re-run
# (the "Total" column itself is never part of the sum).
df_images["Total"] = df_images[["Fish", "Flower", "Gravel", "Sugar"]].sum(axis=1)
plt.title("Number of labels per image")
# Pass the series as `x=`: recent seaborn treats the first positional
# argument as `data`, not as the variable to count.
sns.countplot(x=df_images["Total"])

# Creating a new df

In [None]:
# Create one column for each mask
# "first" simply carries the single value of each (image, label) cell over.
# This assumes every (image, label) pair appears once in train.csv — TODO confirm.
# The previous np.min would instead take a lexicographic minimum of strings
# mixed with the -1 placeholder, and newer pandas warns when given the NumPy callable.
train_df = pd.pivot_table(df, index=['Image_name'], values=['EncodedPixels'], columns=['Label'], aggfunc="first").reset_index()
# Label columns come out in sorted order, which is what this renaming relies on
train_df.columns = ['image', 'Fish_mask', 'Flower_mask', 'Gravel_mask', 'Sugar_mask']

train_df.head()

# Exploring the segmentation masks

In [None]:
# Dimensions of the image
width = 2100
height = 1400

### Function to decode the segmentation maps

In [None]:
# Function to decode the encoded pixels
def decode_pixels(pix, rows=2100, cols=1400,label=255):
  """Decode a run-length-encoded mask into a 2-D uint8 array.

  Parameters
  ----------
  pix : str or any
      Whitespace-separated "start length" pairs. Starts are 1-based offsets
      into a flat buffer of rows*cols pixels. Anything that is not a
      non-blank string (for example the -1 placeholder this notebook uses
      for missing masks, or NaN) decodes to an all-zero mask.
  rows, cols : int
      The flat buffer is reshaped to (rows, cols) and then transposed, so the
      returned array has shape (cols, rows) — (1400, 2100) with the defaults.
  label : int
      Value written into the pixels covered by a run.

  Raises
  ------
  ValueError
      If the string holds an odd number of values or a non-integer token.
  """
  # Blank image in the form of a single flat array
  mask = np.zeros(rows*cols, dtype=np.uint8)

  if isinstance(pix, str) and pix.strip():
    # split() without an argument tolerates repeated or trailing whitespace
    rle_numbers = [int(token) for token in pix.split()]
    if len(rle_numbers) % 2 != 0:
      raise ValueError("RLE string must contain an even number of values")
    # Even positions are starts, odd positions are run lengths
    for start, length in zip(rle_numbers[0::2], rle_numbers[1::2]):
      start -= 1  # starts are 1-based
      mask[start:start+length] = label

  return mask.reshape(rows, cols).T


In [None]:
# Testing the function out
seg = decode_pixels(df["EncodedPixels"][4])
seg = cv2.resize(seg, (1050,700))
plt.imshow(seg)

## Displaying some sample segment maps

In [None]:
# Sample of the segment regions: the first six rows that actually have a mask
plt.figure(figsize=(15,8))
# Filtering up front avoids scanning with an unbounded counter, which raised
# a KeyError once it ran past the last row when fewer than six masks exist.
sample_rows = df[df["EncodedPixels"] != -1].head(6)
for slot, (_, row) in enumerate(sample_rows.iterrows(), start=1):
  plt.subplot(2,3,slot)
  plt.imshow(decode_pixels(row["EncodedPixels"]))
  plt.title(row["Image_name"]+"_"+row["Label"])
  plt.xticks([])
  plt.yticks([])
plt.show()

# Loading and displaying satellite images

In [None]:
# location of img directory
img_dir = "../input/understanding_cloud_organization/train_images"

In [None]:
# Seeing the columns of train.csv
df.columns

In [None]:
# Looking at any one image name
df["Image_name"].unique()[10]

In [None]:
# Displaying one image
path = os.path.join(img_dir,df["Image_name"][0])
img = cv2.imread(path,1)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)

### Displaying some sample images

In [None]:
# Displaying sample images
imgs = df["Image_name"].unique()[:6]
plt.figure(figsize=(15,8))
for i in range(len(imgs)):
  plt.subplot(2,3,i+1)
  path = os.path.join(img_dir,imgs[i])
  img = cv2.imread(path,1)
  plt.title(imgs[i])
  plt.imshow(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
plt.show()

# Visualizing the mask overlaid images

In [None]:
# Sample images with masks overlaid: the first six rows that have a mask
plt.figure(figsize=(15,8))
# Filtering up front avoids scanning with an unbounded counter, which raised
# a KeyError once it ran past the last row when fewer than six masks exist.
sample_rows = df[df["EncodedPixels"] != -1].head(6)
for slot, (_, row) in enumerate(sample_rows.iterrows(), start=1):
  plt.subplot(2,3,slot)
  seg = decode_pixels(row["EncodedPixels"])
  path = os.path.join(img_dir,row["Image_name"])
  # Flag 0 loads the picture as a single-channel greyscale image so it can be
  # blended with the single-channel mask
  img = cv2.imread(path,0)
  if img is None:
    # cv2.imread signals an unreadable or missing file by returning None
    raise FileNotFoundError(f"Could not read image: {path}")
  dest = cv2.addWeighted(img, 0.8, seg, 0.4, 0.0)
  plt.imshow(dest)
  plt.title(row["Image_name"])
  plt.xticks([])
  plt.yticks([])
plt.show()

# Preprocessing the data

In [None]:
# Function to preprocess the data 
BASE_DIR = "../input/understanding_cloud_organization/train_images"
labels = list(df["Label"].unique())
def preprocess(df):
  """Load and resize the images and masks listed in `df`.

  Parameters
  ----------
  df : pandas.DataFrame
      First column "image" holds file names relative to BASE_DIR; every
      further column holds an RLE string, or a non-string placeholder when
      the image has no mask of that class.

  Returns
  -------
  list
      One `[image, masks]` pair per row. `image` is the picture resized to
      480x384 (width x height) and divided by 255; `masks` stacks one
      384x480 mask per mask column along the last axis.

  Raises
  ------
  FileNotFoundError
      If an image cannot be read.

  Rows are read by position (`iloc`), so slices whose index does not start at
  0 (e.g. `train_df[5:10]`) work too. Images stay in the channel order
  returned by `cv2.imread`; no colour conversion is applied here.
  """
  data = []
  mask_columns = df.columns[1:]
  for i in range(len(df)):
    if i % 100 == 0:
      print(f"{i} completed")
    row = df.iloc[i]
    path = os.path.join(BASE_DIR,row["image"])
    img_arr = cv2.imread(path,1)
    if img_arr is None:
      # cv2.imread signals an unreadable or missing file by returning None
      raise FileNotFoundError(f"Could not read image: {path}")
    img_arr = cv2.resize(img_arr,(480,384))
    channels = []
    for j in mask_columns:
      encoded = row[j]
      if not isinstance(encoded, str):
        # No mask for this class: use an empty 384x480 map
        channels.append(np.zeros((384,480), dtype=np.uint8))
        continue
      arr = decode_pixels(encoded,label=1)
      arr = cv2.resize(arr,(480,384))
      channels.append(arr)

    data.append([img_arr/255,np.dstack(channels)])
  return data

In [None]:
# Sanity check on the output
data = preprocess(train_df[:5])
len(data)

In [None]:
# Checking the shape of image
data[1][0].shape

In [None]:
# Checking the shape of segmentation mask
data[1][1].shape

# Creating a custom generator for batch learning

In [None]:
img_dir = "../input/understanding_cloud_organization/train_images"
masks_dir = "./masks"

In [None]:
# utility function for Data Generators
BASE_DIR = "../input/understanding_cloud_organization/train_images"
labels = list(df["Label"].unique())

def preprocess1(df):
    """Load the images and masks listed in `df` as two stacked arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        First column "image" holds file names relative to BASE_DIR; every
        further column holds an RLE string, or a non-string placeholder when
        the image has no mask of that class.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        `imgs` — the pictures resized to 480x384 (width x height) and divided
        by 255, stacked along a new first axis; `masks` — the matching mask
        stacks (one channel per mask column) as float64.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read.
    """
    imgs = []
    masks = []
    mask_columns = df.columns[1:]
    # Iterating through each of the rows in the dataframe, by position
    for i in range(len(df)):
        row = df.iloc[i]
        # Reading in the image
        path = os.path.join(BASE_DIR,row["image"])
        img_arr = cv2.imread(path,1)
        if img_arr is None:
            # cv2.imread signals an unreadable or missing file by returning None
            raise FileNotFoundError(f"Could not read image: {path}")
        # Resizing it to the proper size
        img_arr = cv2.resize(img_arr,(480,384))
        # Collecting the different segmentation maps
        channels = []
        for j in mask_columns:
            encoded = row[j]
            if not isinstance(encoded, str):
                # Empty 384x480 map when the image doesn't contain this label
                channels.append(np.zeros((384,480), dtype=np.uint8))
                continue
            # Creating the segmentation map and resizing it to the proper size
            arr = decode_pixels(encoded,label=1)
            arr = cv2.resize(arr,(480,384))
            channels.append(arr)
        imgs.append(img_arr/255)
        masks.append(np.dstack(channels))

    # np.float was only an alias of the builtin float and is gone from
    # NumPy >= 1.24; np.float64 is the dtype it resolved to.
    return np.array(imgs), np.array(masks).astype(np.float64)

### Custom data generators

In [None]:
# Creating a custom Data Generator
def data_gen(img_folder, df, batch_size):
    """Yield `(imgs, masks)` batches from `df` forever, cycling over the rows.

    Parameters
    ----------
    img_folder : any
        Unused; kept so existing calls keep working. Images are located by
        `preprocess1`, which reads them from BASE_DIR.
    df : pandas.DataFrame
        Frame in the layout expected by `preprocess1`.
    batch_size : int
        Number of rows per batch.

    Yields
    ------
    tuple
        The `(imgs, masks)` pair returned by `preprocess1` for `batch_size`
        consecutive rows. Only full batches are produced: a trailing partial
        batch is skipped (without being loaded) and iteration restarts from
        the first row.

    Raises
    ------
    ValueError
        On the first `next()` if `batch_size` is not positive or exceeds
        `len(df)`. In those cases no full batch can ever be formed, and the
        generator would otherwise loop forever without yielding.
    """
    n_rows = len(df)
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if n_rows < batch_size:
        raise ValueError(
            f"batch_size ({batch_size}) is larger than the number of rows ({n_rows})"
        )

    # Start offset of the last batch that still holds batch_size rows
    last_full_start = n_rows - batch_size
    while True:
        for start in range(0, last_full_start + 1, batch_size):
            yield preprocess1(df.iloc[start:start + batch_size])
    

In [None]:
# Sample check to see if the code is working
this = data_gen(img_folder=img_dir, df=train_df, batch_size=8)

In [None]:
# Checking
k = 0
for i,j in this:
    if k == 5:
        break
    print(i.shape,j.shape)
    k+=1
    

# Model building

In [None]:
# Installing the segmentation_models library
! pip install segmentation_models

In [None]:
# Loading the dependencies
import tensorflow as tf
import segmentation_models as sm
import glob
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import keras

from tensorflow.keras.utils import normalize
from keras.metrics import MeanIoU

sm.set_framework('tf.keras')

sm.framework()

# Setting up the Hyperparameters

In [None]:
# Encoder backbone and optimiser settings
BACKBONE = 'efficientnetb5'
LEARNING_RATE = 0.002
# Network input size and number of output classes
HEIGHT = 384
WIDTH = 480
CHANNELS = 3
N_CLASSES = 4
# Callback patience values (epochs)
ES_PATIENCE = 10
RLROP_PATIENCE = 3
DECAY = 0.0001
DECAY_DROP = 0.2
# One formatting mechanism only: the name used to mix an f-string with
# %-formatting, which breaks as soon as an interpolated value contains "%".
model_path = f'uNet_{BACKBONE}_{HEIGHT}x{WIDTH}_lr{LEARNING_RATE}.h5'

In [None]:
# Setting up the optimizer
optim = tf.keras.optimizers.Adam(LEARNING_RATE)

# Setting up the metrics
metrics = [sm.metrics.IOUScore(threshold=0.50),sm.metrics.FScore(threshold=0.5)]

# Setting up the Callbacks
checkpoint = tf.keras.callbacks.ModelCheckpoint(model_path, monitor='val_loss', mode='min', save_best_only=True, save_weights_only=True)
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=ES_PATIENCE, restore_best_weights=True, verbose=1)
rlrop = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=RLROP_PATIENCE, factor=DECAY_DROP, min_lr=1e-6, verbose=1)

# Final list of call backs
callback_list = [checkpoint, es, rlrop]

# Defining the model architecture

In [None]:
# Defining model
model = sm.Unet(backbone_name=BACKBONE, 
                encoder_weights='imagenet',
                classes=N_CLASSES,
                activation='sigmoid', encoder_freeze=True,
                input_shape=(HEIGHT, WIDTH, CHANNELS))

# Compiling the model
model.compile(optimizer=optim, loss=sm.losses.bce_dice_loss, metrics=metrics)

# Model summary
model.summary()

# Setting the training and testing data

In [None]:
# Total number of images
print(f"Total size of data {len(train_df)}")

### Splitting the data into train and test

In [None]:
# Training and Testing Data
train = train_df.iloc[0:4500]
test = train_df.iloc[4500:5000]
batch_size = 8

### Creating the data generators for batch learning

In [None]:
# Data Generators
Train_data_generator = data_gen(img_folder=img_dir, df=train, batch_size=batch_size)
Validation_data_generator = data_gen(img_folder=img_dir, df=test, batch_size=batch_size)

# Training the model

In [None]:
history = model.fit(Train_data_generator,epochs=40,
                             steps_per_epoch=(4500//batch_size),
                             validation_data=Validation_data_generator,
                             validation_steps=(500//batch_size),
                             callbacks=callback_list)

# Evaluating the model on test data

In [None]:
# The generator yields batches of `batch_size` images, so the step count has
# to be derived from it; the previous 500//16 scored only about half of the
# 500 hold-out images.
res = model.evaluate(Validation_data_generator,steps=500//batch_size)
print(f"Loss:{res[0]}")
print(f"IoU:{res[1]}")
print(f"F1:{res[2]}")

# Saving the best Model

In [None]:
model.save(f"clouds_efficientnetb5_iouscore-{str(res[1])[:5]}.h5")

# Visualising the training of model

In [None]:
# All the curves together
df_res = pd.DataFrame(history.history)
plt.title("Model Performance")
plt.plot(df_res)
plt.show()

In [None]:
# Looking at the columns of the result df
df_res.columns

## Visualizing each curve separately

In [None]:
# Each of the learning curves for the model is displayed separately
colors = "bgrcy"
# Two plots per row; the number of rows follows the number of recorded curves
n_curve_rows = (len(df_res.columns) + 1) // 2
plt.figure(figsize=(15,15))
for i, column in enumerate(df_res.columns):
    plt.subplot(n_curve_rows,2,i+1)
    # Cycle through the palette so the figure looks the same on every run
    df_res[column].plot(color=colors[i % len(colors)])
    plt.title(column)
plt.show()

# Performance of model on unseen images

## Post processing the output from model

In [None]:
# Thresholding function to be applied on the output
def threshold(x):
    """Return 1 when the scalar `x` is strictly greater than 0.5, otherwise 0."""
    return 1 if x > 0.5 else 0

# Array version of `threshold`
def exp(x):
    """Binarise an array: 1 where the value is strictly above 0.5, else 0.

    Same result as applying `threshold` to every element, but done with one
    NumPy comparison instead of a Python call per pixel. Empty arrays are
    also accepted.
    """
    return (np.asarray(x) > 0.5).astype(int)

In [None]:
# Function to compare the predicted mask to the actual mask
# Function simply plots the actual and predicted mask of the 4 classes 
# side by side
def compare_masks(actual,predicted):
    """Plot ground-truth and predicted masks side by side, one row per class.

    `actual` and `predicted` are indexed as [:, :, class]; the first four
    channels are shown. Titles use the class names in the global `labels` list.
    """
    plt.figure(figsize=(15,15))
    for cls in range(4):
        # Left column: ground truth, right column: prediction
        panels = (("Actual", actual), ("Predicted", predicted))
        for side, (prefix, stack) in enumerate(panels):
            plt.subplot(4,2,2*cls+side+1)
            plt.title(f"{prefix}-{labels[cls]}")
            plt.imshow(stack[:,:,cls])
    plt.show()

In [None]:
# Function to predict and visualise the outputs of the models
# Simply combine the above 2 functions together
def predict(df, net=None):
    """Predict the masks for the first row of `df` and plot them against the truth.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame in the layout expected by `preprocess`; only its first row is used.
    net : model, optional
        Object with a Keras-style `predict` method. Defaults to the global
        `model`, so existing `predict(df)` calls behave as before. Passing
        another model avoids copying this function for every model.

    Returns None; the result is the figure drawn by `compare_masks`.
    """
    if net is None:
        net = model
    image, true_mask = preprocess(df)[0]
    # The network expects a batch axis in front of the image
    output = net.predict(image[np.newaxis, ...])
    output = exp(output)
    compare_masks(true_mask,output[0])
    return

In [None]:
# Function to display the actual image
def display_img(img_name):
    """Show the image `img_name` from the global `img_dir` in RGB.

    Raises FileNotFoundError when the file cannot be read, instead of the
    opaque OpenCV error the colour conversion produces for a missing image.
    """
    path = os.path.join(img_dir,img_name)
    img = cv2.imread(path,1)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV loads BGR; matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)

## Testing the model on unseen data

### 1:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5107])

In [None]:
# Performance of model on image
test = pd.DataFrame(train_df.iloc[5107]).T.reset_index().drop("index",axis=1)
predict(test)

### 2:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5101])

In [None]:
test = pd.DataFrame(train_df.iloc[5101]).T.reset_index().drop("index",axis=1)
predict(test)

### 3:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5105])

In [None]:
test = pd.DataFrame(train_df.iloc[5105]).T.reset_index().drop("index",axis=1)
predict(test)

In [None]:
#import keras

In [None]:
#model.load_weights('../input/modelforsegmentation/clouds_iouscore-0.39.h5')

### 4:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5108])

In [None]:
test = pd.DataFrame(train_df.iloc[5108]).T.reset_index().drop("index",axis=1)
predict(test)

### 5:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5117])

In [None]:
test = pd.DataFrame(train_df.iloc[5117]).T.reset_index().drop("index",axis=1)
predict(test)

### 6

In [None]:
# Displaying the actual image
display_img(train_df["image"][510])

In [None]:
# NOTE(review): row 510 lies inside the training slice train_df.iloc[0:4500],
# so unlike the other examples in this section this image is not unseen —
# confirm the intended index.
test = pd.DataFrame(train_df.iloc[510]).T.reset_index().drop("index",axis=1)
predict(test)

### 7:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5000])

In [None]:
test = pd.DataFrame(train_df.iloc[5000]).T.reset_index().drop("index",axis=1)
predict(test)

### 8:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5111])

In [None]:
test = pd.DataFrame(train_df.iloc[5111]).T.reset_index().drop("index",axis=1)
predict(test)

### 9:

In [None]:
# Displaying the actual image
display_img(train_df["image"][5222])

In [None]:
test = pd.DataFrame(train_df.iloc[5222]).T.reset_index().drop("index",axis=1)
predict(test)

# Loading in the trained model

In [None]:
# Importing keras
import keras

In [None]:
# Creating a model with the same architecture
loaded_model = sm.Unet(backbone_name=BACKBONE, 
                encoder_weights='imagenet',
                classes=N_CLASSES,
                activation='sigmoid', encoder_freeze=True,
                input_shape=(HEIGHT, WIDTH, CHANNELS))

# Compiling the model
loaded_model.compile(optimizer=optim, loss=sm.losses.bce_dice_loss, metrics=metrics)

In [None]:
loaded_model.load_weights('../input/trained-model/clouds_effientnetb5_iouscore-0.417.h5')

In [None]:
# For testing
def predict_loaded(df):
    """Run `loaded_model` on the first row of `df` and plot truth vs. prediction.

    Returns None; the result is the figure drawn by `compare_masks`.
    """
    image, true_mask = preprocess(df)[0]
    # Add the batch axis the network expects
    batch = image[np.newaxis, ...]
    binary = exp(loaded_model.predict(batch))
    compare_masks(true_mask, binary[0])
    return

In [None]:
# Testing the model
test = pd.DataFrame(train_df.iloc[5222]).T.reset_index().drop("index",axis=1)
predict_loaded(test)