**Resources**

- [Convert Pandas DF into TF Dataset](https://medium.com/when-i-work-data/converting-a-pandas-dataframe-into-a-tensorflow-dataset-752f3783c168)
- [Albumentations Docs](https://albumentations.ai/docs/getting_started/mask_augmentation/)
- 

**TensorFlow / Keras**
- [Load and preprocess Images](https://www.tensorflow.org/tutorials/load_data/images)
- [TF Data Set from Pandas](https://www.tensorflow.org/tutorials/load_data/pandas_dataframe)
- [Keras Preprocessing](https://www.tensorflow.org/guide/keras/preprocessing_layers)
- [Image segmentation](https://www.tensorflow.org/tutorials/images/segmentation)
- [Keras Directory Iterator](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/DirectoryIterator)

In [None]:
#!pip install segmentation-models

In [None]:
#!pip install -U albumentations --no-binary qudida,albumentations

In [None]:
# Remove the generated data/segmentation directory so that a changed test run
# starts from a clean state (IPython shell escape, not plain Python).
!rm -r data/segmentation

In [None]:
import tensorflow as tf
from tensorflow_examples.models.pix2pix import pix2pix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import segmentation_models as sm

from IPython.display import clear_output

import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import os
import shutil
import cv2
import time
import glob

from sklearn.model_selection import train_test_split

# self-written scripts
import sys
sys.path.insert(0, 'Python_Scripts')

import data_preparation_cnn


In [None]:
# Parameters
DIMENSION = (256, 1600)  # passed as image_dimension to the data-preparation script
classId_toRun = 2        # defect class whose images/masks are loaded for this run
SEED = 42
# Resizing images is optional, CNNs are ok with large images.
# NOTE: throughout this notebook SIZE_X is used as the image HEIGHT and SIZE_Y
# as the image WIDTH (input_shape=(SIZE_X, SIZE_Y, 3); cv2.resize receives
# (SIZE_Y, SIZE_X), i.e. (width, height)).
SIZE_X = 128
SIZE_Y = 512

EPOCHS = 2
BATCH_SIZE = 32
LEARNING_RATE = 0.005
# `lr` is only a deprecated alias; `learning_rate` is the supported keyword.
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
IMAGES_PER_CLASS = 1000  # passed as num_augmentations to the data preparation
BACKBONE = 'efficientnetb5'
SKIP_CONNECTIONS = None  # only logged as a run parameter in this notebook

---

### Data Preparations

In [None]:
# Load the complete training table
df = pd.read_csv('data/train_complete.csv')

# Keep only the rows that are flagged as defective (Defect == 1)
defects = df.loc[df['Defect'] == 1]
defects.head(2)

---

### Preparation for CNN Model

In [None]:
# create images and masks and their respective augmentations for all 4 defect classes

"""run time: ~ 4:15 """

# remove file directory for changed test run
!rm -r data/segmentation


start = time.time()
print('Starting data preparations')
print('-----'*10)

for i in range(4):
    print()
    data_preparation_cnn.prepare_data_for_class_id(df=defects, 
                                                   image_dimension=DIMENSION, 
                                                   seed=SEED, 
                                                   class_id=i+1, 
                                                   inverse_masks=False,
                                                   num_augmentations = IMAGES_PER_CLASS
                                                  )
    print()
    
end = time.time()

print('-----'*10)
print('total time for preparations:', end-start)

---

### CNN-Model

In [None]:
import tensorflow as tf
import segmentation_models as sm
import glob
import cv2
import os
import numpy as np
from matplotlib import pyplot as plt

import mlflow
from modeling.config_CNN_single_class import EXPERIMENT_NAME
# Read the MLflow tracking URI from the local ".mlflow_uri" file; the context
# manager makes sure the file handle is closed again.
with open(".mlflow_uri") as uri_file:
    TRACKING_URI = uri_file.read().strip()

In [None]:
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
# Open a run now; it stays active until mlflow.end_run() is called in the
# logging cell further down.
mlflow.start_run()
run = mlflow.active_run()

In [None]:
# Fetch the input preprocessing that belongs to the configured backbone, so it
# cannot drift from the encoder that is passed to sm.Unet.
preprocess_input = sm.get_preprocessing(BACKBONE)

In [None]:


# Load the resized training images and masks of the selected defect class
train_images, train_masks = data_preparation_cnn.get_resized_image_and_mask_lists(
    class_id=classId_toRun,
    size_x=SIZE_X,
    size_y=SIZE_Y,
)
# Report the array shapes as a quick sanity check
for loaded in (train_images, train_masks):
    print(loaded.shape)

In [None]:
# Alias the loaded arrays with the customary X / Y names
X, Y = train_images, train_masks
print(X.shape, Y.shape)
# Insert a new axis at position 3 of the masks (original note: may not be
# necessary, leftover from previous code)
Y = np.expand_dims(Y, axis=3)
print(Y.shape)

In [None]:
def get_images(class_id, size_x, size_y, base_dir='data/segmentation/test/'):
    """Load and resize all ``*.jpg`` test images of one defect class.

    Args:
        class_id: Defect class; files are read from ``<base_dir>/c<class_id>/``.
        size_x: Target image height in pixels.
        size_y: Target image width in pixels.
        base_dir: Directory that contains the per-class sub-folders.

    Returns:
        ``np.array`` built from the resized images in sorted file-name order.
        The array is empty when the folder is missing or holds no ``*.jpg``.
        Images are read with ``cv2.IMREAD_COLOR`` and keep OpenCV's channel
        order.

    Raises:
        IOError: If a matched file cannot be decoded by OpenCV.
    """
    class_dir = os.path.join(base_dir, 'c' + str(class_id))
    images = []
    for img_path in sorted(glob.glob(os.path.join(class_dir, '*.jpg'))):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None; fail with a clear
            # message instead of an obscure error inside cv2.resize
            raise IOError('could not read image: ' + img_path)
        # cv2.resize expects the target size as (width, height)
        images.append(cv2.resize(img, (size_y, size_x)))

    # Convert list to array for machine learning processing
    return np.array(images)

In [None]:
def get_masks(class_id, size_x, size_y, base_dir='data/segmentation/test_mask/'):
    """Load and resize all ``*.png`` test masks of one defect class.

    Args:
        class_id: Defect class; files are read from ``<base_dir>/c<class_id>/``.
        size_x: Target mask height in pixels.
        size_y: Target mask width in pixels.
        base_dir: Directory that contains the per-class sub-folders.

    Returns:
        ``np.array`` built from the resized single-channel masks in sorted
        file-name order. The array is empty when the folder is missing or
        holds no ``*.png``.

    Raises:
        IOError: If a matched file cannot be decoded by OpenCV.
    """
    class_dir = os.path.join(base_dir, 'c' + str(class_id))
    masks = []
    for mask_path in sorted(glob.glob(os.path.join(class_dir, '*.png'))):
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread signals failure by returning None; fail with a clear
            # message instead of an obscure error inside cv2.resize
            raise IOError('could not read mask: ' + mask_path)
        # cv2.resize expects the target size as (width, height)
        masks.append(cv2.resize(mask, (size_y, size_x)))

    # Convert list to array for machine learning processing
    return np.array(masks)

In [None]:
# Load the images and masks of the selected class from the test folders; they
# are used as validation data for training.
x_val = get_images(class_id=classId_toRun, size_x=SIZE_X, size_y=SIZE_Y)
y_val = get_masks(class_id=classId_toRun, size_x=SIZE_X, size_y=SIZE_Y)
# Insert a new axis at position 3 of the mask array
y_val = np.expand_dims(y_val, axis=3) #May not be necessary.. leftover from previous code 

In [None]:
# preprocess input variables
# Only the images go through the backbone preprocessing; the masks are used
# unchanged as targets.
x_train = preprocess_input(X)
y_train = Y
x_val = preprocess_input(x_val)

In [None]:
# Calculation of Dice coefficient and Dice loss
#https://dipanshurana.medium.com/steel-defect-detection-image-segmentation-using-keras-and-tensorflow-6118bc586ad2
def dice_coef(y_true, y_pred, smooth=1.):
    """Return the smoothed Dice coefficient of two tensors.

    Both inputs are cast to float32 and flattened, so the coefficient is
    computed over all elements at once:
    ``(2 * sum(t * p) + smooth) / (sum(t) + sum(p) + smooth)``.

    Args:
        y_true: Ground-truth tensor (presumably a binary mask - TODO confirm).
        y_pred: Predicted tensor holding the same number of elements.
        smooth: Constant added to numerator and denominator; it prevents a
            division by zero when both inputs are all zeros. The default of
            1.0 reproduces the previously hard-coded value.

    Returns:
        Scalar float32 tensor holding the Dice coefficient.
    """
    y_true_f = tf.reshape(tf.dtypes.cast(y_true, tf.float32), [-1])
    y_pred_f = tf.reshape(tf.dtypes.cast(y_pred, tf.float32), [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    denominator = tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth
    return (2. * intersection + smooth) / denominator

def dice_loss(y_true, y_pred):
    """Return the Dice loss, defined as ``1 - dice_coef(y_true, y_pred)``.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor.

    Returns:
        Scalar tensor; it gets smaller as the Dice coefficient grows.
    """
    # dice_coef flattens and casts its inputs itself, so no reshaping is
    # needed here
    return 1 - dice_coef(y_true, y_pred)

#### Setting up the model

In [None]:
# Select the tf.keras backend of segmentation_models before building the model
sm.set_framework('tf.keras')

# Bare call: the return value is not assigned or printed here
sm.framework()
# define model
#model = sm.Unet()#BACKBONE, encoder_weights='imagenet')

# U-Net with the configured backbone, a single sigmoid output channel and
# ImageNet encoder weights; encoder_freeze=True is passed so the encoder is
# not trained.
model = sm.Unet(BACKBONE,
                input_shape=(SIZE_X,SIZE_Y,3),
                classes=1,
                activation='sigmoid',
                encoder_weights='imagenet',
                encoder_freeze=True 
               )
# Train on the Dice loss; accuracy and the Dice coefficient are tracked as metrics
model.compile(optimizer=OPTIMIZER, loss=dice_loss, metrics=['accuracy',dice_coef]) 


#print(model.summary())

In [None]:
# Train the model; `history` keeps the per-epoch loss and metric values for
# the training data and for the validation data.
history=model.fit(x_train, 
                  y_train,
                  batch_size=BATCH_SIZE, 
                  epochs=EPOCHS,
                  verbose=1,
                  validation_data=(x_val, y_val)
                 )

In [None]:
# Dice coefficient of the last epoch (training / validation); these values are
# logged to MLflow as metrics.
dice_coef_value_train = history.history['dice_coef'][-1]
dice_coef_value_val = history.history['val_dice_coef'][-1]

In [None]:
# Display the training Dice coefficient of the last epoch
history.history['dice_coef'][-1]

In [None]:
# Settings of this run that are logged to MLflow as parameters
params = {
    'defect_class':classId_toRun,
    'images_per_class': IMAGES_PER_CLASS,
    'backbone_name': BACKBONE,
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "size_x": SIZE_X,
    "size_y": SIZE_Y,
    "seed": SEED,
    # Log the optimizer's class name: the optimizer object itself would be
    # stringified to its instance repr, which is not a stable, comparable value
    "optimizer": type(OPTIMIZER).__name__,
    'learning_rate': LEARNING_RATE,
    # NOTE(review): the key is misspelled ('scip' instead of 'skip'); it is
    # kept unchanged so runs stay comparable with already logged ones - confirm
    'scip_connections': SKIP_CONNECTIONS
  }

In [None]:
#logging params to mlflow
mlflow.log_params(params)
#setting tags
mlflow.set_tag("running_from_jupyter", "True")
#logging metrics
mlflow.log_metric("train-" + "DICE", dice_coef_value_train)
mlflow.log_metric("validation-" + "DICE", dice_coef_value_val)
# logging the model to mlflow will not work without a AWS Connection setup.. too complex for now
# but possible if running mlflow locally
# mlflow.log_artifact("../models")
# mlflow.sklearn.log_model(reg, "model")
# Close the run that was opened with mlflow.start_run()
mlflow.end_run()

In [None]:
# Optionally persist the trained model as HDF5 file named after the defect class
model_file = 'test{}.h5'.format(classId_toRun)
model.save(model_file)

#### Evaluation of the run

In [None]:
#accuracy = model.evaluate(x_val, y_val)
#plot the training and validation accuracy and loss at each epoch
# NOTE: `loss` and `epochs` are notebook globals that later plotting cells
# read as well.
loss = history.history['loss']
val_loss = history.history['val_loss']
# Epochs are numbered starting at 1 on the x-axis
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'y', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Plot the training and validation accuracy at each epoch
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Derive the x-axis from this cell's own data instead of relying on `loss`,
# which is defined in a different cell
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'y', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Plot the training and validation Dice coefficient at each epoch
acc = history.history['dice_coef']
val_acc = history.history['val_dice_coef']
# Derive the x-axis from this cell's own data instead of relying on `loss`,
# which is defined in a different cell
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'y', label='Training dice-coef')
plt.plot(epochs, val_acc, 'r', label='Validation dice-coef')
plt.title('Training and validation Dice-Coefficient')
plt.xlabel('Epochs')
plt.ylabel('dice-coef')
plt.legend()
plt.show()

In [None]:
# Reload the test images without backbone preprocessing, for display purposes
real_img = get_images(classId_toRun,SIZE_X,SIZE_Y)

In [None]:
# Predict masks for all preprocessed validation images
prediction = model.predict(x_val)

In [None]:
# Figure as subplots: original image, ground-truth mask, raw prediction and
# binarized prediction of one validation sample
i=31  # index of the sample to display

fig, ax = plt.subplots(4, 1, constrained_layout=True, figsize=(14, 14))

# The axes limits are left to imshow: forcing ylim to (0, SIZE_X) turned the
# y-axis around and displayed every image upside down.

# Subplot 1 (cv2 loads images as BGR, matplotlib expects RGB)
ax[0].imshow(cv2.cvtColor(real_img[i], cv2.COLOR_BGR2RGB), interpolation='none')
ax[0].set_title('Original Image Nr. ' +str(i), fontsize= 14)

# Subplot 2 (drop the trailing channel axis to get a 2-D image)
ax[1].imshow(y_val[i][..., 0], interpolation='none')
ax[1].set_title('Original-Mask Imgage Nr. ' +str(i), fontsize= 14)

# Subplot 3
ax[2].imshow(prediction[i][..., 0], interpolation='none')
ax[2].set_title('Predicted-Mask Imgage Nr. ' +str(i), fontsize= 14)

# Subplot 4 (prediction rounded to 0 / 1)
ax[3].imshow(np.round(prediction[i][..., 0]), interpolation='none')
ax[3].set_title('Binary Predicted-Mask Imgage Nr. ' +str(i), fontsize= 14)

In [None]:
# Display the raw predicted values of sample i
prediction[i]

In [None]:
# Evaluate on the complete validation set. A fixed `steps=10` would stop after
# ten batches (or run out of data on a smaller set), so the batch size is
# given instead and Keras iterates over all samples.
evaluation = model.evaluate(x_val, y_val, batch_size=BATCH_SIZE)

In [None]:
# View and Save segmented image
# `prediction` holds the masks of the whole validation set and `mask` is not
# defined in this notebook, so the mask of sample i is reshaped to a 2-D
# (SIZE_X, SIZE_Y) image instead.
prediction_image = prediction[i].reshape(SIZE_X, SIZE_Y)
plt.imshow(prediction_image, cmap='gray')
# plt.imsave('data/segmentation/test0_segmented.jpg', prediction_image, cmap='gray')

In [None]:
# Load and display a single mask file as a visual check
# NOTE(review): get_masks() reads "*.png" files from test_mask/, this path
# ends in ".jpg" - confirm that the file exists under this name.
img_path = 'data/segmentation/test_mask/c1/mask_04e23e414.jpg'
test_mask = cv2.imread(img_path, cv2.IMREAD_COLOR)
if test_mask is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message
    raise FileNotFoundError('could not read mask image: ' + img_path)
# cv2.resize expects the target size as (width, height)
test_mask = cv2.resize(test_mask, (SIZE_Y, SIZE_X))
plt.imshow(test_mask, cmap='gray')