**Resources**

- [Convert Pandas DF into TF Dataset](https://medium.com/when-i-work-data/converting-a-pandas-dataframe-into-a-tensorflow-dataset-752f3783c168)
- [Albumentations documentation (mask augmentation)](https://albumentations.ai/docs/getting_started/mask_augmentation/)
- 

In [None]:
#!pip install segmentation-models

In [None]:
#!pip install -U albumentations --no-binary qudida,albumentations

In [None]:
# remove file directory for changed test run
!rm -r data/segmentation

In [1]:
import tensorflow as tf
from tensorflow_examples.models.pix2pix import pix2pix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import segmentation_models as sm

from IPython.display import clear_output

import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import os
import shutil
import cv2
import time
import glob

from sklearn.model_selection import train_test_split

# self-written scripts
import sys
sys.path.insert(0, 'Python_Scripts')

import data_preparation_cnn


# Parameters
DIMENSION = (256, 1600)  # passed as `image_dimension` to the data-preparation helpers
SEED = 42  # seed handed to the data preparation (train/test split)
BATCH_SIZE = 32  # batch size used by `model.fit`
# Resizing images is optional, CNNs are ok with large images
SIZE_X = 128  # target height of the resized images (height = X, width = Y)
SIZE_Y = 128  # target width of the resized images

Segmentation Models: using `keras` framework.


---

### Data Preparations

In [2]:
df = pd.read_csv('data/train_complete.csv')

# create data frame for defective pictures with added `Mask`
defects = df.query('Defect == 1')
defects.head(2)

Unnamed: 0,FilePath,ImageId,ClassId,EncodedPixels,Defect
0,/Users/fabioteichmann/neuefische/projects/Caps...,0002cc93b.jpg,1,29102 12 29346 24 29602 24 29858 24 30114 24 3...,1
1,/Users/fabioteichmann/neuefische/projects/Caps...,0007a71bf.jpg,3,18661 28 18863 82 19091 110 19347 110 19603 11...,1


---

### Preparation for CNN Model

In [None]:
# Build images, masks and their augmentations for all 4 defect classes.
# Measured run time: ~ 4:15

start = time.time()
print('Starting data preparations')
print('-----' * 10)

# ClassIds are 1-based, hence range(1, 5)
for class_id in range(1, 5):
    print()
    data_preparation_cnn.prepare_data_for_class_id(
        df=defects,
        image_dimension=DIMENSION,
        seed=SEED,
        class_id=class_id,
        inverse_masks=True,
    )
    print()

end = time.time()

print('-----' * 10)
print('total time for preparations:', end - start)

---

### CNN-Model

In [None]:
import tensorflow as tf
import segmentation_models as sm
import glob
import cv2
import os
import numpy as np
from matplotlib import pyplot as plt

In [None]:
#BACKBONE = 'resnet34'
#preprocess_input = sm.get_preprocessing(BACKBONE)

preprocess_input = sm.get_preprocessing('efficientnetb5')

In [3]:
# load images and masks as input for model
train_images, train_masks = data_preparation_cnn.get_resized_image_and_mask_lists(class_id=4, 
                                                                                  size_x=SIZE_X, 
                                                                                  size_y=SIZE_Y)
print(train_images.shape)
print(train_masks.shape)

(3604, 128, 128, 3)
(3604, 128, 128)


In [None]:
# Use the customary X / Y names for the model inputs and targets.
X, Y = train_images, train_masks
print(X.shape, Y.shape)
# Append a trailing channel axis to the masks: (n, h, w) -> (n, h, w, 1).
# May not be necessary.. leftover from previous code.
Y = Y[..., np.newaxis]
print(Y.shape)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20 % of the samples for validation. The notebook-wide SEED is used
# instead of a literal so that every split in this notebook is controlled by
# the same constant.
x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=SEED)

# Apply the backbone-specific preprocessing to both splits.
x_train = preprocess_input(x_train)
x_val = preprocess_input(x_val)

#### Setting up the model

In [None]:
# Switch segmentation_models to the tf.keras backend before building the model.
sm.set_framework('tf.keras')
sm.framework()

# U-Net on the 'efficientnetb5' backbone with ImageNet encoder weights and a
# single sigmoid output channel (binary mask).
# Alternative kept for reference: sm.Unet(BACKBONE, encoder_weights='imagenet')
model = sm.Unet(
    'efficientnetb5',
    classes=1,
    activation='sigmoid',
    encoder_weights='imagenet',
)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# print(model.summary())

In [None]:
y_val.shape

In [None]:
# Train for 10 epochs; the returned History object feeds the evaluation plots.
history = model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=10,
    verbose=1,
    validation_data=(x_val, y_val),
)

#### Evaluation of the run

In [None]:
# accuracy = model.evaluate(x_val, y_val)
# Plot the training and validation loss at each epoch.
loss, val_loss = (history.history[key] for key in ('loss', 'val_loss'))
epochs = range(1, 1 + len(loss))

plt.plot(epochs, loss, 'y', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# NOTE(review): the file name says "class2" while the images loaded for this
# run use class_id=4 — confirm the intended name.
model.save('class2_test.h5')

In [None]:
# Plot the training and validation accuracy at each epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Derive the x-axis from `acc` itself so this cell only depends on `history`
# and not on the `loss` variable created by the loss-plot cell.
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'y', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
from tensorflow import keras
# Reload the saved model for inference only (compile=False skips loss/optimizer).
model = keras.models.load_model('class2_test.h5', compile=False)

# Test on a different image. Other candidates:
#   data/segmentation/test/c1/0d4866e3c.jpg
#   data/segmentation/test/c1/1bed9264f.jpg
test_img_path = 'data/segmentation/test/c1/04e23e414.jpg'
test_img = cv2.imread(test_img_path, cv2.IMREAD_COLOR)
if test_img is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise FileNotFoundError(f'could not read test image: {test_img_path}')

test_img = cv2.resize(test_img, (SIZE_Y, SIZE_X))  # cv2.resize takes (width, height)
# OpenCV loads images as BGR; swap the channel order to RGB.
# (COLOR_BGR2RGB performs the same channel swap as the previously used COLOR_RGB2BGR.)
test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
print(test_img.shape)
test_img = np.expand_dims(test_img, axis=0)  # add the batch axis
# The model was trained on inputs passed through `preprocess_input`,
# so the same preprocessing has to be applied before predicting.
test_img = preprocess_input(test_img)
print(test_img.shape)

In [None]:
prediction = model.predict(test_img)

In [None]:
#evaluation = model.evaluate(x_val, y_val, steps=10)

In [None]:
#evaluation

In [None]:
# View and save the segmented image.
# `prediction` is expected to be (1, height, width, 1) for the single-image,
# single-class prediction; drop the batch and channel axes to get a 2-D image.
# (Previously this reshaped to `mask.shape`, but `mask` is not defined at this
# point when the notebook is run top to bottom.)
prediction_image = prediction[0, :, :, 0]
plt.imshow(prediction_image, cmap='gray')
plt.imsave('data/segmentation/test0_segmented.jpg', prediction_image, cmap='gray')

In [None]:
# Show the ground-truth mask belonging to the test image for comparison.
img_path = 'data/segmentation/test_mask/c1/mask_04e23e414.jpg'
test_mask = cv2.imread(img_path, cv2.IMREAD_COLOR)
if test_mask is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise FileNotFoundError(f'could not read test mask: {img_path}')
test_mask = cv2.resize(test_mask, (SIZE_Y, SIZE_X))  # cv2.resize takes (width, height)
# NOTE: the mask is loaded with 3 channels, for which matplotlib ignores `cmap`.
plt.imshow(test_mask, cmap='gray')



---

### Model from Oxford-Tut (TO-DO)

In [None]:
# ENCODER: MobileNetV2 without its classification head (include_top=False),
# built for 128x128 RGB inputs.
base_model = tf.keras.applications.MobileNetV2(input_shape=[128, 128, 3], include_top=False)

# Use the activations of these layers (the trailing comments give the
# spatial size of each activation for the 128x128 input).
layer_names = [
                'block_1_expand_relu',   # 64x64
                'block_3_expand_relu',   # 32x32
                'block_6_expand_relu',   # 16x16
                'block_13_expand_relu',  # 8x8
                'block_16_project',      # 4x4
]
base_model_outputs = [base_model.get_layer(name).output for name in layer_names]

# Create the feature extraction model: one input, one output per listed layer
down_stack = tf.keras.Model(inputs=base_model.input, outputs=base_model_outputs)

# Freeze the encoder weights
down_stack.trainable = False

In [None]:
# The decoder/upsampler is simply a series of upsample blocks implemented in TensorFlow examples.
# `unet_model` applies them in list order, pairing each block with one encoder skip output.

up_stack = [
    pix2pix.upsample(512, 3),  # 4x4 -> 8x8
    pix2pix.upsample(256, 3),  # 8x8 -> 16x16
    pix2pix.upsample(128, 3),  # 16x16 -> 32x32
    pix2pix.upsample(64, 3),   # 32x32 -> 64x64
]

In [None]:
def unet_model(output_channels:int):
    """Assemble a U-Net from the module-level `down_stack` and `up_stack`.

    Parameters:
    output_channels - number of filters (channels) of the final layer

    Returns a `tf.keras.Model` that maps a 128x128x3 input to the output of a
    final stride-2 transposed convolution with `output_channels` filters.
    No activation is applied to that last layer.
    """
    inputs = tf.keras.layers.Input(shape=[128, 128, 3])

    # Encoder pass: the last output is the bottleneck, the others are skips.
    *skip_features, x = down_stack(inputs)

    # Decoder pass: upsample and concatenate with the skips, deepest skip first.
    for upsample, skip in zip(up_stack, skip_features[::-1]):
        x = upsample(x)
        x = tf.keras.layers.Concatenate()([x, skip])

    # Final layer of the model: 64x64 -> 128x128
    x = tf.keras.layers.Conv2DTranspose(
        filters=output_channels,
        kernel_size=3,
        strides=2,
        padding='same',
    )(x)

    return tf.keras.Model(inputs=inputs, outputs=x)

In [None]:
OUTPUT_CLASSES = 2

# NOTE: this rebinds `model`, replacing the segmentation_models U-Net from the cells above.
model = unet_model(output_channels=OUTPUT_CLASSES)
# SparseCategoricalCrossentropy expects integer class ids as targets (not one-hot
# vectors); from_logits=True matches the activation-free last layer of `unet_model`.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), # integer labels, raw logits
              metrics=['accuracy'])

---

In [None]:
# #!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
# train_file_path = "data/segmentation/train"

# train_data = tf.data.experimental.make_csv_dataset(train_file_path, 
#                                                    header=False, 
#                                                    field_delim='\t', 
#                                                    column_names=['label', 'text'], 
#                                                    batch_size=64, 
#                                                    label_name='label', 
#                                                    num_epochs=1, 
#                                                    ignore_errors=True)

# examples, labels = next(iter(train_data)) # Just the first batch.
# print("FEATURES: \n", examples, "\n")
# print("LABELS: \n", labels)

# encoder = keras.layers.TextVectorization(max_tokens=None, output_mode='int', output_sequence_length=160)
# encoder.adapt(train_data)

In [None]:
def get_dataset(df, complete=True, limit=3000):
    """Build a `tf.data.Dataset` of (file path, mask) pairs from a data frame.

    Parameters:
    df       - data frame with a `FilePath` column (strings) and a `Mask`
               column (one array-like per row)
    complete - if `True`, use every row; otherwise only the first `limit` rows
    limit    - number of leading rows to keep when `complete` is `False`
               (defaults to 3000, the previously hard-coded value)

    Returns a dataset whose elements are `(tf.string, tf.float32)` tensors.
    """
    if not complete:
        # positional slice, independent of the index labels of `df`
        df = df.iloc[:limit]

    file_paths = tf.cast(df['FilePath'].values, tf.string)
    masks = tf.cast(df['Mask'].values.tolist(), tf.float32)
    return tf.data.Dataset.from_tensor_slices((file_paths, masks))

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

#tf.enable_eager_execution()

# training_df = pd.DataFrame(
#     data={'FilePath': defects.FilePath[:50],
#           'Mask': defects.Mask[:50]
#          }
# )

# NOTE(review): `X_train` is not assigned in any earlier cell of this notebook
# (the split above creates lowercase `x_train`, an image array). `get_dataset`
# needs a data frame with `FilePath` and `Mask` columns — confirm the source.
training_dataset = get_dataset(X_train, complete=False)

# for features_tensor, target_tensor in training_dataset:
#     print(f'features:{features_tensor} target:{target_tensor}')
training_dataset

In [None]:
training_dataset

In [None]:
tf.data.Dataset.from_tensor_slices((defects.FilePath[:10], list(defects.Mask[:10])))

---

### Build Dataset for Model

In [None]:
# build the dataset by reassembling the split data
dataset = {'train': X_train.join(y_train), 'test': X_test.join(y_test)}

---

### Prepare the Model

In [None]:
def normalize(input_image, input_mask):
    """Cast the image to float32 and divide it by 255; shift the mask values down by one.

    Returns the tuple `(input_image, input_mask)` after both adjustments.
    """
    input_image = tf.cast(input_image, tf.float32) / 255.0
    # NOTE(review): the shift presumably assumes mask labels that start at 1;
    # for masks that already start at 0 it would produce -1 — confirm.
    input_mask -= 1
    return input_image, input_mask

In [None]:
def load_image(file_path, mask):
    """Read one image file, decode it to 3 channels and normalise it with its mask.

    Parameters:
    file_path - path of the image file to read
    mask      - mask belonging to that image, handed to `normalize` as is

    Returns the `(image, mask)` pair produced by `normalize`.
    """
    # NOTE(review): the files referenced in this notebook are .jpg while the
    # decoder is `decode_png` — confirm this is intended.
    image = tf.image.decode_png(tf.io.read_file(file_path), channels=3)

    # Resizing is currently disabled: image and mask keep their original size.
    return normalize(image, mask)

In [None]:
TRAIN_LENGTH = len(set(X_train.ImageId)) # number of unique `ImageIds`
BATCH_SIZE = 64  # NOTE: overrides the BATCH_SIZE = 32 from the parameters cell for all later cells
BUFFER_SIZE = 1000
STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE  # number of full batches per epoch

In [None]:
train_images = training_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
#test_images = dataset['test'].map(load_image, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
train_images

---
### Testing

In [None]:
X_test.head()

In [None]:
# Generate `masks` for every image
masks = []

# Iterate over the column values instead of looking rows up by label:
# `defects` is a filtered frame, so its index labels need not be 0..n-1 and
# `defects.EncodedPixels[i]` could raise KeyError or address the wrong row.
for encoded_pixels, class_id in zip(defects.EncodedPixels, defects.ClassId):
    mask = mask_conversion.decode_pixel(image_dimension=DIMENSION,
                                        encoded_pixels=encoded_pixels,
                                        class_id=class_id)
    # store each mask flattened to 1-D, wrapped in a list so it fills one cell
    masks.append([mask.reshape(mask.shape[0]*mask.shape[1])])

# Reuse the index of `defects` so that a later `defects.join(masks_)`
# lines every mask up with the row it was decoded from.
masks_ = pd.DataFrame(masks, columns=['Mask'], index=defects.index)

In [None]:
defects = defects.join(masks_)

In [None]:
defects.head(3)

In [None]:
# Visualize some masks: one figure with one row per mask.
n_preview = 5
# `mask` still holds the last decoded 2-D mask; its shape is the target shape
# for un-flattening the stored masks.
mask_shape = mask.shape

fig, axes = plt.subplots(n_preview, 1, figsize=(25, 8))
for i, ax in enumerate(axes):
    # positional access: the index labels of `defects` need not start at 0
    mask = defects.Mask.iloc[i].reshape(mask_shape)
    ax.imshow(mask)
    ax.axis("off")

In [None]:
# encoded_pixels = defects.EncodedPixels[0]
# class_id = defects.ClassId[0]
# mask = mask_conversion.decode_pixel(image_dimension=DIMENSION, 
#                                     encoded_pixels=encoded_pixels, 
#                                     class_id=class_id)
# mask

In [None]:
mask_flat = mask.reshape(mask.shape[0]*mask.shape[1])
mask_unflat = mask_flat.reshape(mask.shape[0], mask.shape[1])

In [None]:
# (mask_unflat == mask).sum()

In [None]:
# defects.EncodedPixels[0]

In [None]:
"""FOR CLASSIFICATION"""

path = os.getcwd()
print(path)

# Rescale the color values by 1/255; no augmentation options are configured
train = ImageDataGenerator(rescale=1/255)
validation = ImageDataGenerator(rescale=1/255)

# Takes the path to a directory & generates batches of rescaled images,
# with integer labels (class_mode='sparse') taken from the sub-folders.
train_generator = train.flow_from_directory(path + '/data/segmentation/train/',
                                            target_size=(256,1600),   #150,150
                                            batch_size = 32,
                                            save_format='jpg',
                                            class_mode = 'sparse')

# Same for the validation images, read from the test folder.
validation_generator = validation.flow_from_directory(path + '/data/segmentation/test/',
                                          target_size=(256,1600),
                                          batch_size = 32,
                                          class_mode = 'sparse')

---

### Externalized functions

In [None]:
def create_train_test_dfs(df, seed):
    """Split `df` 70/30 into train and test frames, stratified by `ClassId`.

    Parameters:
    df   - data frame containing a `ClassId` column
    seed - random state passed to `train_test_split`

    Returns `(train_df, test_df)`; both contain the `ClassId` column again.
    `df` itself is not modified.
    """
    features = df.copy()
    labels = features.pop('ClassId')

    # stratify on the labels so both parts keep the class proportions
    train_features, test_features, train_labels, test_labels = train_test_split(
        features,
        labels,
        stratify=labels,
        test_size=0.3,
        random_state=seed,
    )

    train_df = train_features.join(train_labels)
    test_df = test_features.join(test_labels)
    return train_df, test_df

In [None]:
def _make_missing_dirs(directories):
    """Create every directory that does not exist yet (parents included); return True if any was created."""
    created_any = False
    for directory in directories:
        if not os.path.isdir(directory):
            os.makedirs(directory, exist_ok=True)
            created_any = True
    return created_any


# Create a temp path for the train & test split
def create_train_test_folders(subfolder, class_id):
    """Create the folder structure for images and masks of one defect class.

    Below `<cwd>/data/<subfolder>` the folders `train`, `train_aug`,
    `train_mask`, `train_mask_aug`, `test` and `test_mask` are created, each
    with a class sub-folder `c<class_id>`.

    Every folder is checked and created individually, so a partially existing
    structure is completed instead of being left incomplete, and missing
    parent folders (e.g. `data`) are created as well. Calling the function
    again is safe.

    Parameters:
    subfolder - name of the folder below `data/` that holds the structure
    class_id  - id of the defect class, used for the `c<class_id>` sub-folders
    """
    print('preparing folders...')
    # You might need to adjust the path to your local environment
    temp_path = os.path.join(os.getcwd(), 'data', subfolder)
    path_suffix = 'c' + str(class_id)

    train_folders = ('train', 'train_aug', 'train_mask', 'train_mask_aug')
    test_folders = ('test', 'test_mask')

    # base folder structure
    base_dirs = [temp_path]
    base_dirs += [os.path.join(temp_path, name) for name in train_folders + test_folders]
    if _make_missing_dirs(base_dirs):
        print('base folder structure created')
    else:
        print('base folder structure exists')

    # class specific sub-folder structure for training
    train_dirs = [os.path.join(temp_path, name, path_suffix) for name in train_folders]
    if _make_missing_dirs(train_dirs):
        print(f'sub-folder structure for ClassId {class_id} created')
    else:
        print(f'sub-folder structure for ClassId {class_id} already exists')

    # class specific sub-folder structure for testing
    test_dirs = [os.path.join(temp_path, name, path_suffix) for name in test_folders]
    if _make_missing_dirs(test_dirs):
        print(f'sub-foder structure for testing for ClassId {class_id} created')
        print ("Successfully created the directories")
    else:
        print ("Directories already exist")
    print()

In [None]:
# Copy and separate images into the test and train folders
def copy_images_to_train_test(df_train, df_test, subfolder, class_id=2):
    """Copy the images of one defect class into their train / test folders.

    Images are read from `<cwd>/data/train_images/` and copied to
    `<cwd>/data/<subfolder>/train/c<class_id>/` or
    `<cwd>/data/<subfolder>/test/c<class_id>/`. The folder structure is
    created first via `create_train_test_folders`.

    File names are taken from the `ImageId` column by name (the column the
    mask and augmentation helpers use as file name) rather than from a fixed
    column position. Train and test rows are copied in separate loops, so all
    test rows are handled regardless of how many train rows there are.

    Parameters:
    df_train  - data frame of training rows with `ImageId` and `ClassId`
    df_test   - data frame of test rows with `ImageId` and `ClassId`
    subfolder - name of the folder below `data/` that receives the copies
    class_id  - id of the defect class to copy (default 2)

    Raises whatever `shutil.copy2` raises (e.g. `FileNotFoundError`) if a
    listed image cannot be copied.
    """
    base_dir = os.getcwd()
    class_folder = 'c' + str(class_id)
    source_dir = os.path.join(base_dir, 'data', 'train_images')

    create_train_test_folders(subfolder, class_id)

    print('copying images to folders...')

    for split_name, split_df in (('train', df_train), ('test', df_test)):
        target_dir = os.path.join(base_dir, 'data', subfolder, split_name, class_folder)
        image_ids = split_df.query('ClassId == @class_id')['ImageId']
        for image_id in image_ids:
            shutil.copy2(os.path.join(source_dir, image_id),
                         os.path.join(target_dir, image_id))

    print(f'Images successfully copied to {subfolder}!')
    print()

#### Create Mask Pictures

In [None]:
import cv2
import pathlib

def create_mask_image(image, image_id, encoded_pixels, inverse_masks):
    """Build the mask for one image and write it to `mask_<image_id>`.

    The file is written relative to the current working directory, so the
    caller decides where it ends up.

    Parameters:
    image          - not used by this function
    image_id       - file name of the image; the mask file gets the prefix `mask_`
    encoded_pixels - encoded defect pixels, handed to `mask_conversion`
    inverse_masks  - selects `create_mask_with_class_id_inverted` instead of
                     `create_mask_with_class_id`
    """
    # NOTE(review): class_id is fixed to 2 here although masks are generated
    # for other classes too — confirm this is intended.
    build_mask = (
        mask_conversion.create_mask_with_class_id_inverted
        if inverse_masks
        else mask_conversion.create_mask_with_class_id
    )
    mask = build_mask(DIMENSION, class_id=2, encoded_pixels=encoded_pixels)
    mask *= 255  # scale the mask values before writing the image

    cv2.imwrite('mask_' + image_id, mask)
    #print(written)

In [None]:
def generate_mask_images(df, class_id, train=True, inverse_masks=False):
    """Generate and save the mask images for all images of `class_id`.

    Masks are written to `<cwd>/data/segmentation/train_mask/c<class_id>/`
    (or `test_mask` when `train` is `False`) as `mask_<ImageId>`.

    Parameters:
    df            - data frame with `ImageId`, `ClassId` and `EncodedPixels`;
                    the encoded pixels are read from this frame (not from a
                    notebook-level variable)
    class_id      - id of the defect class to process
    train         - `True` for the training folders, `False` for the test folders
    inverse_masks - forwarded to `create_mask_image`

    The working directory is changed while the masks are written and is
    always restored afterwards, also when an error occurs.
    """
    print(f'generating mask images for ClassId {class_id} images...')
    # One row per image; for repeated ImageIds the first entry is used.
    class_rows = df.query('ClassId == @class_id').drop_duplicates(subset='ImageId')

    path = os.getcwd()
    path_suffix = 'c' + str(class_id) + '/'
    split = 'train' if train else 'test'
    # absolute paths, because the working directory changes below
    image_directory = path + '/data/segmentation/' + split + '/' + path_suffix
    target_directory = path + '/data/segmentation/' + split + '_mask/' + path_suffix

    # `create_mask_image` writes into the current working directory,
    # so switch to the target directory for the saving process
    os.chdir(target_directory)
    try:
        for image_id, encoded_pixels in zip(class_rows.ImageId, class_rows.EncodedPixels):
            image = cv2.imread(image_directory + image_id)
            create_mask_image(image, image_id, encoded_pixels, inverse_masks)
    finally:
        # switch back to home directory
        os.chdir(path)

    print('mask images successfully generated!')
    print()

---

#### Albumentations

In [None]:
import albumentations as A

# Augmentation pipeline applied to an image together with its mask.
# Left out on purpose (previously commented out): A.VerticalFlip and the
# A.OneOf group of ElasticTransform / GridDistortion / OpticalDistortion.
augment = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.CLAHE(p=0.8),
        A.RandomBrightnessContrast(p=0.8),
        A.RandomGamma(p=0.8),
    ]
)

#### Testing Augmentations

In [None]:
def visualize(image, mask, original_image=None, original_mask=None):
    """Plot an image with its mask, optionally next to the originals.

    Parameters:
    image          - (transformed) image to show
    mask           - (transformed) mask to show
    original_image - optional original image
    original_mask  - optional original mask

    Without originals, image and mask are stacked in a 2x1 grid. Otherwise a
    2x2 grid shows the originals in the left column and the transformed
    versions in the right column, each with a title.
    """
    fontsize = 18

    # simple layout: no originals were passed
    if original_image is None and original_mask is None:
        _, axes = plt.subplots(2, 1, figsize=(15, 12))
        axes[0].imshow(image)
        axes[1].imshow(mask)
        return

    _, axes = plt.subplots(2, 2, figsize=(12, 5))
    panels = (
        ((0, 0), original_image, 'Original image'),
        ((1, 0), original_mask, 'Original mask'),
        ((0, 1), image, 'Transformed image'),
        ((1, 1), mask, 'Transformed mask'),
    )
    for position, picture, title in panels:
        axes[position].imshow(picture)
        axes[position].set_title(title, fontsize=fontsize)

In [None]:
original_image = cv2.imread('data/segmentation/train/c1/060c3159e.jpg')
original_mask = cv2.imread('data/segmentation/train_mask/c1/mask_060c3159e.jpg')

augmented = augment(image=original_image, mask=original_mask)

transformed_image = augmented['image']
transformed_mask = augmented['mask']

If the image has multiple associated masks, you should use the `masks` argument instead of `mask`. In masks you should pass a list of masks.

```python
transformed = transform(image=image, masks=masks)
transformed_image = transformed['image']
transformed_masks = transformed['masks']
```

In [None]:
visualize(transformed_image, transformed_mask, original_image, original_mask)

#### Apply Augmentations

In [None]:
import random
import os
import time

def augement_images_and_masks(image_ids, num_augmentations, class_id):
    """Write `num_augmentations` augmented image/mask pairs for one defect class.

    For every augmentation a random entry of `image_ids` is picked, the image
    and its mask (`mask_<image_id>`) are read from
    `data/segmentation/train/c<class_id>/` and
    `data/segmentation/train_mask/c<class_id>/`, both are passed through the
    notebook-level `augment` pipeline, and the results are written to
    `train_aug` / `train_mask_aug` as `aug_<i>_<image_id>` and
    `aug_<i>_mask_<image_id>`.

    All paths are built from the working directory at call time; the working
    directory itself is not changed.

    Parameters:
    image_ids         - indexable collection of image file names
    num_augmentations - number of augmented pairs to create
    class_id          - id of the defect class, selects the `c<class_id>` folders

    Raises:
    FileNotFoundError - if an image or mask cannot be read
    OSError           - if an augmented image or mask cannot be written
    """
    print(f'beginning augmentation for ClassId {class_id}...')
    start = time.time()

    path = os.getcwd()
    path_suffix = 'c' + str(class_id) + '/'

    source_directory_image = path + '/data/segmentation/train/' + path_suffix
    source_directory_mask = path + '/data/segmentation/train_mask/' + path_suffix
    target_directory_image = path + '/data/segmentation/train_aug/' + path_suffix
    target_directory_mask = path + '/data/segmentation/train_mask_aug/' + path_suffix

    for i in range(1, num_augmentations + 1):
        number = random.randint(0, len(image_ids) - 1)
        image_id = image_ids[number]
        mask_id = 'mask_' + image_id

        original_image = cv2.imread(source_directory_image + image_id)
        original_mask = cv2.imread(source_directory_mask + mask_id)
        # cv2.imread returns None instead of raising for unreadable files
        if original_image is None:
            raise FileNotFoundError('could not read image: ' + source_directory_image + image_id)
        if original_mask is None:
            raise FileNotFoundError('could not read mask: ' + source_directory_mask + mask_id)

        # image and mask go through the pipeline together
        augmented = augment(image=original_image, mask=original_mask)

        image_target = target_directory_image + 'aug_' + str(i) + '_' + image_id
        mask_target = target_directory_mask + 'aug_' + str(i) + '_' + mask_id
        # cv2.imwrite returns False when the file could not be written
        if not cv2.imwrite(image_target, augmented['image']):
            raise OSError('could not write augmented image: ' + image_target)
        if not cv2.imwrite(mask_target, augmented['mask']):
            raise OSError('could not write augmented mask: ' + mask_target)

    end = time.time()
    print(f'augmented {num_augmentations} images of ClassId {class_id}')
    print('time required for augmentation:', end - start)

In [None]:
path = os.getcwd()
path

In [None]:
os.chdir(path)

In [None]:
from sys import argv
image_ids = Xy_train.query('ClassId == 2').ImageId.values
print(len(image_ids))
nr = 0
#for id_ in image_ids:
    #print(os.path.exists('data/segmentation/train/c1/' + id_))
    # if os.access('data/segmentation/train/c1/' + image_id, os.R_OK):
    #     continue
    # else:
    #     nr += 1
        
print(nr)

In [None]:
image_id = 'a41ba727f.jpg'
original_image = cv2.imread('data/segmentation/train/c1/' + image_id)
print(os.access('data/segmentation/train/c1/' + image_id, os.R_OK))
print(os.path.exists('data/segmentation/train/c1/' + image_id))
print('data/segmentation/train/c1/' + image_id)
plt.imshow(original_image)

---

In [None]:
def prepare_data_for_class_id(df, image_dimension, seed, class_id, inverse_masks):
    """Run the complete data preparation for one defect class.

    Steps, all delegated to the `data_preparation_cnn` module:
    - split `df` into train and test frames using `seed`
    - create the folder structure and copy the images accordingly
    - generate the mask images for the train and the test images
    - augment the training images together with their masks

    Input parameters:
    df              - data frame that contains all defects and the `FilePaths` to all images
    image_dimension - forwarded to `generate_mask_images`
    seed            - seed for `train_test_split`
    class_id        - id of defect class
    inverse_masks   - forwarded to `generate_mask_images`; presumably `True`
                      makes defect pixels white and all other pixels black — confirm
    """
    print('Starting data preparations')
    print('-----'*10)

    start = time.time()

    # train / test split
    df_train, df_test = data_preparation_cnn.create_train_test_dfs(df, seed)

    # sort images into folders according to the split
    data_preparation_cnn.copy_images_to_train_test(df_train, df_test, 'segmentation', class_id)

    # mask images, first for the train and then for the test data
    for is_train in (True, False):
        data_preparation_cnn.generate_mask_images(
            df,
            class_id,
            image_dimension,
            train=is_train,
            inverse_masks=inverse_masks,
        )

    # augmentation: the count equals the size of the largest class in the train frame
    class_image_ids = df_train.query('ClassId == @class_id').ImageId.values
    largest_class_size = max(df_train.groupby('ClassId').count().ImageId)
    data_preparation_cnn.augement_images_and_masks(
        image_ids=class_image_ids,
        num_augmentations=largest_class_size,
        class_id=class_id,
    )

    end = time.time()

    print('data successfully prepared for the model!')
    print('time elapsed:', end-start)

In [None]:
def add_mask_to_df(df):
    """Return a copy of `df` with an added `Mask` column.

    For every row the mask is decoded from `EncodedPixels` and `ClassId` via
    `mask_conversion.decode_pixel` and stored flattened to one dimension.

    Rows are processed in order, independent of their index labels, and the
    masks carry the index of `df`, so the result is aligned row by row even
    when `df` is a filtered frame whose index does not run from 0 to n-1.

    Parameters:
    df - data frame with the columns `EncodedPixels` and `ClassId`

    Returns a new data frame: `df` joined with the `Mask` column.
    """
    masks = []

    for encoded_pixels, class_id in zip(df.EncodedPixels, df.ClassId):
        mask = mask_conversion.decode_pixel(image_dimension=DIMENSION,
                                            encoded_pixels=encoded_pixels,
                                            class_id=class_id)
        # flatten the 2-D mask; wrapped in a list so it fills a single cell
        masks.append([mask.reshape(mask.shape[0]*mask.shape[1])])

    # same index as `df`, otherwise the join below would align on 0..n-1
    masks_ = pd.DataFrame(masks, columns=['Mask'], index=df.index)

    return df.join(masks_)