# Convolutional Neural Network - Multi-Class Classification


#### Intro to binary CNN
https://towardsdatascience.com/10-minutes-to-building-a-cnn-binary-image-classifier-in-tensorflow-4e216b2034aa

#### Code inspiration from here..

https://medium.com/techiepedia/binary-image-classifier-cnn-using-tensorflow-a3f5d6746697 


In [None]:
# Libraries that might need to be installed beforehand

#!pip install opencv-python
#!pip install scikit-learn

In [None]:
# Default Libraries
import pandas as pd
import tensorflow as tf
import numpy as np

# CNN Environment
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dropout
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
import tensorflow_hub as hub

%load_ext tensorboard

# Plotting Libraries
import matplotlib.pyplot as plt

# Miscellaneous
import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
import os
import shutil
import cv2
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

# Python Scripts
import sys  
sys.path.insert(0, './Python_Scripts')
import util

In [None]:
# Print the installed TensorFlow version and set the TF1-compat logger
# verbosity to INFO
print(tf.__version__)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

In [None]:
# Load the training annotations into a DataFrame
df = pd.read_csv('Data/train_complete.csv')
# NOTE: this is not the last expression of the cell, so with Jupyter's
# default settings the preview is computed but not displayed
df.head(2)

# Project helper imported from ./Python_Scripts (implementation not visible here).
# NOTE(review): presumably it marks or filters images with a single defect;
# whether it modifies `df` in place or only returns a result — confirm.
util.isolate_single_defects(df)

## Convert from RGB to Gray-Scale:


https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_grayscale

In [None]:
# Grey-scaling demo: read the image listed under label 0 of df.ImageId
original =  plt.imread('./Data/train_images/' + str(df.ImageId[0]))

# Convert the RGB image to a single grey channel
converted = tf.image.rgb_to_grayscale(original)

# Compare the shapes before and after the conversion
print(original.shape)
print(converted.shape)

In [None]:
# Show the original and the grey-scaled image stacked vertically.
# All drawing goes through the Axes handles returned by plt.subplots instead
# of mixing them with the implicit "current axes" of plt.subplot.
fig, ax = plt.subplots(2, 1, constrained_layout=True, figsize=(25, 10))

# Subplot 1: original RGB image
# NOTE(review): imshow puts row 0 at the top by default, so an ascending
# ylim of (0, 256) displays the image vertically flipped — confirm intended.
ax[0].imshow(original)
ax[0].set_xlim(0, 1600)
ax[0].set_ylim(0, 256)
ax[0].set_title('Original', fontsize=24)

# Subplot 2: grey-scaled image. A single-channel image is colour-mapped, so
# request the grey map explicitly; the default map would render it in
# false colours.
ax[1].imshow(converted, cmap='gray')
ax[1].set_xlim(0, 1600)
ax[1].set_ylim(0, 256)
ax[1].set_title('Grey-Scaled', fontsize=24)

## Test Split and Data Storage Preparation

In [None]:
 # Prepare the features (X) and labels (y) for the train/test split
#index = df.ClassId == 3 
#df = df[index] 
# Keep only the rows whose 'Defect' column equals 1
df_red = df.query('Defect == 1')

# Columns are picked by position: X is the first four columns, y the third.
# NOTE(review): presumably column 2 is ClassId with values 1-4 — confirm
# against the CSV header.
X = df_red.iloc[:,0:4]
y = df_red.iloc[:,2]
# Shift every label down by one.
# NOTE(review): CopySeparateImagesToTestTrain compares labels against 1-4,
# which would not line up with a shifted range — verify which is intended.
y = y - 1
#df_red.loc[df['ClassId'] == 3,'ClassId'] = 0
#df_red.loc[df['ClassId'] == 1,'ClassId'] = 1
#df_red.loc[df['ClassId'] == 1,'ClassId'] = 1
#df_red.loc[df['ClassId'] == 4,'ClassId'] = 1

In [None]:
# Display the shifted label Series
y

In [None]:
# Preview the first rows that passed the 'Defect == 1' filter
df_red.head()

In [None]:
# Stratified 70/30 split; the fixed random_state keeps the partition reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Remember the split sizes and report them
length_train, length_test = len(y_train), len(y_test)
print(f'Length of Train-Set: {length_train}')
print(f'Length of Test-Set: {length_test}')

In [None]:
# Check that stratification kept the share of label 1 equal in both splits:
# count the rows whose label equals 1 and divide by the split size.
# NOTE(review): the printed text says "No-Defect", but the value is simply
# the fraction of rows with label == 1 — confirm the wording still fits the
# multi-class setup.
print('No-Defect-Ratio in Train: ' + str(round((y_train == 1).astype(int).sum(axis=0) / len(y_train),4)))
print('No-Defect-Ratio in Test: ' + str(round((y_test == 1).astype(int).sum(axis=0) / len(y_test),4)))

In [None]:
# ConvertToGrayScale
def ConvertToGrayScale(file_name, input_path, target_path):
    """Load one RGB image, convert it to grey-scale and save it under the same name.

    Args:
        file_name: Image file name; appended to both path prefixes.
        input_path: Source directory prefix. It is concatenated as-is, so it
            must end with a path separator.
        target_path: Destination directory prefix (same convention).

    Returns:
        None. The converted image is written to ``target_path + file_name``.
    """
    # The `grayscale=` keyword is deprecated in favour of `color_mode=` and is
    # no longer accepted by newer Keras releases; `color_mode='rgb'` alone
    # loads the same three-channel image.
    original = tf.keras.utils.load_img(input_path + file_name, color_mode='rgb', target_size=None)
    converted = tf.image.rgb_to_grayscale(original)
    # scale=True rescales the pixel values to the 0-255 range before writing
    tf.keras.utils.save_img(target_path + file_name, converted, data_format=None, file_format=None, scale=True)

In [None]:
# Execute ConvertToGrayScale Function
#input_path = './Data/train_images/'
#target_path = './Data/train_images_GrayScaled/'
#for i in range(len(df.ImageId)):
#    file_name = df.ImageId[i]
#    ConvertToGrayScale(file_name, input_path, target_path)

In [None]:
# Create a temp path for the train & test split
def MakePathStructure():
    """Create Data/Temp/{Train,Test}/C1..C4 below the current working directory.

    Missing parent folders (including Data/Temp itself) are created as well,
    and folders that already exist are left untouched, so the function can be
    re-run safely. A status message is printed; the return value is None.
    """
    ## You might need to adjust the path to your local environment
    temp_path = os.getcwd() + "/Data/Temp"

    try:
        for split_name in ('Train', 'Test'):
            for class_name in ('C1', 'C2', 'C3', 'C4'):
                # makedirs builds the whole chain; exist_ok keeps re-runs
                # from failing on folders created earlier
                os.makedirs(temp_path + '/' + split_name + '/' + class_name, exist_ok=True)
    except OSError:
        print("Creation of the directories failed")
    else:
        print("Successfully created the directories")

In [None]:
# Execute Function If Test-Temp Folder Structure is not ready yet..
#MakePathStructure()

In [None]:
# Copy and separate the images into the Train and Test folders
def CopySeparateImagesToTestTrain():
    """Copy every image of both splits into its class sub-folder under Data/Temp.

    Reads the module-level ``path``, ``X_train``/``y_train`` and
    ``X_test``/``y_test``. The file name is taken from column 1 of the feature
    frame, and the label (1-4) selects the folder C1-C4 below
    ``Data/Temp/Train`` or ``Data/Temp/Test``.

    Raises:
        ValueError: If a label is not one of 1, 2, 3 or 4.
    """
    _copy_split_images(X_train, y_train, 'Train')
    # Test images must be routed by their own labels (y_test), not by y_train
    _copy_split_images(X_test, y_test, 'Test')


def _copy_split_images(features, labels, split_name):
    """Copy the images of one split into Data/Temp/<split_name>/C<label>/."""
    origin_train_path = path + '/Data/train_images/'
    class_folders = {1: 'C1', 2: 'C2', 3: 'C3', 4: 'C4'}

    for source_file, label in zip(features.iloc[:, 1], labels):
        if label not in class_folders:
            # Fail loudly instead of silently reusing the previous target folder
            raise ValueError(
                'Unexpected class label ' + repr(label) + ' for image ' + str(source_file)
            )
        target_directory = path + '/Data/Temp/' + split_name + '/' + class_folders[label] + '/'
        shutil.copy2(origin_train_path + source_file, target_directory + source_file)

In [None]:
# Run the copy function only if the images still have to be copied into the
# per-class sub-folders. `path` is set first because the function reads it
# as a module-level name.
path = os.getcwd()
#CopySeparateImagesToTestTrain()

## CNN Model Preparation

https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

https://keras.io/api/preprocessing/image/

In [None]:
# Get current working directory
path = os.getcwd()

# Training images: rescale the colour values to [0, 1] and apply light
# augmentation (random width/height shifts and horizontal flips)
datagen = ImageDataGenerator(rescale=1/255,
                             rotation_range=0,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0,
                             horizontal_flip=True)

# Validation images are only rescaled, never augmented
validation = ImageDataGenerator(rescale=1/255)
batch_size = 32

# Reads the class sub-folders of the Train directory and yields batches of
# augmented images with one-hot ('categorical') labels
train_generator = datagen.flow_from_directory(path + '/Data/Temp/Train/',
                                              target_size=(128, 512),
                                              #color_mode='grayscale',
                                              batch_size=batch_size,
                                              seed=42,
                                              class_mode='categorical')

# Same for the Test directory, without augmentation. shuffle=False keeps the
# files in directory order so that predictions made from this generator line
# up with validation_generator.classes / .filenames.
validation_generator = validation.flow_from_directory(path + '/Data/Temp/Test/',
                                                      target_size=(128, 512),
                                                      #color_mode='grayscale',
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      seed=42,
                                                      class_mode='categorical')

## Analysis Outcome From Image Generator

In [None]:
# Show the mapping from class folder name to label index used by the generator
print('Data labels are separated in following binary code: ' + str(validation_generator.class_indices))

In [None]:
# Inspect the type of the iterator returned by flow_from_directory
print(type(validation_generator))

In [None]:
# Iterate and see the pictures and labels
#img_batch, labels = next(validation_generator)
#print('Shape of the input batch' + str(img_batch.shape))
#print('Min of labels: ' + str(min(labels)) + ' and Max of labels: ' +str(max(labels)))

In [None]:
# Plot the image batch content
#fig, ax = plt.subplots(int(len(img_batch) / 2), 2,constrained_layout=False)
#                                                                            
#for i in range(len(img_batch)):
#    plt.subplot(int(len(img_batch) / 2), 2, i + 1)
#    fig.set_figheight(30)
#    fig.set_figwidth(20)
#    plt.xlim(0,1600)
#    plt.ylim(0,256)
#    plt.imshow(img_batch[i])
#    plt.text(100, 100, labels[i],color='r')
#    
#plt.show()

## Setup The Neural Network

https://keras.io/guides/sequential_model/

In [None]:
# importing the libraries
from keras.models import Model
from keras.layers import Flatten, Dense
from tensorflow.keras.applications.vgg16 import VGG16
from keras.preprocessing import image

IMAGE_SIZE = [128, 512]  # same size as the target_size passed to the image generators; a larger size may give better results at higher cost

# loading the weights of VGG16 without the top layer. These weights are trained on Imagenet dataset.
vgg = VGG16(input_shape = IMAGE_SIZE + [3], weights = 'imagenet', include_top = False)  # input_shape = (128, 512, 3): three colour channels as required by VGG

# freeze every VGG16 layer so that only the newly added head is trained
for layer in vgg.layers:
    layer.trainable = False

# new classification head on top of the frozen convolutional base
x = Flatten()(vgg.output)
x = Dense(128, activation = 'relu')(x)   # extra fully connected layer; adds capacity but also increases the execution time
x = Dense(4, activation = 'softmax')(x)  # output layer: softmax over the 4 classes (multi-class, one label per image)

model = Model(inputs = vgg.input, outputs = x)

# categorical cross-entropy matches the one-hot labels produced by class_mode='categorical'
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

https://keras.io/api/models/model_training_apis/


https://www.sciencedirect.com/topics/computer-science/one-hot-encoding

## Train The CNN

In [None]:
def train_model(model, batch_size, learning_rate, epochs):
    """Fit `model` on the module-level generators and log the run for TensorBoard.

    Args:
        model: Keras model to train in place; the caller is expected to have
            compiled it already, because no compile step happens here.
        batch_size: Currently unused (the steps_per_epoch / validation_steps
            arguments that relied on it are disabled).
        learning_rate: Only used to build an RMSprop optimizer that is never
            attached to the model, so it has no influence on training.
        epochs: Number of epochs passed to `model.fit`.

    Returns:
        The same `model` object after fitting.
    """
    # RMSprop keeps a moving (discounted) average of the squared gradients and
    # divides the gradient by the root of this average. The optimizer is
    # constructed but not wired into the model: the compile call that used it
    # is disabled, so the optimizer chosen at compile time stays active.
    # Open question carried over from the original notes: which label format
    # (scalar vs. one-hot) does the chosen loss expect?
    # https://keras.io/api/losses/probabilistic_losses/#binarycrossentropy-class
    unused_rmsprop = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

    # One TensorBoard log folder per run, named after the start time
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    board_logger = tf.keras.callbacks.TensorBoard(
        log_dir="logs/fit/" + run_stamp,
        histogram_freq=1,
    )

    # https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator#fit
    fit_options = {
        "epochs": epochs,
        "verbose": 1,
        "validation_data": validation_generator,
        "callbacks": [board_logger],
    }
    model.fit(train_generator, **fit_options)

    return model

In [None]:
# Pull one batch from the training generator and display the image-batch shape.
# NOTE(review): this rebinds the notebook-level names `x` (previously the
# model head output) and `y` (previously the label Series used for the
# split); re-running earlier cells afterwards will see the batch values.
x,y =next(train_generator)
x.shape

In [None]:
# Pull one batch from the validation generator and display the label-batch shape.
# NOTE(review): rebinds the notebook-level names `x` and `y` again, see the
# split and model cells that originally defined them.
x,y =next(validation_generator)
y.shape

In [None]:
# Train model
trained_model = train_model(model, batch_size, learning_rate = 0.01,epochs= 10)
#model.save_weights('first_try.h5')

https://www.pyimagesearch.com/2018/12/24/how-to-use-keras-fit-and-fit_generator-a-hands-on-tutorial/

In [None]:
import seaborn as sns
from sklearn import metrics
# Predicted class scores for the validation generator (softmax output of the model)
y_pred = model.predict(validation_generator)

In [None]:
# Reduce each row of class scores to the index of the highest-scoring class
y_pred_ = np.argmax(y_pred,axis=1)

In [None]:
# Confusion matrix of true vs. predicted class indices.
# The true labels are taken from the generator (validation_generator.classes)
# so that they describe the same files, in the same class-index encoding, as
# the predictions; y_test follows the DataFrame split order instead.
# NOTE: rows only line up with y_pred_ when the validation generator yields
# its files in directory order, i.e. it was created with shuffle=False.
# fmt='d' prints the counts as plain integers.
sns.heatmap(metrics.confusion_matrix(validation_generator.classes, y_pred_), annot=True, fmt='d', cmap='YlGn')

In [None]:
# classification_report expects class labels, so pass the arg-maxed
# predictions (y_pred_) rather than the raw score matrix (y_pred).
# The true labels come from the generator so that they use the same
# class-index encoding as the predictions.
# NOTE: rows only line up when the validation generator yields its files in
# directory order, i.e. it was created with shuffle=False.
print(metrics.classification_report(validation_generator.classes, y_pred_))

In [None]:
# Start the TensorBoard Notebook Extension
%tensorboard --logdir logs/fit

## CNN Model Fitting Process

In [None]:
#print(history.history)

## CNN Evaluate Model Performance

In [None]:
# Evaluate The Model Accuracy
def evaluate_model(validation_generator):
    """Evaluate the module-level `model` on `validation_generator`.

    Args:
        validation_generator: Data source handed straight to `model.evaluate`.

    Returns:
        Whatever `model.evaluate` returns, so callers can inspect the metrics
        instead of only reading them from the progress output.
    """
    return model.evaluate(validation_generator)

In [None]:
# Evaluate the trained model on the validation generator
evaluate_model(validation_generator)

In [None]:
# Predict class scores for the validation generator
# (the step-size / reset lines below are disabled leftovers)
#STEP_SIZE_TEST = validation_generator.n // validation_generator.batch_size
#validation_generator.reset()
preds = model.predict(validation_generator, verbose=1)