#### Using GoogLeNet as Classify Model
Following GoogLeNet implementation from this [blog](https://ai.plainenglish.io/googlenet-inceptionv1-with-tensorflow-9e7f3a161e87)

Use GoogLeNet to classify defect patterns only (i.e., as an alternative to Yu classify model)

In [None]:
# !pip install pickle5

In [None]:
# import libraries
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, losses, optimizers, callbacks

import os
import time
import numpy as np
import pandas as pd

import helpers as helper
from keras_model_s3_wrapper import *

import boto3
import pickle5 as pickle
# handle to the S3 bucket that the data-loading cell reads from
bucket_name = 'wafer-capstone'
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(bucket_name)

In [None]:
# TensorFlow version in use (shown as the cell output)
tf.__version__

In [None]:
# list every physical device (CPU/GPU) visible to TensorFlow; no device-type filter
tf.config.list_physical_devices()

In [None]:
# specify variables for model
path = 'data'                   # key prefix of the pickled splits inside the S3 bucket
result_path = 'results'         # local directory for pickled predictions
model_path = '../saved_models'  # local directory for saved models

filename = 'WM-clean-paper'
option = '-classify' # -classify, -knn
map_column = 'waferMap224'

# identifier used to name the saved model and the prediction file.
# It was 'yudetect', which looks like a leftover from the Yu detect notebook and
# would write this GoogLeNet classifier's artifacts under the detect model's name.
model_id = 'googlenet'
data_id = 'paper'
note = '' # -optional

# wafer map height/width used for the network input
# NOTE(review): map_column is 'waferMap224', which suggests 224x224 maps -- confirm
# that these values match the selected column.
x = 60
y = 60

In [None]:
# load train, dev, and test sets
# directly from S3 (using boto3 resource)
start = time.time()


def _load_pickle_from_s3(key):
    """Download the object stored under `key` in `my_bucket` and unpickle it."""
    # NOTE: unpickling can execute arbitrary code; only read from a trusted bucket
    payload = my_bucket.Object(key).get()['Body'].read()
    return pickle.loads(payload)


train_key = f'{path}/{filename}-train{option}.pkl'
dev_key = f'{path}/{filename}-dev.pkl'
test_key = f'{path}/{filename}-test.pkl'

train = _load_pickle_from_s3(train_key)
dev = _load_pickle_from_s3(dev_key)
test = _load_pickle_from_s3(test_key)

# remove nones (classifyLabels == 8) from dev and test
dev = dev[dev.classifyLabels != 8].reset_index(drop=True)
test = test[test.classifyLabels != 8].reset_index(drop=True)

print("Wall time: {:.2f} seconds".format(time.time() - start))
print(f"Train: {len(train)}")
print(f"Dev: {len(dev)}")
print(f"Test: {len(test)}")

# positional lookup: the train index is not reset in this cell, so the
# label 0 is not guaranteed to exist
print(f"Sanity check: {np.unique(train[map_column].iloc[0])}")

In [None]:
# # load train, dev, and test sets
# # directly from S3 (using boto3 client)
# start = time.time()

# s3 = boto3.client('s3')
# bucket_name = 'wafer-capstone'

# obj = s3.get_object(Bucket = bucket_name, Key = f'{path}/{filename}-train{option}.pkl')
# body = obj['Body'].read()
# train = pickle.loads(body)

# obj = s3.get_object(Bucket = bucket_name, Key = f'{path}/{filename}-dev.pkl')
# body = obj['Body'].read()
# dev = pickle.loads(body)

# obj = s3.get_object(Bucket = bucket_name, Key = f'{path}/{filename}-test.pkl')
# body = obj['Body'].read()
# test = pickle.loads(body)

# print("Wall time: {:.2f} seconds".format(time.time() - start))
# print(f"Train: {len(train)}")
# print(f"Dev: {len(dev)}")
# print(f"Test: {len(test)}")

# print(f"Sanity check: {np.unique(train[map_column][0])}")

In [None]:
# # load train, dev, and test sets
# # from local instance
# start = time.time()

# with open(f'{path}/{filename}-train{option}.pkl', "rb") as fh:
#     train = pickle.load(fh)
# with open(f'{path}/{filename}-dev.pkl', "rb") as fh:
#     dev = pickle.load(fh)
# with open(f'{path}/{filename}-test.pkl', "rb") as fh:
#     test = pickle.load(fh)

# print("Wall time: {:.2f} seconds".format(time.time() - start))
# print(f"Train: {len(train)}")
# print(f"Dev: {len(dev)}")
# print(f"Test: {len(test)}")

#### Quick EDA

In [None]:
# train failure type distribution
# (defect_distribution lives in the project's helpers module; its output format is not visible here)
helper.defect_distribution(train, note='Train Set')

In [None]:
# dev failure type distribution
# (computed after the "none" rows were dropped from dev in the loading cell)
helper.defect_distribution(dev, note='Dev Set')

In [None]:
# test failure type distribution
# (computed after the "none" rows were dropped from test in the loading cell)
helper.defect_distribution(test, note='Test Set')

#### Data set-up

In [None]:
# prepare inputs
# stack the per-wafer maps of each split into one array per split
start = time.time()

x_train, x_val, x_test = (
    np.stack(split[map_column]) for split in (train, dev, test)
)

print("Wall time: {:.2f} seconds".format(time.time() - start))
# sanity check
# expected: (#rows, xdim, ydim)
print(x_train.shape)

In [None]:
# images are greyscale and carry no channel dimension,
# so add one and repeat it 3 times
start = time.time()


def _as_three_channels(batch):
    """Append an axis at position 3 to `batch` and repeat it three times along that axis."""
    with_channel = tf.expand_dims(batch, axis=3)
    return tf.repeat(with_channel, 3, axis=3)


x_train = _as_three_channels(x_train)
x_val = _as_three_channels(x_val)
x_test = _as_three_channels(x_test)

print("Wall time: {:.2f} seconds".format(time.time() - start))
# sanity check
# expected: TensorShape([#rows, xdim, ydim, 3])
x_train.shape

In [None]:
# prepare labels for supervised learning
# note: make sure labels are integers if using sparse categorical cross entropy
start = time.time()

# This notebook classifies defect patterns, so the targets are the classify
# labels (dev and test are filtered on classifyLabels when they are loaded).
# 'detectLabels' was used here before, which contradicts the expected 0-7 range below.
# NOTE(review): assumes train carries the same 'classifyLabels' column as dev/test.
label_column = 'classifyLabels'

y_train = np.asarray(train[label_column]).astype(np.uint8)
y_val = np.asarray(dev[label_column]).astype(np.uint8)
y_test = np.asarray(test[label_column]).astype(np.uint8)

print("Wall time: {:.2f} seconds".format(time.time() - start))
# sanity check
# expected: type = int, min = 0, max = 7
print(type(y_train[0]))
print(y_train.min(), y_val.min(), y_test.min())
print(y_train.max(), y_val.max(), y_test.max())

#### Model

In [None]:
def inception(x,
              filters_1x1,
              filters_3x3_reduce,
              filters_3x3,
              filters_5x5_reduce,
              filters_5x5,
              filters_pool):
    """Build one Inception (GoogLeNet) module on top of the tensor `x`.

    Four parallel branches are applied to `x` and concatenated along axis 3:
      1. a 1x1 convolution,
      2. a 1x1 reduction followed by a 3x3 convolution,
      3. a 1x1 reduction followed by a 5x5 convolution,
      4. a 3x3 max-pool (stride 1) followed by a 1x1 convolution.

    Every convolution uses 'same' padding and ReLU, so all branches keep the
    spatial size of `x` and can be concatenated.

    Args:
        x: input tensor; axis 3 is used as the channel axis.
        filters_1x1: number of filters of the 1x1 branch.
        filters_3x3_reduce: number of filters of the 1x1 reduction before the 3x3 convolution.
        filters_3x3: number of filters of the 3x3 convolution.
        filters_5x5_reduce: number of filters of the 1x1 reduction before the 5x5 convolution.
        filters_5x5: number of filters of the 5x5 convolution.
        filters_pool: number of filters of the 1x1 convolution after the max-pool.

    Returns:
        The concatenation of the four branch outputs along axis 3.
    """
    path1 = layers.Conv2D(filters_1x1, (1, 1), padding='same', activation='relu')(x)

    # the second convolution of this branch must be 3x3 (it was 1x1, which
    # reduced the branch to two stacked pointwise convolutions)
    path2 = layers.Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu')(x)
    path2 = layers.Conv2D(filters_3x3, (3, 3), padding='same', activation='relu')(path2)

    # likewise, the second convolution of this branch must be 5x5 (it was 1x1)
    path3 = layers.Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu')(x)
    path3 = layers.Conv2D(filters_5x5, (5, 5), padding='same', activation='relu')(path3)

    path4 = layers.MaxPool2D((3, 3), strides=(1, 1), padding='same')(x)
    path4 = layers.Conv2D(filters_pool, (1, 1), padding='same', activation='relu')(path4)

    return tf.concat([path1, path2, path3, path4], axis=3)

In [None]:
# Build the GoogLeNet graph: stem -> inception blocks with two auxiliary
# classifiers -> global average pooling -> softmax.
# The resizing layer brings the wafer maps to 224x224; it may be removed if the
# maps are already 224x224.

# units of every softmax head: the loading cell treats label 8 as "none" and the
# label cell expects defect labels 0-7, so 9 units cover every label value.
# (the auxiliary heads previously had 10 units while the main head had 9)
num_classes = 9


def _aux_classifier(features):
    """Attach an auxiliary softmax head to the intermediate tensor `features`."""
    head = layers.AveragePooling2D((5, 5), strides=3)(features)
    head = layers.Conv2D(128, 1, padding='same', activation='relu')(head)
    head = layers.Flatten()(head)
    head = layers.Dense(1024, activation='relu')(head)
    head = layers.Dropout(0.7)(head)
    return layers.Dense(num_classes, activation='softmax')(head)


# Take the input shape from the prepared data instead of the x/y config values,
# so the model always matches the selected map column.
inp = layers.Input(shape=tuple(x_train.shape[1:]))
# "nearest" is the supported name for nearest-neighbour interpolation
# ("neighbor" is not a valid Resizing method)
input_tensor = layers.experimental.preprocessing.Resizing(224, 224, interpolation="nearest")(inp)

# Intermediate tensors are held in `net` rather than `x`, so the `x` config
# variable is not overwritten and this cell can be re-run.
net = layers.Conv2D(64, 7, strides=2, padding='same', activation='relu')(input_tensor)
net = layers.MaxPooling2D(3, strides=2)(net)

net = layers.Conv2D(64, 1, strides=1, padding='same', activation='relu')(net)
net = layers.Conv2D(192, 3, strides=1, padding='same', activation='relu')(net)

net = layers.MaxPooling2D(3, strides=2)(net)

net = inception(net,
                filters_1x1=64,
                filters_3x3_reduce=96,
                filters_3x3=128,
                filters_5x5_reduce=16,
                filters_5x5=32,
                filters_pool=32)

net = inception(net,
                filters_1x1=128,
                filters_3x3_reduce=128,
                filters_3x3=192,
                filters_5x5_reduce=32,
                filters_5x5=96,
                filters_pool=64)

net = layers.MaxPooling2D(3, strides=2)(net)

net = inception(net,
                filters_1x1=192,
                filters_3x3_reduce=96,
                filters_3x3=208,
                filters_5x5_reduce=16,
                filters_5x5=48,
                filters_pool=64)

# first auxiliary classifier
aux1 = _aux_classifier(net)

net = inception(net,
                filters_1x1=160,
                filters_3x3_reduce=112,
                filters_3x3=224,
                filters_5x5_reduce=24,
                filters_5x5=64,
                filters_pool=64)

net = inception(net,
                filters_1x1=128,
                filters_3x3_reduce=128,
                filters_3x3=256,
                filters_5x5_reduce=24,
                filters_5x5=64,
                filters_pool=64)

net = inception(net,
                filters_1x1=112,
                filters_3x3_reduce=144,
                filters_3x3=288,
                filters_5x5_reduce=32,
                filters_5x5=64,
                filters_pool=64)

# second auxiliary classifier
aux2 = _aux_classifier(net)

net = inception(net,
                filters_1x1=256,
                filters_3x3_reduce=160,
                filters_3x3=320,
                filters_5x5_reduce=32,
                filters_5x5=128,
                filters_pool=128)

net = layers.MaxPooling2D(3, strides=2)(net)

net = inception(net,
                filters_1x1=256,
                filters_3x3_reduce=160,
                filters_3x3=320,
                filters_5x5_reduce=32,
                filters_5x5=128,
                filters_pool=128)

net = inception(net,
                filters_1x1=384,
                filters_3x3_reduce=192,
                filters_3x3=384,
                filters_5x5_reduce=48,
                filters_5x5=128,
                filters_pool=128)

net = layers.GlobalAveragePooling2D()(net)

net = layers.Dropout(0.4)(net)
# main classifier head
out = layers.Dense(num_classes, activation='softmax')(net)

In [None]:
# Three-output model: main classifier first, then the two auxiliary classifiers.
# `Model` is not imported by name in this notebook, so use the imported
# tensorflow.keras `models` module instead of relying on a wildcard import.
model = models.Model(inputs=inp, outputs=[out, aux1, aux2])

In [None]:
# one sparse categorical cross-entropy per output; the two auxiliary
# outputs contribute to the total loss with weight 0.3 each
model.compile(
    optimizer='adam',
    loss=[losses.sparse_categorical_crossentropy] * 3,
    loss_weights=[1, 0.3, 0.3],
    metrics=['accuracy'],
)

In [None]:
# every output is trained against the same labels
history = model.fit(
    x_train,
    [y_train] * 3,
    validation_data=(x_val, [y_val] * 3),
    batch_size=64,
    epochs=10,
)

In [None]:
# plot training vs validation loss (top) and main-output accuracy (bottom)
fig, axs = plt.subplots(2, 1, figsize=(15,15))

axs[0].plot(history.history['loss'])
axs[0].plot(history.history['val_loss'])
axs[0].title.set_text('Training Loss vs Validation Loss')
axs[0].set_xlabel('Epochs')
axs[0].set_ylabel('Loss')
axs[0].legend(['Train','Val'])

# Per-output metric keys are prefixed with the output layer's name, which Keras
# auto-generates (it was hard-coded as 'dense_4' and changes whenever the model
# cell is re-run). The main classifier is the model's first output.
main_output = model.output_names[0]
axs[1].plot(history.history[f'{main_output}_accuracy'])
axs[1].plot(history.history[f'val_{main_output}_accuracy'])
axs[1].title.set_text('Training Accuracy vs Validation Accuracy')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Accuracy')
axs[1].legend(['Train', 'Val'])

#### Model results

In [None]:
# save model to local instance
# target path: <model_path>/<model_id>-<data_id><note>
model.save(f'{model_path}/{model_id}-{data_id}{note}')

In [None]:
# save model to S3
# NOTE(review): s3_save_keras_model is presumably provided by the wildcard import
# from keras_model_s3_wrapper; the destination bucket/key layout is not visible here
s3_save_keras_model(model, f'{model_id}-{data_id}{note}')

In [None]:
# compute model results on test set
start = time.time()
# the model has three outputs, so pass one target per output, exactly as in fit
# (a single target array is not accepted by every TF 2.x release)
results = model.evaluate(x_test, [y_test] * 3)
print("Wall time: {:.2f} seconds".format(time.time() - start))
# results holds the total loss followed by the per-output losses and metrics
print(results)

In [None]:
# generate predictions for model analysis
start = time.time()
pred = model.predict(x_test)
# class predictions are taken from the first (main) output only
y_pred = pred[0].argmax(axis=1)
# keep both the predicted classes and the raw model outputs
predictions = [y_pred, pred]
print("Wall time: {} seconds".format(time.time() - start))

In [None]:
# save predictions
# save to local instance
# create the results directory first so open() does not fail on a fresh checkout
os.makedirs(result_path, exist_ok=True)
with open(f'{result_path}/{model_id}-{data_id}{note}.pkl', "wb") as f:
    pickle.dump(predictions, f)

In [None]:
# plot confusion matrix (normalized) of test labels vs predicted classes
# (plot_confusion_matrix comes from the project's helpers module)
helper.plot_confusion_matrix(y_test, y_pred, mode='classify', normalize=True)

In [None]:
# plot confusion matrix counts (same call as above with normalize=False)
helper.plot_confusion_matrix(y_test, y_pred, mode='classify', normalize=False)