#### Classify model from Yu, et al with Deformable Convolution Layers
Implementation of the classification model from the Yu, et al [paper](https://drive.google.com/file/d/1nYl4w41CAcj8XwTEdVwcD5lVheUFIHVy/view?usp=sharing) with the first convolution layer in each block replaced by a deformable convolution layer.  Deformable convolution layer used from Junliangwangdhu [GitHub](https://github.com/Junliangwangdhu/WaferMap).

Difference(s) from paper:
- Used to classify all defects, including none
- Resized to 224x224, but applied n=2 morphological thinning instead of median filter

None is randomly undersampled to 30,000.

In [2]:
# !pip install pickle5

In [None]:
# !pip install pandas==1.1.5

In [3]:
# import libraries
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
#from tensorflow.keras import datasets, layers, models, losses, optimizers, regularizers, callbacks, Input, Model, Sequential
from keras import datasets, layers, models, losses, optimizers, regularizers, callbacks, Input, Model, Sequential

import os
import time
import numpy as np
import pandas as pd

from layers_train import ConvOffset2D_train

import helpers as helper
from keras_model_s3_wrapper import *

import boto3
import pickle5 as pickle
s3 = boto3.resource('s3')
bucket_name = 'wafer-capstone'
my_bucket = s3.Bucket(bucket_name)

Using TensorFlow backend.


In [4]:
tf.__version__

'1.15.5'

In [None]:
pd.__version__

In [None]:
# tf.config.list_physical_devices(device_type=None)

In [5]:
# specify variables
path = 'processed_data/vit'
filename = 'WM-clean-vit224thin-undersampled'

result_path = 'results_dcn'
model_id = 'wmdcn'
data_id = '224thin-undersampled'
note = '' # -optional

In [6]:
# load dataset
from io import BytesIO
start = time.time()

data_key = f'{path}/{filename}.npz'
data_obj = my_bucket.Object(data_key).get()['Body'].read()
data = np.load(BytesIO(data_obj), allow_pickle=True)

print('Wall time: {:.3f} seconds'.format(time.time() - start))
data.files

Wall time: 2.401 seconds


['ids', 'labels', 'dataset', 'thinmap']

#### Data set-up

In [7]:
# prepare inputs
start = time.time()

x_train = data['thinmap'][data['dataset']=='train']
x_val = data['thinmap'][data['dataset']=='dev']
x_test = data['thinmap'][data['dataset']=='test']

print('Wall time: {:.3f} seconds'.format(time.time() - start))
print(f'Train: {x_train.shape}')
print(f'Dev: {x_val.shape}')
print(f'Test: {x_test.shape}')

Wall time: 109.186 seconds
Train: (47863, 224, 224, 3)
Dev: (25942, 224, 224, 3)
Test: (25943, 224, 224, 3)


In [8]:
# prepare labels for supervised learning
# note: make sure labels are integers if using sparse categorical cross entropy
start = time.time()

y_train = data['labels'][data['dataset']=='train']
y_val = data['labels'][data['dataset']=='dev']
y_test = data['labels'][data['dataset']=='test']

print("Wall time: {:.2f} seconds".format(time.time() - start))
# sanity check
# expected: type = int, min = 0, max = 8
print(type(y_train[0]))
print(min(y_train), min(y_val), min(y_test))
print(max(y_train), max(y_val), max(y_test))

Wall time: 0.02 seconds
<class 'numpy.uint8'>
0 0 0
8 8 8


#### Model

In [9]:
# define model architecture

# data_augmentation = Sequential([
#   layers.experimental.preprocessing.RandomFlip(seed=424),
#   layers.experimental.preprocessing.RandomRotation(1, fill_mode='constant', interpolation='nearest', seed=424),
# ])

#model.add(layers.experimental.preprocessing.RandomFlip(seed=424))
#model.add(layers.experimental.preprocessing.RandomRotation(1, fill_mode='constant', interpolation='nearest', seed=424))

inputs = Input(shape=(224, 224, 3))
#x = data_augmentation(inputs)

x = ConvOffset2D_train(32, name='conv_1_offset')(inputs)
x = layers.Conv2D(32, 3, padding='same', activation='relu')(x)
x = layers.Conv2D(32, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D(3)(x)

x = ConvOffset2D_train(64, name='conv_2_offset')(x)
x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D(3)(x)

x = ConvOffset2D_train(128, name='conv_3_offset')(x)
x = layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D(3)(x)

x = layers.Flatten()(x)
x = layers.Dense(4096, activation='sigmoid', kernel_regularizer=regularizers.l2(0.000001))(x)
x = layers.Dense(1024, activation='sigmoid', kernel_regularizer=regularizers.l2(0.000001))(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(9, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv_1_offset (ConvOffset2D_ (None, 224, 224, 3)       1728      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 32)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv_2_offset (ConvOffset2D_ (None, 74, 74, 32)        36864     
__________________________________________

In [10]:
# set model optimizer and metrics
opt = optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer='adam', loss=losses.sparse_categorical_crossentropy, metrics=['accuracy'])

In [11]:
# run model
start = time.time()

history = model.fit(x_train, y_train, validation_data=(x_val, y_val), batch_size=128, epochs=30)

print("Wall time: {:.2f} seconds".format(time.time() - start))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 47863 samples, validate on 25942 samples
Epoch 1/30


InvalidArgumentError: Input to reshape is a tensor with 411041792 values, but the requested shape requires a multiple of 768
	 [[{{node conv_1_offset/Reshape_2}}]]

In [None]:
# visualize accuracy and loss history
fig, axs = plt.subplots(2, 1, figsize=(15,15))

axs[0].plot(history.history['loss'])
axs[0].plot(history.history['val_loss'])
axs[0].title.set_text('Training Loss vs Validation Loss')
axs[0].legend(['Train', 'Val'])

axs[1].plot(history.history['accuracy'])
axs[1].plot(history.history['val_accuracy'])
axs[1].title.set_text('Training Accuracy vs Validation Accuracy')
axs[1].legend(['Train', 'Val'])

#### Model results

In [None]:
# save model to S3
s3_save_keras_model(model, f'{model_id}-{data_id}{note}')

In [None]:
# compute model results on test set
start = time.time()
results = model.evaluate(x_test, y_test)
print("Wall time: {:.2f} seconds".format(time.time() - start))
print(results)

In [None]:
# generate predictions for model analysis
start = time.time()
y_pred = model.predict(x_test)
y_max = np.argmax(y_pred, axis=1).astype(np.uint8)
predictions = [y_max, y_pred]
print("Wall time: {:.2f} seconds".format(time.time() - start))

In [None]:
# save predictions to local instance
with open(f'{result_path}/{model_id}-{data_id}{note}.pkl', "wb") as f:
    pickle.dump(predictions, f)

In [None]:
# plot confusion matrix
helper.plot_confusion_matrix(y_test, y_max, mode='all', normalize=True)

In [None]:
# plot confusion matrix counts
helper.plot_confusion_matrix(y_test, y_max, mode='all', normalize=False)