#Mask Detection
### In this section, you will implement the mask detector using different methods.


## Download the Dataset

Train data

In [None]:
!wget "https://drive.google.com/uc?export=download&id=1ouMOxaDNNr-U7E2DD3Y6wLlyeVb8qnr0" -O train_data.p
data_list = pickle.load(open("train_data.p", "rb"))

Test images

In [None]:
!wget "https://drive.google.com/uc?export=download&id=16CCS6DiAzFwCT1165ogvKVf9JwajOAHm" -O test_images.p
test_images = pickle.load(open("test_images.p", "rb"))

### Get a view of what the dataset looks like.

Run the following code to check the dataset.

The given training dataset contains 4602 examples. Each example is a human face image with a label (good or bad) to denote whether there is a mask on the face.

In [None]:
print("The size of the dataset is: ", len(data_list))
print("\n The Structure of the data: ", '\n', type(data_list[0]))
print(data_list[-1])
print("\n Show some of the samples")
f, axarr = plt.subplots(2,2, figsize=(10,10))
# Show four samples, each titled with ITS OWN label. Taking the image and the
# title from the same index avoids pairing a picture with another sample's label.
# axarr.flat walks the 2x2 grid row by row.
for ax, sample_index in zip(axarr.flat, (1, 2, -1, -2)):
  ax.imshow(data_list[sample_index]['image'])
  ax.title.set_text(data_list[sample_index]['label'])
f.show()

## Preprocessing of the Dataset

### Distribution of the Dataset

In [None]:
### Count the number of different classes ###
Num_of_classes = 0
# Maps each label to the number of samples carrying it.
classes = {}
for sample in data_list:
  label = sample['label']
  # Labels seen for the first time start from an implicit count of 0.
  classes[label] = classes.get(label, 0) + 1
print("Num. of Different Classes: ", classes)

In [None]:
### Distribution of the Size of the Images ###
# Size of an image = number of pixels (first two dimensions multiplied).
image_size = []
for sample in data_list:
  height, width = sample['image'].shape[:2]
  image_size.append(height * width)

In [None]:
# Median of the per-image pixel counts (shape[0] * shape[1]) collected in image_size.
print("Median Size of Images: ", np.median(image_size))

### Image Preprocessing
For this section, we will preprocess the image data according to the following steps
*   Convert gray image to RGB
*   Resize the image to 128 * 128
*   Scale the value of each pixel from [0, 255] to [-1, 1]

You may find the OpenCV API helpful.

In [None]:
### Images & Labels Split ###
label2int = {"good": 1, "bad": 0}
IMAGES = []
LABELS = []
for sample in data_list:
  IMAGES.append(sample['image'])
  LABELS.append(label2int[sample['label']])

In [None]:
from sklearn.preprocessing import MinMaxScaler

### Preprocess the Image ###
def preprocess(image):
  '''Turn a raw image into the 128 x 128 x 3 float array used by the models.

  Steps:
  1. Cast to uint8 and, if the image is gray-scale (2D or a single channel),
     convert it to 3-channel RGB. Images that already have several channels
     are left as they are, because COLOR_GRAY2RGB only accepts 1-channel input.
  2. Resize to 128 x 128.
  3. Scale the pixel values to [-1, 1] with a MinMaxScaler fitted on this
     image alone.

  NOTE(review): step 3 is a per-image, per-channel min/max stretch (the
  darkest value of each channel maps to -1 and the brightest to 1), not a
  fixed [0, 255] -> [-1, 1] mapping. Confirm this is the intended scaling.

  Input:  array-like image exposing .astype / .ndim / .shape (numpy array).
  Output: the scaled image, same shape as the resized image.
  '''

  image = image.astype(np.uint8)

  #Converts the gray-scale image to RGB (3D); color images are passed through
  if image.ndim == 2 or image.shape[-1] == 1:
    image = cv.cvtColor(image, cv.COLOR_GRAY2RGB)

  #Resizes the image to (128, 128, 3)
  image = cv.resize(image, (128, 128))

  #Scales the pixel values to [-1, 1] using MinMaxScaler.
  #The scaler works on 2D data, so pixels become rows and channels columns,
  #and the result is reshaped back to the image layout.
  scaler = MinMaxScaler(feature_range=(-1,1))
  image = scaler.fit_transform(image.reshape(-1, image.shape[-1])).reshape(image.shape)

  return image

In [None]:
### Process the Raw Image ###
# Apply preprocess() to every raw image, keeping the original order.
processed_IMAGES = [preprocess(raw_image) for raw_image in IMAGES]

In [None]:
#Tests with a sample image to see if pixel values have been properly scaled from [0,255] to [-1,1]
# preprocess() fits its MinMaxScaler on each image separately, so a non-constant
# image reaches both ends of the (-1, 1) target range.
test = processed_IMAGES[0]
print(np.max(test)) #Should be 1
print(np.min(test)) #Should be -1

### Train / Validation Split
Now we split the dataset into a training set and a validation (dev) set.

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for validation.
# NOTE(review): no random_state is passed, so the split differs from run to run — confirm whether reproducibility is needed.
X_train, X_val, y_train, y_val = train_test_split(processed_IMAGES, LABELS, test_size=0.2)

In [None]:
#https://towardsdatascience.com/deeppicar-part-5-lane-following-via-deep-learning-d93acdce6110
from imgaug import augmenters
import random

class Augmenter:
  '''Doubles a dataset by appending one augmented copy of every image.

  The images are split into three consecutive chunks: the first chunk is
  zoomed, the second flipped and the third blurred. Nothing is computed at
  construction time; call run() to get the enlarged arrays.

  XData: sequence/array of images (first axis indexes the images).
  YData: labels, in the same order as XData.
  '''

  def __init__(self, XData, YData):
    # Only store the inputs. The augmentation is expensive and random, so it
    # is performed exactly once, when the caller asks for it through run().
    self.XData = XData
    self.YData = YData

  def zoom(self, data):
    '''Return an array holding a randomly zoomed-in copy of each image in data.'''
    zoomed = []
    for img in data:
      # A fresh upper bound per image; imgaug then samples the actual scale
      # from the (1, maxZoomLevel) range.
      maxZoomLevel = random.uniform(1.1, 1.5)
      zoom = augmenters.Affine(scale=(1, maxZoomLevel))
      zoomed.append(zoom.augment_image(img))
    return np.array(zoomed)

  def flip(self, data):
    '''Return an array holding a flipped copy of each image in data.

    Two random bits select the flip: equal bits flip around both axes,
    otherwise only one axis is flipped. An unflipped copy is never produced
    (presumably so the augmented image always differs from the original).
    '''
    flipped = []
    for img in data:
      first = random.randint(0, 1)
      second = random.randint(0, 1)
      if first == second:
        flipCode = -1  # both axes (same as flipping with code 0, then code 1)
      elif first:
        flipCode = 0   # around the x-axis (vertical flip)
      else:
        flipCode = 1   # around the y-axis (horizontal flip)
      flipped.append(cv.flip(img, flipCode))
    return np.array(flipped)

  def blur(self, data):
    '''Return an array holding a box-blurred copy of each image in data.'''
    blurred = []
    for img in data:
      # Square averaging kernel with a random side length between 3 and 30.
      kernel_size = random.randint(3, 30)
      blurred.append(cv.blur(img, (kernel_size, kernel_size)))
    return np.array(blurred)

  def run(self):
    '''Build the augmented dataset.

    Returns (XData, YData): the original images followed by the zoomed,
    flipped and blurred chunks, and the labels repeated twice. The chunks are
    consecutive slices of the input, so their concatenation is in the original
    order and lines up with the second copy of the labels.
    '''
    parts = [np.asarray(self.XData)]
    chunks = np.array_split(self.XData, 3)
    for augment, chunk in zip((self.zoom, self.flip, self.blur), chunks):
      # With fewer than three images some chunks are empty; skip them, since
      # an empty result would not have the image dimensions needed to concatenate.
      if len(chunk):
        parts.append(augment(chunk))

    # A single concatenation avoids re-copying the growing array per chunk.
    XData = np.concatenate(parts, axis=0)
    YData = np.tile(self.YData, 2)

    return XData, YData


# Replace the training set with the augmented one returned by run()
# (the original images plus the zoomed / flipped / blurred copies, labels tiled twice).
# NOTE: re-running this cell augments the already-augmented data again.
X_train, y_train = Augmenter(X_train, y_train).run()

## Baseline - Perceptron
For the baseline model, we simply flatten the image data and utilize a Perceptron as our classifier.

In [None]:
def flatten_generator(images):
  '''Lazily yield a flattened (1-D) copy of every image in images.'''
  yield from (image.flatten() for image in images)

def flatten(images):
  '''Flatten every image into a 1-D vector.

  Input:  iterable of arrays, e.g. images of shape (128, 128, 3)
  Output: list of 1-D arrays, e.g. of shape (49152,)
  '''
  return [*flatten_generator(images)]

# Flattened (one 1-D vector per image) versions of the training and validation
# images, as lists, for the Perceptron baseline.
X_train_flatten = flatten(X_train)
X_val_flatten = flatten(X_val)

In [None]:
from sklearn.linear_model import Perceptron
### Fit a Perceptron (default hyper-parameters) on the flattened training data ###
# fit() returns the fitted estimator, so construction and training are chained.
model = Perceptron().fit(X_train_flatten, y_train)
# Mean accuracy on the training data itself.
print(model.score(X_train_flatten, y_train))

### Evaluate the performance on the test images

In [None]:
!wget "https://drive.google.com/uc?export=download&id=16CCS6DiAzFwCT1165ogvKVf9JwajOAHm" -O test_images.p
test_images = pickle.load(open("test_images.p", "rb"))

You will need to preprocess these test images and predict the label for each image (1: good, 0: bad)

and save your predictions in a list [0, 1, 1, ...]

In [None]:
###TODO: Use your perceptron model to predict on the test images###
# The submission must contain predictions for the TEST images, so they are
# run through the same preprocessing and flattening as the training data
# (predicting on the validation split would submit the wrong samples).
X_test_flatten = flatten([preprocess(image) for image in test_images])
# .tolist() gives the plain Python list of 0/1 labels that the instructions ask for.
prediction = model.predict(X_test_flatten).tolist()
#print(prediction)

# Do not change the following line, you need to submit perceptron.p to gradescope
# If you are using colab, it will show up on the left side, remember to download it
pickle.dump(prediction, open("perceptron.p", "wb"))

## Build Your Own CNN Model
To improve the performance of our mask detector, we plan to build a CNN model using keras. A suggested architecture is shown as follows, but feel free to modify it by adding or eliminating layers. The autograder is based on your final accuracy.


* Convolution with 32 filters with kernel size 7x7 followed by ReLU activation 
function, input shape (128, 128, 3);
* Max Pool with filter size/pool size = 7 and stride = 4;
* Convolution with 16 filters with kernel size 5x5 followed by ReLU activation function;
* Max Pool with filter size/pool size = 7 and stride = 4;
* Flatten layer to transform 3D layers to a single tensor/vector;
* Fully Connected with 64 neurons and ReLU activation function
* Fully Connected with 2 neurons and softmax activation function



In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Conv1D, MaxPooling1D
from keras.layers import Dense, Activation, Dropout, Flatten

In [None]:
num_classes = 2

# The suggested architecture, declared as one ordered list of layers.
model = Sequential([
  # Conv block 1: 32 filters of 7x7 + ReLU on the (128, 128, 3) input, then max pooling
  Conv2D(32, 7, activation='relu', input_shape=(128, 128, 3)),
  MaxPooling2D(pool_size=(7, 7), strides = 4),

  # Conv block 2: 16 filters of 5x5 + ReLU, then max pooling
  Conv2D(16, 5, activation='relu'),
  MaxPooling2D(pool_size=(7, 7), strides = 4),

  # Collapse the feature maps into a single vector
  Flatten(),

  # Intermediate fully connected layer
  Dense(64, activation='relu'),

  # Output layer: one softmax unit per class
  Dense(2, activation='softmax'),
])

In [None]:
# Print the layer-by-layer overview of the CNN defined above.
model.summary()

In [None]:
# SGD optimizer with categorical cross-entropy; this loss takes one-hot targets,
# matching the 2-unit softmax output layer. Accuracy is reported during training.
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) 

### Ready For Training
Before training, we first need to make sure the training data is an array whose shape matches our model's input. We also need to convert our labels to one-hot encoding.

In [None]:
def convert2onehot(labels):
  '''One-hot encode integer labels.

  A label equal to 1 becomes [1, 0]; every other label becomes [0, 1].
  Returns a numpy array with one row per label.
  '''
  positive, negative = [1, 0], [0, 1]
  return np.array([positive if label == 1 else negative for label in labels])

# Image sets as numpy arrays, and labels as one-hot rows
# (convert2onehot maps label 1 to [1, 0] and anything else to [0, 1]).
X_train_array = np.array(X_train)
X_val_array = np.array(X_val)
y_train_onehot = convert2onehot(y_train)
y_val_onehot = convert2onehot(y_val)

In [None]:
# Train for 10 epochs in batches of 16, evaluating on the validation set after each epoch.
model.fit(X_train_array, y_train_onehot, batch_size=16, epochs=10, validation_data=(X_val_array, y_val_onehot))

### Evaluate on test images
Again, evaluate the performance of your model on the test images and save your predictions into a list `[0, 1, 0, 0, ...]`.

Note that you need to convert the one-hot prediction to the original labels.

In [None]:
###TODO: Use the CNN model to predict on the test images###
# The submission must contain predictions for the TEST images, so they are
# preprocessed exactly like the training images before being fed to the CNN
# (predicting on the validation split would submit the wrong samples).
X_test_array = np.array([preprocess(image) for image in test_images])

predictions = model.predict(X_test_array)
# Undo the one-hot encoding: column 0 is label 1 and column 1 is label 0
# (see convert2onehot); ties go to label 1.
prediction = [1 if p[0] >= p[1] else 0 for p in predictions]

print(prediction)

# Do not change the following line, you need to submit cnn.p to gradescope
# If you are using colab, it will show up on the left side, remember to download it
pickle.dump(prediction, open("cnn.p", "wb"))

## Advanced CNN Model using ImageNet
To further enhance the ability of R2D2, we decided to use some well-designed network architectures pretrained on ImageNet. There are a bunch of such pretrained models built into Keras; you can follow the examples in the [official documentation](https://keras.io/api/applications/) and fine-tune one of these models on our mask detection task.

Once you construct an excellent model, remember to save the model configuration and weights, evaluate it on the test images, and upload the predictions to the leaderboard to compete with your classmates.

In [None]:
from keras.applications import InceptionResNetV2
from tensorflow.python.keras.layers import Flatten, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint

# InceptionResNetV2 backbone with ImageNet weights and without its original
# classification head; pooling='max' applies global max pooling to its output.
net = InceptionResNetV2(include_top=False,
                        weights='imagenet',
                        input_tensor=None,
                        input_shape=(128, 128, 3),
                        pooling='max')
num_classes = 2

# New head: flatten -> dropout -> softmax over the two classes.
x = net.output
x = Flatten()(x)
x = Dropout(0.5)(x)
output_layer = Dense(num_classes, activation='softmax', name='softmax')(x)
net_final = Model(inputs=net.input, outputs=output_layer)
# Only the first two layers are frozen; all remaining layers are fine-tuned.
for layer in net_final.layers[:2]:
  layer.trainable = False
for layer in net_final.layers[2:]:
  layer.trainable = True
net_final.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) 
#net_final.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy']) 

# On-the-fly augmentation of the training batches.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    vertical_flip=True,
    horizontal_flip=True)
# NOTE(review): patience (20) equals the number of epochs (20), so early stopping
# cannot trigger before training ends; the best weights are instead recovered
# from the checkpoint written below.
earlystopping_callback = EarlyStopping(monitor='val_accuracy', mode='max', patience=20, restore_best_weights=True)
bestweights_callback= ModelCheckpoint(filepath='/bestweightscheckpoint', monitor='val_accuracy', mode='max', save_best_only=True, save_weights_only=True)

# steps_per_epoch must be an integer: round the batch count up so the last,
# partial batch of an epoch is still used.
net_final.fit(datagen.flow(X_train_array, y_train_onehot, batch_size=32, shuffle=True), steps_per_epoch=int(np.ceil(len(X_train_array) / 32)), epochs=20, validation_data=(X_val_array, y_val_onehot), callbacks=[earlystopping_callback, bestweights_callback])
# Reload the weights of the epoch with the best validation accuracy.
net_final.load_weights('/bestweightscheckpoint')


########################Alternative model###############################
# net = InceptionResNetV2(include_top=False,
#                         weights='imagenet',
#                         input_tensor=None,
#                         input_shape=(128, 128, 3),
#                         pooling='max') #'max' vs 'avg'
# num_classes = 2

# x = net.output
# x = Flatten()(x)
# x = Dropout(0.5)(x) #Put back in for 97.5
# output_layer = Dense(num_classes, activation='softmax', name='softmax')(x)
# net_final = Model(inputs=net.input, outputs=output_layer)
# for layer in net_final.layers[:2]:
#   layer.trainable = False
# for layer in net_final.layers[2:]:
#   layer.trainable = True
# net_final.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) 
# #net_final.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy']) 

# datagen = ImageDataGenerator(
#     rotation_range=40,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     fill_mode='nearest',
#     vertical_flip=True,
#     horizontal_flip=True)
# net_final.fit(datagen.flow(X_train_array, y_train_onehot, batch_size=32, shuffle=True), steps_per_epoch=len(X_train_array) / 32, epochs=20, validation_data=(X_val_array, y_val_onehot))

In [None]:
prediction = []
# Preprocess the test images the same way as the training images.
processed_test_images = [preprocess(raw_image) for raw_image in test_images]
X_test_array = np.array(processed_test_images)
X_test_array_flattened = flatten(X_test_array)

# Per-image class scores from the fine-tuned network.
prediction_one_hot = net_final.predict(X_test_array)

def one_hot_to_labels(prediction):
  '''Convert two-column class scores back to integer labels.

  prediction: iterable of rows [score_for_label_1, score_for_label_0].
  Returns a list with exactly one label per row: 1 when the first score is
  at least as large as the second, otherwise 0.

  Comparing the two scores directly (instead of rounding each row and matching
  it against [1, 0] / [0, 1]) guarantees that no row is dropped: a tie such as
  [0.5, 0.5] rounds to [0, 0], matches neither pattern and would otherwise
  shorten the result and misalign it with the input images.
  '''
  return [1 if scores[0] >= scores[1] else 0 for scores in prediction]

# Convert the network's class scores to labels, then print how many
# predictions there are followed by the predictions themselves.
prediction = one_hot_to_labels(prediction_one_hot)
print(len(prediction))
print(prediction)

# Do not change the following line, you need to submit best.p to gradescope
# If you are using colab, it will show up on the left side, remember to download it
pickle.dump(prediction, open("best.p", "wb"))