In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import math
import os
from cnn_classes import Utils, CustomCNNColor, CustomCNNGrayScale
import pickle

In [2]:
def plot_image(img):
    fig = plt.figure(figsize=(10,8))
    plt.imshow(img, cmap='gray')
    plt.show()
    
def plot_image_color(img):
    fig = plt.figure(figsize=(10,8))
    plt.imshow(img)
    plt.show()

In [3]:
class Utils:
    def __init__(self):
        pass
    
    def iter_image_regions_conv(self, image, kernel_dim):
        k_w,k_h = kernel_dim, kernel_dim
        i_w,i_h = image.shape[1], image.shape[0]
        for i in range(i_h-k_h+1):
            for j in range(i_w-k_w+1):
                yield image[i:i+k_h, j:j+k_w], i, j

    def convolve(self, image, kernels, kernel_dim):
        imc = image.copy()
        # assume squares for kernel and input
        num_kernels = len(kernels)
        k_dim = kernel_dim
        output_img_dim = len(image[0])-k_dim+1

        output = np.zeros((num_kernels,output_img_dim,output_img_dim))

        for region, i, j in self.iter_image_regions_conv(imc, k_dim):
            conv = np.sum(region*kernels, axis=(1,2))
            for f in range(num_kernels):
                output[f,i,j] = conv[f]

        return output
    
    def convolve_color(self, image, kernels, kernel_dim):
        imc = image.copy()
        # assume squares for kernel and input
        num_kernels = len(kernels)
        k_dim = kernel_dim
        output_img_dim = len(image[0])-k_dim+1

        output = np.zeros((num_kernels,output_img_dim,output_img_dim))

        # image in shape of (x,y,channels)
        # kernels in shape (8, 3, k_dim, k_dim)
        for c in range(3):
            for region, i, j in self.iter_image_regions_conv(imc[:,:,c], k_dim):
                conv = np.sum(region*kernels[:,c,:,:], axis=(1,2))
                for f in range(num_kernels):
                    output[f,i,j] += (1/3)*conv[f]
            
        return output
    
    def iter_image_regions_pool(self, image, pool_dim):
        # pool dim will be 2 for this project
        p_w,p_h = pool_dim, pool_dim
        i_w,i_h = image.shape[1], image.shape[0]
        for i in range(i_h//p_h):
            for j in range(i_w//p_w):
                yield image[p_h*i:p_h*i+p_h, p_w*j:p_w*j+p_w], i, j

    def max_pool(self, channels, pool_dim):
        n, img_h, img_w = channels.shape
        output = np.zeros((n, img_h//pool_dim, img_w//pool_dim))
        for c in range(n):
            for region, i, j in self.iter_image_regions_pool(channels[c], pool_dim):
                max_reg = np.max(region)
                output[c, i, j] = max_reg
        return output
    
    
    def convolve_backprop(self, image, dOut, kernel_dim):
        d,h,w = dOut.shape
        dKernels = np.zeros((d, kernel_dim, kernel_dim))
        for img_region, i, j in self.iter_image_regions_conv(image, kernel_dim):
            for f in range(d):
                dKernels[f,:,:] += dOut[f, i, j]*img_region
        return dKernels
    
    def convolve_backprop_color(self, image, dOut, kernel_dim):
        d,h,w = dOut.shape
        dKernels = np.zeros((d, 3, kernel_dim, kernel_dim))
        for c in range(3):
            for img_region, i, j in self.iter_image_regions_conv(image[:,:,c], kernel_dim):
                for f in range(d):
                    dKernels[f,c,:,:] += (1/3)*dOut[f, i, j]*img_region
        return dKernels
    
    def max_pool_backprop(self, channels, dInput, pool_dim):
        d,h,w = channels.shape
        dPool = np.zeros((d, h, w))
        for c in range(d):
            for region, i, j in self.iter_image_regions_pool(channels[:,:,c], pool_dim):
                # we get the original 2 by 2 region that produced
                max_val = np.max(region)
                inds = np.argwhere(region == max_val)
                dPool[c, 2*i+inds[:,0], 2*j+inds[:,1]] = dInput[c,i,j]
        return dPool

    def sigmoid(self, x):
        return 1/(1+np.exp(-x))
    
    def loss(self, y,y_pred):
        epsilon = 0.0001
        log_loss = y*np.log2(y_pred+epsilon) + (1-y)*np.log2(1-y_pred+epsilon)
        return -log_loss
    

In [4]:
class CustomCNNGrayScale:
    def __init__(self, epochs=50, lr=0.01):
        self.EPOCHS = epochs
        self.filters = np.random.rand(8,3,3)/9 
        self.w2_dim = 2048 
        self.w1_dim = 256
        self.W2 = np.random.rand(self.w1_dim, self.w2_dim).astype('float64')*np.sqrt(2/(self.w2_dim))
        self.W1 = np.random.rand(1, self.w1_dim).astype('float64')*np.sqrt(2/(self.w1_dim))
        self.LR = lr
        self.utils = Utils()
    
    def fit(self, X_train, y_train):
        for i in range(self.EPOCHS):
            avg_loss = []
            for img, y in zip(X_train, y_train):
                
                # =============== FORWARD-PROPIGATION ====================

                # CONVOLUTIONAL LAYER
                output = self.utils.convolve(img, self.filters, 3)
                CONV_OUTPUT = output.copy()

                output = self.utils.max_pool(output, 2)
                MAXPOOL_OUTPUT = output.copy()

                # LINEAR LAYER
                linear_input = output.flatten().reshape((1,-1))
                linear_output = linear_input.dot(self.W2.T)
                W2_OUT = linear_output.copy()

                linear_output = self.utils.sigmoid(self.W1.dot(linear_output.T))

                # =============== PREDICTION ====================
                
                pred = linear_output
                avg_loss.append(self.utils.loss(y, pred))

                # =============== BACK-PROPIGATION ====================

                # LINEAR LAYER
                dW1 = (pred-y).dot(W2_OUT)
                dW2 = ((pred-y).T.dot(self.W1)).T.dot(linear_input)
                dInput = ((pred-y).T.dot(self.W1)).dot(self.W2)

                self.W1 -= self.LR*dW1
                self.W2 -= self.LR*dW2

                # CONVOLUTIONAL LAYER
                IMAGE = img.copy()

                dInput_r = dInput.copy().reshape(MAXPOOL_OUTPUT.shape)
                dOut = self.utils.max_pool_backprop(CONV_OUTPUT, dInput_r, 2)
                dFilters = self.utils.convolve_backprop(IMAGE, dOut, 3)

                self.filters -= self.LR*dFilters
                
            avg_loss = np.array(avg_loss)
            print(f'EPOCH:{i} - Avg NLLoss: {np.sum(avg_loss)}')
            
    def predict(self, X):
        ''' 
        Return a probability that the individual is wearing a mask [0-1].
        The method can only accept one image (grayscale at a time)
        '''
        output = self.utils.convolve(X, self.filters, 3)
        CONV_OUTPUT = output.copy()

        output = self.utils.max_pool(output, 2)
        MAXPOOL_OUTPUT = output.copy()

        # LINEAR LAYER
        linear_input = output.flatten().reshape((1,-1))
        linear_output = linear_input.dot(self.W2.T)
        W2_OUT = linear_output.copy()

        linear_output = self.utils.sigmoid(self.W1.dot(linear_output.T))
        
        return linear_output

In [5]:
class CustomCNNColor:
    def __init__(self, epochs=50, lr=0.01):
        self.EPOCHS = epochs
        self.filters = np.random.rand(8,3,3,3)
        self.w2_dim = 2048 
        self.w1_dim = 256
        self.W2 = np.random.rand(self.w1_dim, self.w2_dim).astype('float64')*np.sqrt(2/(self.w2_dim))
        self.W1 = np.random.rand(1, self.w1_dim).astype('float64')*np.sqrt(2/(self.w1_dim))
        self.LR = lr
        self.utils = Utils()
    
    def fit(self, X_train, y_train):
        for i in range(self.EPOCHS):
            avg_loss = []
            for img, y in zip(X_train, y_train):
                
                # =============== FORWARD-PROPIGATION ====================

                # CONVOLUTIONAL LAYER
                output = self.utils.convolve_color(img, self.filters, 3)
                CONV_OUTPUT = output.copy()

                output = self.utils.max_pool(output, 2)
                MAXPOOL_OUTPUT = output.copy()

                # LINEAR LAYER
                linear_input = output.flatten().reshape((1,-1))
                linear_output = linear_input.dot(self.W2.T)
                W2_OUT = linear_output.copy()

                linear_output = self.utils.sigmoid(self.W1.dot(linear_output.T))

                # =============== PREDICTION ====================
                
                pred = linear_output
                avg_loss.append(self.utils.loss(y, pred))

                # =============== BACK-PROPIGATION ====================

                # LINEAR LAYER
                dW1 = (pred-y).dot(W2_OUT)
                dW2 = ((pred-y).T.dot(self.W1)).T.dot(linear_input)
                dInput = ((pred-y).T.dot(self.W1)).dot(self.W2)

                self.W1 -= self.LR*dW1
                self.W2 -= self.LR*dW2

                # CONVOLUTIONAL LAYER
                IMAGE = img.copy()

                dInput_r = dInput.copy().reshape(MAXPOOL_OUTPUT.shape)
                dOut = self.utils.max_pool_backprop(CONV_OUTPUT, dInput_r, 2)
                dFilters = self.utils.convolve_backprop_color(IMAGE, dOut, 3)

                self.filters -= self.LR*dFilters
                
            avg_loss = np.array(avg_loss)
            print(f'EPOCH:{i} - Avg NLLoss: {np.sum(avg_loss)}')
            
    def predict(self, X):
        ''' 
        Return a probability that the individual is wearing a mask [0-1].
        The method can only accept one image (grayscale at a time)
        '''
        output = self.utils.convolve_color(X, self.filters, 3)
        CONV_OUTPUT = output.copy()

        output = self.utils.max_pool(output, 2)
        MAXPOOL_OUTPUT = output.copy()

        # LINEAR LAYER
        linear_input = output.flatten().reshape((1,-1))
        linear_output = linear_input.dot(self.W2.T)
        W2_OUT = linear_output.copy()

        linear_output = self.utils.sigmoid(self.W1.dot(linear_output.T))
        
        return linear_output

#### Train Grayscale Model

In [31]:
model_gray = CustomCNNGrayScale(epochs=120, lr=0.005)

In [32]:
def return_labeled_data_gray():
    TARGET_NO_MASK_DIR="../images_gray_transformed_no_mask"
    TARGET_MASK_DIR="../images_gray_transformed_mask"
    # no mask
    data_list = []
    target_list = []
    for filename in os.listdir(TARGET_NO_MASK_DIR):
        img = cv2.imread(f'{TARGET_NO_MASK_DIR}/{filename}', cv2.IMREAD_GRAYSCALE)
        data_list.append(img)
        target_list.append(0.0)
        
    # mask
    for filename in os.listdir(TARGET_MASK_DIR):
        img = cv2.imread(f'{TARGET_MASK_DIR}/{filename}', cv2.IMREAD_GRAYSCALE)
        data_list.append(img)
        target_list.append(1.0)
    return data_list, target_list

In [33]:
# Grayscale Data
data_raw, target_raw = return_labeled_data_gray()
data_raw, target_raw = np.array(data_raw)/255, np.array(target_raw)

In [34]:
total_idxs = np.random.permutation(np.arange(0, len(data_raw), 1))
split_val_idx = int(np.floor(len(total_idxs)*0.7))
split_test_idx = int(np.floor(len(total_idxs)*0.8))
train_idxs, val_idxs, test_idxs = total_idxs[:split_val_idx], total_idxs[split_val_idx:split_test_idx], total_idxs[split_test_idx:]

In [35]:
X_train, y_train = data_raw[train_idxs], target_raw[train_idxs] 
X_val, y_val = data_raw[val_idxs], target_raw[val_idxs]
X_test, y_test = data_raw[test_idxs], target_raw[test_idxs] 

In [36]:
full_X, full_y = np.vstack([X_train, X_test, X_val]), np.hstack([y_train, y_test, y_val])

In [37]:
# train model on all available data to increase performance
model_gray.fit(full_X, full_y)
# model_gray.fit(X_train, y_train)

EPOCH:0 - Avg NLLoss: 440.0773011314672
EPOCH:1 - Avg NLLoss: 410.2858299219108
EPOCH:2 - Avg NLLoss: 406.48767008369236
EPOCH:3 - Avg NLLoss: 394.589003555824
EPOCH:4 - Avg NLLoss: 353.9470503733768
EPOCH:5 - Avg NLLoss: 301.1648711543754
EPOCH:6 - Avg NLLoss: 263.9452945845441
EPOCH:7 - Avg NLLoss: 234.15060251415116
EPOCH:8 - Avg NLLoss: 214.58603019863943
EPOCH:9 - Avg NLLoss: 202.590710562588
EPOCH:10 - Avg NLLoss: 194.17975477169472
EPOCH:11 - Avg NLLoss: 187.0940200214821
EPOCH:12 - Avg NLLoss: 180.57227586638558
EPOCH:13 - Avg NLLoss: 174.5303262461171
EPOCH:14 - Avg NLLoss: 168.9898246318062
EPOCH:15 - Avg NLLoss: 163.93088530364824
EPOCH:16 - Avg NLLoss: 159.30202791748738
EPOCH:17 - Avg NLLoss: 155.03386085903887
EPOCH:18 - Avg NLLoss: 151.07110880778112
EPOCH:19 - Avg NLLoss: 147.37343121057899
EPOCH:20 - Avg NLLoss: 143.900935981181
EPOCH:21 - Avg NLLoss: 140.6253660724864
EPOCH:22 - Avg NLLoss: 137.52582904873515
EPOCH:23 - Avg NLLoss: 134.57894807717105
EPOCH:24 - Avg NL

In [38]:
for i in range(len(y_test)):
    pred = model_gray.predict(X_test[i])
    print(pred, y_test[i])

[[0.99930861]] 1.0
[[5.8759144e-07]] 0.0
[[0.00045053]] 0.0
[[0.9664033]] 1.0
[[0.99879599]] 1.0
[[0.99960912]] 1.0
[[1.1478377e-10]] 0.0
[[2.11424567e-10]] 0.0
[[0.96215089]] 1.0
[[0.99984222]] 1.0
[[0.99881331]] 1.0
[[2.37356282e-09]] 0.0
[[0.99998432]] 1.0
[[0.76889924]] 1.0
[[3.01311175e-07]] 0.0
[[0.99997312]] 1.0
[[1.56699263e-06]] 0.0
[[0.99999978]] 1.0
[[0.00043681]] 0.0
[[1.]] 1.0
[[1.88997944e-10]] 0.0
[[0.99999997]] 1.0
[[0.95939119]] 1.0
[[2.15357523e-06]] 0.0
[[0.99999967]] 1.0
[[0.97215548]] 1.0
[[0.99995469]] 1.0
[[0.96700182]] 1.0
[[0.99999999]] 1.0
[[3.66648684e-14]] 0.0
[[8.72291656e-05]] 0.0
[[1.]] 1.0
[[4.26237939e-10]] 0.0
[[2.80119562e-12]] 0.0
[[0.9999988]] 1.0
[[0.99999259]] 1.0
[[3.50140778e-12]] 0.0
[[0.99998519]] 1.0
[[4.17502021e-05]] 0.0
[[0.99999515]] 1.0
[[4.85756921e-12]] 0.0
[[7.2958002e-11]] 0.0
[[5.09653578e-08]] 0.0
[[0.99942877]] 1.0
[[0.97450193]] 1.0
[[3.92682802e-10]] 0.0
[[6.96215954e-06]] 0.0
[[7.84553635e-11]] 0.0
[[0.99742591]] 1.0
[[4.322979

#### Export Trained Model With Pickle

In [39]:
model_name = './Trained Models/CNN_GRAY_CUSTOM_V3'

In [40]:
with open(model_name, 'wb') as cnn_file:
    pickle.dump(model_gray, cnn_file)

In [41]:
with open(model_name, 'rb') as cnn_file:
    m = pickle.load(cnn_file)

#### Train Color Model
- **Important Note, I had to use a different initialization for the weights! - No Xavier Init for Filters**

In [42]:
model_color = CustomCNNColor(epochs=120, lr=0.005)

In [43]:
def return_labeled_data_color():
    TARGET_NO_MASK_DIR="../images_color_transformed_no_mask"
    TARGET_MASK_DIR="../images_color_transformed_mask"
    # no mask
    data_list = []
    target_list = []
    for filename in os.listdir(TARGET_NO_MASK_DIR):
        img = cv2.imread(f'{TARGET_NO_MASK_DIR}/{filename}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        data_list.append(img)
        target_list.append(0.0)
        
    # mask
    for filename in os.listdir(TARGET_MASK_DIR):
        img = cv2.imread(f'{TARGET_MASK_DIR}/{filename}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        data_list.append(img)
        target_list.append(1.0)
    return data_list, target_list

In [44]:
#  Color Data
data_raw, target_raw = return_labeled_data_color()
data_raw, target_raw = np.array(data_raw)/255, np.array(target_raw)

In [45]:
total_idxs = np.random.permutation(np.arange(0, len(data_raw), 1))
split_val_idx = int(np.floor(len(total_idxs)*0.7))
split_test_idx = int(np.floor(len(total_idxs)*0.8))
train_idxs, val_idxs, test_idxs = total_idxs[:split_val_idx], total_idxs[split_val_idx:split_test_idx], total_idxs[split_test_idx:]

In [46]:
X_train, y_train = data_raw[train_idxs], target_raw[train_idxs] 
X_val, y_val = data_raw[val_idxs], target_raw[val_idxs]
X_test, y_test = data_raw[test_idxs], target_raw[test_idxs] 

In [47]:
full_X, full_y = np.vstack([X_train, X_test, X_val]), np.hstack([y_train, y_test, y_val])

In [48]:
model_color.fit(full_X, full_y)

  return 1/(1+np.exp(-x))


EPOCH:0 - Avg NLLoss: 792.5850033552672
EPOCH:1 - Avg NLLoss: 331.23277718108113
EPOCH:2 - Avg NLLoss: 303.2437975848765
EPOCH:3 - Avg NLLoss: 282.15484965164774
EPOCH:4 - Avg NLLoss: 266.6020409979815
EPOCH:5 - Avg NLLoss: 254.80184978067913
EPOCH:6 - Avg NLLoss: 245.51601351156876
EPOCH:7 - Avg NLLoss: 237.9417412302996
EPOCH:8 - Avg NLLoss: 231.54485054378756
EPOCH:9 - Avg NLLoss: 225.9765967029536
EPOCH:10 - Avg NLLoss: 221.0144233337365
EPOCH:11 - Avg NLLoss: 216.51592296366596
EPOCH:12 - Avg NLLoss: 212.38666313336864
EPOCH:13 - Avg NLLoss: 208.56017167868947
EPOCH:14 - Avg NLLoss: 204.98764686783238
EPOCH:15 - Avg NLLoss: 201.6319479033079
EPOCH:16 - Avg NLLoss: 198.4648424145896
EPOCH:17 - Avg NLLoss: 195.4643333035174
EPOCH:18 - Avg NLLoss: 192.61282605706847
EPOCH:19 - Avg NLLoss: 189.8955842325327
EPOCH:20 - Avg NLLoss: 187.30052033627777
EPOCH:21 - Avg NLLoss: 184.81660055335493
EPOCH:22 - Avg NLLoss: 182.43462221510492
EPOCH:23 - Avg NLLoss: 180.14678695687832
EPOCH:24 - A

In [50]:
for i in range(len(y_test)):
    pred = model_color.predict(X_test[i])
    print(pred, y_test[i])

[[0.99958607]] 1.0
[[0.99998789]] 1.0
[[0.22092804]] 0.0
[[0.50613071]] 0.0
[[0.00223364]] 0.0
[[0.06678568]] 0.0
[[0.99985642]] 1.0
[[0.01404214]] 0.0
[[0.99811932]] 1.0
[[0.98288379]] 1.0
[[0.99999721]] 1.0
[[0.00174473]] 0.0
[[0.9999793]] 1.0
[[0.78454696]] 0.0
[[6.64037406e-05]] 0.0
[[0.99831809]] 1.0
[[0.40572581]] 0.0
[[0.00044154]] 0.0
[[0.99938902]] 1.0
[[0.00289116]] 0.0
[[0.99999997]] 1.0
[[0.99986641]] 1.0
[[0.99999997]] 1.0
[[0.97941156]] 1.0
[[0.00254331]] 0.0
[[0.9999089]] 1.0
[[0.99999999]] 1.0
[[0.99999992]] 1.0
[[1.]] 1.0
[[0.87713093]] 0.0
[[0.99999535]] 1.0
[[0.99999997]] 1.0
[[0.00036658]] 0.0
[[0.99999855]] 1.0
[[0.99936914]] 1.0
[[0.99999975]] 1.0
[[0.17497472]] 0.0
[[0.99958015]] 1.0
[[0.927349]] 0.0
[[0.00184681]] 0.0
[[0.99994527]] 1.0
[[0.99999751]] 1.0
[[0.99996593]] 1.0
[[0.99999957]] 1.0
[[0.99900371]] 1.0
[[0.20622683]] 0.0
[[0.9999831]] 1.0
[[0.9999707]] 1.0
[[0.99063662]] 1.0
[[0.01472919]] 0.0
[[0.78122828]] 0.0
[[0.99997971]] 1.0
[[0.99999996]] 1.0
[[0

#### Export Trained Model With Pickle

In [51]:
model_name = './Trained Models/CNN_COLOR_CUSTOM_V3'

In [52]:
with open(model_name, 'wb') as cnn_file:
    pickle.dump(model_color, cnn_file)

In [53]:
with open(model_name, 'rb') as cnn_file:
    m = pickle.load(cnn_file)