# <span style='background :yellow' > MIDAS INTERNSHIP TASK 2 ( Part 1 ) </span>

#### By: Yashika Khurana

### Importing libraries

In [12]:
from PIL import Image
import cv2
import os
from PIL import ImageOps
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
import keras

### Please ignore this dictionary for the moment. It maps the three-digit class codes to the characters they represent and is used later.

In [13]:
# Lookup table from three-digit class code ("001" .. "062") to the character
# that code represents: the ten digits first, then A-Z, then a-z.
dictionary = {
    f"{code:03d}": symbol
    for code, symbol in enumerate(
        "0123456789"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz",
        start=1,
    )
}

### Pre-processing : 
The pre-processing follows the steps applied to the MNIST dataset, so that the images end up in the same standard form.

- Extracting images from the file, appending them to data
- Extracting labels from the image names
- Converting images to (28,28)

#### The following steps in bullets have been taken from Medium blogs 

- Inverting image color
- Normalizing
- Removing rows and columns that are completely zero (i.e. black) so that only the character itself remains and the model doesn't learn extra features.
- Scaling the cropped image so that it fits inside a 20×20 box
- Padding images to fit (28,28) size

 

In [14]:
def _to_mnist_format(img):
    """Convert one image, as loaded by cv2.imread, into a 28x28 MNIST-style array.

    Steps: invert the colours, shrink to 28x28, keep a single channel, scale to
    [0, 1], crop away all-zero border rows/columns, rescale so the longer side is
    20 pixels, zero-pad back to 28x28 and finally stretch the values to [0, 255].

    Parameters
    ----------
    img : numpy.ndarray
        Three-dimensional image array (height, width, channels).

    Returns
    -------
    numpy.ndarray or None
        The 28x28 float array, or None when the image contains no non-zero
        pixel after inversion (nothing to crop to).
    """
    img = cv2.resize(255 - img, (28, 28))
    img = img[:, :, -1]  # keep only the last channel
    img = img / 255.0

    # Bounding box of the non-zero pixels; same result as stripping all-zero
    # rows/columns one at a time from each side, but safe for blank images.
    ink_rows = np.flatnonzero(img.any(axis=1))
    ink_cols = np.flatnonzero(img.any(axis=0))
    if ink_rows.size == 0:
        return None
    img = np.ascontiguousarray(
        img[ink_rows[0]:ink_rows[-1] + 1, ink_cols[0]:ink_cols[-1] + 1]
    )

    # Scale so that the longer side becomes exactly 20 pixels.
    rows, cols = img.shape
    if rows > cols:
        factor = 20.0 / rows
        rows = 20
        cols = int(round(cols * factor))
    else:
        factor = 20.0 / cols
        cols = 20
        rows = int(round(rows * factor))
    gray = cv2.resize(img, (cols, rows))

    # Centre inside a 28x28 canvas; when the padding is odd, the extra pixel
    # goes on the top/left side (ceil first, floor second).
    colsPadding = (int(math.ceil((28 - cols) / 2.0)), int(math.floor((28 - cols) / 2.0)))
    rowsPadding = (int(math.ceil((28 - rows) / 2.0)), int(math.floor((28 - rows) / 2.0)))
    imgf = np.pad(gray, (rowsPadding, colsPadding), 'constant')

    # Stretch the values to the full [0, 255] range.
    imgf -= imgf.min()
    peak = imgf.max()
    if peak == 0:
        return None
    imgf /= peak
    imgf *= 255
    return imgf


data = []
labels = []

# Sorted so that the images are collected in the same order on every machine.
for i in sorted(os.listdir("train")):
    class_dir = os.path.join("train", str(i))
    # Skip hidden files such as .DS_Store and anything that is not a folder.
    if str(i).startswith(".") or not os.path.isdir(class_dir):
        continue

    for j in sorted(os.listdir(class_dir)):
        if str(j).startswith("."):
            continue

        link = os.path.join(class_dir, str(j))

        img = cv2.imread(link)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning None.
            print("Skipping unreadable file:", link)
            continue

        imgf = _to_mnist_format(img)
        if imgf is None:
            print("Skipping blank image:", link)
            continue

        data.append(imgf)
        # Characters 7-8 of the folder name are used as the label.
        # NOTE(review): presumably folders are named like "Sample001" -- confirm.
        labels.append(str(i)[7:9])
            
            
        

In [15]:
# Empty frame; the image and label columns are attached in the following cells.
dff=pd.DataFrame()

In [16]:
# One preprocessed image array per row.
dff["image"]=data

In [17]:
# Label strings sliced from the folder names, aligned row-for-row with the images.
dff["label"]=labels

### Converting data to array

In [18]:
# Stack the list of 2-D images into a single array.
data=np.asarray(data)

In [19]:
# Sanity check: (number of images, 28, 28).
data.shape

(2480, 28, 28)

### Reshaping data and labels before sending to the model

In [20]:
# Add a trailing channel axis of size 1 so each image is (28, 28, 1),
# matching the input_shape declared in build_model.
data=data.reshape(len(data),28,28,1)

In [21]:
# Sanity check: the channel axis has been added.
data.shape

(2480, 28, 28, 1)

In [22]:
# Convert the list of label strings to an array.
labels=np.asarray(labels)

In [23]:
# Sanity check: one label per image.
labels.shape

(2480,)

### Converting labels to uint8 type

In [24]:
# Parse the label strings as integers; the model is compiled with
# sparse_categorical_crossentropy, which takes integer class ids.
labels=labels.astype("uint8")

In [25]:
# Confirm the element type after the cast.
type(labels[0])

numpy.uint8

### Shuffling the data 

In [26]:
from sklearn.utils import shuffle


In [27]:
# Back to a list of per-image arrays so images can be zipped with labels and shuffled as pairs.
data=list(data)

In [28]:
# Same for the labels, so both sequences can be zipped together.
labels=list(labels)

In [29]:
import random

# Shuffle images and labels together so every image keeps its own label.
# A fixed seed makes the order reproducible; this matters because
# `validation_split` in the later search/fit calls holds out the trailing
# part of the arrays, so an unseeded shuffle changes the validation set
# on every run.
SHUFFLE_SEED = 42
temp = list(zip(data, labels))
random.Random(SHUFFLE_SEED).shuffle(temp)
data, labels = zip(*temp)

In [30]:
# The shuffle left a tuple of arrays; stack it back into one array.
data=np.asarray(data)

In [31]:
# Same for the shuffled labels.
labels=np.asarray(labels)

### Making a CNN model by using Keras Tuner for Hyperparameter tuning

In [38]:
def build_model(hp):
    """Build and compile the CNN for a single hyperparameter trial.

    Architecture: two ReLU convolutions, 2x2 max-pooling, 20% dropout,
    flatten, one ReLU dense layer and a 63-way softmax output.

    Parameters
    ----------
    hp
        Hyperparameter object passed in by the tuner. It is queried for
        'conv_1_filter', 'conv_1_kernel', 'conv_2_filter', 'conv_2_kernel',
        'dense_1_units' and 'learning_rate'.

    Returns
    -------
    A compiled keras.Sequential model taking (28, 28, 1) inputs, trained
    with Adam on sparse categorical cross-entropy and reporting accuracy.
    """
    first_conv = keras.layers.Conv2D(
        filters=hp.Int('conv_1_filter', min_value=32, max_value=128, step=16),
        kernel_size=hp.Choice('conv_1_kernel', values=[3, 5]),
        activation='relu',
        input_shape=(28, 28, 1),
    )
    second_conv = keras.layers.Conv2D(
        filters=hp.Int('conv_2_filter', min_value=32, max_value=64, step=16),
        kernel_size=hp.Choice('conv_2_kernel', values=[3, 5]),
        activation='relu',
    )
    pooling = keras.layers.MaxPooling2D(pool_size=(2, 2))
    dropout = keras.layers.Dropout(rate=0.2)
    flatten = keras.layers.Flatten()
    hidden = keras.layers.Dense(
        units=hp.Int('dense_1_units', min_value=32, max_value=128, step=16),
        activation='relu',
    )
    classifier = keras.layers.Dense(63, activation='softmax')

    network = keras.Sequential(
        [first_conv, second_conv, pooling, dropout, flatten, hidden, classifier]
    )

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3])
    network.compile(
        optimizer=keras.optimizers.Adam(step_size),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [39]:
from kerastuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

In [40]:
# Random search over the hyperparameters declared in build_model, ranked by
# validation accuracy. Trial state is kept under output/yMidas; when that
# directory already exists the tuner reloads the earlier search (see the
# "Reloading ..." log lines below) instead of starting from scratch.
tuner_search=RandomSearch(build_model,
                          objective='val_accuracy',
                          max_trials=3,directory='output',project_name="yMidas")

INFO:tensorflow:Reloading Oracle from existing project output/yMidas/oracle.json
INFO:tensorflow:Reloading Tuner from output/yMidas/tuner0.json


In [32]:
# Final check of the image array shape before the search.
data.shape

(2480, 28, 28, 1)

In [33]:
# Final check of the label array shape before the search.
labels.shape

(2480,)

### Searching best parameters for 3 epochs

In [82]:
# Train every trial for 3 epochs, holding out 10% of the data for validation.
tuner_search.search(data,labels,epochs=3,validation_split=0.1)

Trial 20 Complete [00h 00m 07s]
val_accuracy: 0.27419355511665344

Best val_accuracy So Far: 0.6532257795333862
Total elapsed time: 00h 04m 39s
INFO:tensorflow:Oracle triggered exit


### Selecting best model
( Best model --> The model with best validation accuracy so far)

In [83]:
# get_best_models returns a list; take its single entry.
model=tuner_search.get_best_models(num_models=1)[0]

In [84]:
# Show the layer stack and parameter counts of the selected model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 112)       2912      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 48)        134448    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 10, 48)        0         
_________________________________________________________________
dropout (Dropout)            (None, 10, 10, 48)        0         
_________________________________________________________________
flatten (Flatten)            (None, 4800)              0         
_________________________________________________________________
dense (Dense)                (None, 112)               537712    
_________________________________________________________________
dense_1 (Dense)              (None, 63)                7

### Training the model

In [85]:
# Continue training the selected model. initial_epoch=3 makes the epoch counter
# resume at 4 (see the log below), so this runs epochs 4 through 20.
model.fit(data, labels, epochs=20, validation_split=0.1, initial_epoch=3)

Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fe65253c4f0>

### Saving the model

In [86]:
# Write the trained model to an HDF5 file in the working directory.
model.save("midas_model.h5")

### Checking if model was successfully saved

In [1]:
from keras.models import load_model

In [2]:
# Read the saved file back to verify that it loads.
model=load_model("midas_model.h5")



In [3]:
# The summary should match the one printed before saving.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 112)       2912      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 48)        134448    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 10, 48)        0         
_________________________________________________________________
dropout (Dropout)            (None, 10, 10, 48)        0         
_________________________________________________________________
flatten (Flatten)            (None, 4800)              0         
_________________________________________________________________
dense (Dense)                (None, 112)               537712    
_________________________________________________________________
dense_1 (Dense)              (None, 63)                7

### Successfully saved