In [1]:
import os
import numpy as np
import cv2 as cv
import math
import shutil
from PIL import Image
import matplotlib.pyplot as plt
import pathlib
import pickle
from zipfile import ZipFile 
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.metrics import classification_report,confusion_matrix
import tensorflow as tf

In [2]:
def get_folder_length(directory):
    """Count the entries in each digit sub-folder of ``directory``.

    Args:
        directory: Path to a folder containing one sub-folder per class,
            named '0' through '9'.

    Returns:
        A list of ten integers; element ``k`` is the number of directory
        entries found in ``directory/<k>``.

    Raises:
        OSError: If one of the ten sub-folders cannot be listed.
    """
    class_names = [str(digit) for digit in range(10)]
    return [
        len(os.listdir(os.path.join(directory, class_name)))
        for class_name in class_names
    ]

In [3]:
# Per-class entry counts for the sub-folders '0'..'9' under data/.
total = get_folder_length("data/")

In [4]:
# Show the number of entries found for each class folder.
print(total)

[20250, 20385, 20538, 21762, 21690, 20691, 20601, 19746, 20637, 20448]


In [5]:
def percentage_calculator(number,percent):
    """Return ``percent`` percent of ``number``, rounded down to an integer.

    The product is formed *before* dividing by 100. Dividing first
    (``number / 100``) introduces a binary rounding error that can push the
    result just below a whole number, e.g. ``100 * (57 / 100)`` evaluates to
    56.99999999999999 and would floor to 56 instead of 57.

    Args:
        number: The quantity to take a percentage of.
        percent: The percentage to take (80 means 80%).

    Returns:
        ``floor(number * percent / 100)`` as an ``int``.
    """
    if isinstance(number, int) and isinstance(percent, int):
        # Pure integer arithmetic is exact for any magnitude.
        return (number * percent) // 100
    return math.floor(number * percent / 100)

In [6]:
# Per-class number of images assigned to the training split: 80% of each
# class total, rounded down by percentage_calculator.
divide = []

for class_total in total:
    percent = percentage_calculator(class_total,80)
    print(percent)
    divide.append(percent)

16200
16308
16430
17409
17352
16552
16480
15796
16509
16358


In [7]:
# Per-class number of images left for the test split: whatever is not taken
# by the training share. Pairing the two lists with zip avoids hard-coding the
# number of classes and the manual index bookkeeping.
remaining = [
    class_total - train_count
    for class_total, train_count in zip(total, divide)
]

In [8]:
# Show the per-class totals, training counts and test counts, one list per line.
print(total, divide, remaining, sep="\n")

[20250, 20385, 20538, 21762, 21690, 20691, 20601, 19746, 20637, 20448]
[16200, 16308, 16430, 17409, 17352, 16552, 16480, 15796, 16509, 16358]
[4050, 4077, 4108, 4353, 4338, 4139, 4121, 3950, 4128, 4090]


In [9]:
def copy_images(directory,destination_train,destination_test,train_counts=None):
    """Copy each class folder's files into train and test folders.

    For every label '0'..'9', the files in ``directory/<label>`` are listed in
    sorted order; the first ``train_counts[k]`` files are copied to
    ``destination_train/<label>`` and all remaining files are copied to
    ``destination_test/<label>``.

    Args:
        directory: Folder containing one sub-folder per label.
        destination_train: Root folder for the training copies.
        destination_test: Root folder for the test copies.
        train_counts: Optional sequence with the number of training files for
            each label, in label order. Defaults to the notebook-level
            ``divide`` list.

    Raises:
        OSError: If a source folder cannot be listed or a copy fails.
    """
    labels = ['0','1','2','3','4','5','6','7','8','9']
    if train_counts is None:
        # Backward-compatible default: the per-class counts computed earlier
        # in the notebook.
        train_counts = divide

    for index, label in enumerate(labels):
        path = os.path.join(directory,label)
        train_dir = os.path.join(destination_train,label)
        test_dir = os.path.join(destination_test,label)
        # shutil.copy treats a non-existent destination as a *file name*, so
        # without these folders every image would overwrite one file called
        # '<label>' instead of landing inside a '<label>/' directory.
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # train/test split reproducible between runs.
        images = sorted(os.listdir(path))
        # Clamp so a count that is negative or larger than the folder cannot
        # produce a surprising slice.
        split_at = max(0, min(int(train_counts[index]), len(images)))

        for img in images[:split_at]:
            shutil.copy(os.path.join(path,img),train_dir)
        print(index , " : is inserted in train")

        for img in images[split_at:]:
            shutil.copy(os.path.join(path,img),test_dir)
        print(index , " : is inserted in test")

In [10]:
#copy_images("data/","data/train/","data/test/")

In [11]:
labels = ['0','1','2','3','4','5','6','7','8','9']
img_width = img_height = 28

def get_data(directory):
    """Load every image under ``directory/<label>`` as a resized array.

    Each file in the ten label folders is read with ``cv.imread`` and resized
    to ``(img_width, img_height)``. Files that cannot be read or resized are
    reported on stdout and skipped, so one bad file does not abort the load.

    Args:
        directory: Folder containing one sub-folder per entry of ``labels``.

    Returns:
        An object array of shape ``(n_images, 2)``; column 0 holds the resized
        image array and column 1 the integer class index (the position of the
        folder name in ``labels``).
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(directory,label)
        for img in os.listdir(path):
            img_path = os.path.join(path,img)
            try:
                img_arr = cv.imread(img_path)
                if img_arr is None:
                    # cv.imread signals an unreadable file by returning None
                    # rather than raising; report which file was skipped.
                    print("Skipping unreadable image:", img_path)
                    continue
                resized_arr = cv.resize(img_arr,(img_width,img_height))
                data.append((resized_arr,class_num))
            except Exception as e:
                # Best effort: report the offending file and keep going.
                print(img_path, ":", e)

    # Rows pair an image array with a scalar, which is a ragged structure.
    # Recent NumPy versions refuse to build such an array implicitly, so the
    # object array is filled explicitly.
    samples = np.empty((len(data), 2), dtype=object)
    for row, (image, class_index) in enumerate(data):
        samples[row, 0] = image
        samples[row, 1] = class_index
    return samples

In [12]:
# Load the cached training samples from disk.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects and can
# execute code embedded in the file; only load .npy files from a trusted source.
# Presumably this file was written by the commented-out np.save of get_data output — TODO confirm.
train = np.load("train.npy",allow_pickle=True)

In [13]:
# Load the cached validation samples from disk.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects and can
# execute code embedded in the file; only load .npy files from a trusted source.
val = np.load("validation.npy",allow_pickle=True)

In [14]:
#np.save('train.npy',train)
#np.save('validation.npy',val)

In [15]:
img_width = img_height = 28

# Each row of `train` / `val` unpacks into (image array, class index).
# Split the rows into separate feature and label sequences.
x_train = [features for features, _ in train]
y_train = [label for _, label in train]
x_val = [features for features, _ in val]
y_val = [label for _, label in val]

# Scale pixel intensities by 1/255 (assumes 8-bit images — TODO confirm).
# float32 halves the memory of the default float64.
x_train = np.array(x_train, dtype=np.float32)/255
x_val = np.array(x_val, dtype=np.float32)/255

# Labels are integer class indices, NOT pixel data: they must not be divided
# by 255. Scaling them collapses every class to a fraction below 1, which a
# sparse categorical loss then reads as class 0.
y_train = np.array(y_train, dtype=np.int64)
y_val = np.array(y_val, dtype=np.int64)

# No reshape is applied: the stacked images keep their own channel layout,
# which is what the model's 3-channel input expects. Forcing a single channel
# here would scramble 3-channel images into three times as many samples.

In [16]:
# Augmentation settings for the Keras ImageDataGenerator.
augmentation_options = dict(
    # Dataset-level and sample-level normalisation are all switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Random geometric perturbations.
    rotation_range=30,        # rotate by up to 30 degrees
    zoom_range=0.2,           # zoom in/out by up to 20%
    width_shift_range=0.1,    # shift horizontally by up to 10% of the width
    height_shift_range=0.1,   # shift vertically by up to 10% of the height
    # NOTE(review): mirroring left-right can change the meaning of digit-like
    # images — confirm this is wanted for these classes.
    horizontal_flip=True,     # random left-right mirroring
    vertical_flip=False,      # no top-bottom mirroring
)
datagen = ImageDataGenerator(**augmentation_options)


# Computes dataset statistics on x_train; Keras only needs this when one of
# the featurewise_* / zca_whitening options is enabled (all are False here).
datagen.fit(x_train)

In [17]:
# Small CNN: three conv/pool stages followed by a dense classifier head.
model = Sequential()
model.add(Conv2D(32,3,padding="same", activation="relu", input_shape=(28, 28,3)))
model.add(MaxPool2D())

model.add(Conv2D(32, 3, padding="same", activation="relu"))
model.add(MaxPool2D())

model.add(Conv2D(64, 3, padding="same", activation="relu"))
model.add(MaxPool2D())
model.add(Dropout(0.4))

model.add(Flatten())
model.add(Dense(128,activation="relu"))
# One softmax unit per class. The label list has ten classes ('0'..'9'); a
# 2-unit output cannot represent class indices 2..9 and makes the sparse
# categorical loss invalid for those labels.
model.add(Dense(len(labels), activation="softmax"))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 32)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 64)          18496     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 3, 3, 64)         0

In [18]:
# Adam optimiser with a learning rate of 1e-6.
# NOTE(review): this is three orders of magnitude below Adam's default (1e-3);
# with only a few epochs the weights may barely move — confirm it is intended.
opt = Adam(learning_rate=1e-6)
# The model ends in a softmax layer, so the loss receives probabilities
# (from_logits=False) and integer class indices as targets.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])

In [19]:
# Train for 10 epochs, passing the validation arrays as validation_data.
# NOTE(review): the `datagen` augmentation pipeline configured earlier is not
# passed to this call, so training runs on the un-augmented arrays — confirm
# whether datagen.flow(x_train, y_train) was intended here.
history = model.fit(x_train,y_train,epochs = 10 , validation_data = (x_val,y_val))

UsageError: Line magic function `%%time` not found.
