In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.utils import np_utils
import pandas as pd
from keras.callbacks import ModelCheckpoint
import keras.backend as K

Using TensorFlow backend.


In [5]:
data = pd.read_csv('hindi_digits.csv', header=None)
dataset = np.array(data)

In [6]:
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,digit_0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,digit_0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,digit_0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,digit_0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,digit_0


In [7]:
len(data)

20000

In [9]:
data[1024].unique()

array(['digit_0', 'digit_1', 'digit_2', 'digit_3', 'digit_4', 'digit_5',
       'digit_6', 'digit_7', 'digit_8', 'digit_9'], dtype=object)

In [10]:
np.random.shuffle(dataset)
X = dataset[:, 0:1024]
Y = dataset[:, 1024]

X_train = X[0:18000, :]
X_train = X_train / 255
X_test = X[18000:, :]
X_test = X_test / 255

print(X_train.shape)
print(X_test.shape)

(18000, 1024)
(2000, 1024)


In [11]:
Y

array(['digit_8', 'digit_4', 'digit_0', ..., 'digit_5', 'digit_7',
       'digit_5'], dtype=object)

In [12]:
label_to_int = {'digit_0': 0, 'digit_1': 1, 'digit_2': 2, 'digit_3': 3, 'digit_4': 4, 'digit_5': 5, 'digit_6': 6,
                'digit_7': 7, 'digit_8': 8, 'digit_9': 9}

In [13]:
for i in range(len(Y)):
    Y[i] = label_to_int[Y[i]]

In [14]:
Y

array([8, 4, 0, ..., 5, 7, 5], dtype=object)

In [15]:
Y_train, Y_test = Y[:18000], Y[18000:]

In [16]:
y_train = np_utils.to_categorical(Y_train)
y_test = np_utils.to_categorical(Y_test)

In [17]:
y_train.shape

(18000, 10)

In [18]:
X_train = X_train.reshape(X_train.shape[0], 32, 32, 1)
X_test = X_test.reshape(X_test.shape[0], 32, 32, 1)

X_train.shape

(18000, 32, 32, 1)

In [19]:
num_classes = 10

In [20]:
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(5, 5), input_shape=(32, 32, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
model.add(Conv2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same'))
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

filepath = "hindi_digits_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

In [21]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=8, batch_size=64, callbacks=[checkpoint])

Train on 18000 samples, validate on 2000 samples
Epoch 1/8

Epoch 00001: val_acc improved from -inf to 0.97550, saving model to hindi_digits_model.h5
Epoch 2/8

Epoch 00002: val_acc improved from 0.97550 to 0.98300, saving model to hindi_digits_model.h5
Epoch 3/8

Epoch 00003: val_acc improved from 0.98300 to 0.98800, saving model to hindi_digits_model.h5
Epoch 4/8

Epoch 00004: val_acc improved from 0.98800 to 0.99300, saving model to hindi_digits_model.h5
Epoch 5/8

Epoch 00005: val_acc did not improve from 0.99300
Epoch 6/8

Epoch 00006: val_acc did not improve from 0.99300
Epoch 7/8

Epoch 00007: val_acc did not improve from 0.99300
Epoch 8/8

Epoch 00008: val_acc improved from 0.99300 to 0.99400, saving model to hindi_digits_model.h5


<keras.callbacks.History at 0x24fd9632cf8>

In [22]:
model.save('hindi_digits_trained.h5')