# **Gujarati Character Recognition**

## Drive Mount

In [0]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


### Data Copy

In [0]:
# %rm data.tar.xz
# %rm -rf dataAll
%cp /gdrive/'My Drive'/ML/GujaratiML/data.tar.xz /content/

In [0]:
import tarfile
with tarfile.open('data.tar.xz') as f:
    f.extractall('.')

In [0]:
%ls dataAll

[0m[01;34m0[0m/   [01;34m11[0m/  [01;34m14[0m/  [01;34m17[0m/  [01;34m2[0m/   [01;34m22[0m/  [01;34m25[0m/  [01;34m28[0m/  [01;34m30[0m/  [01;34m33[0m/  [01;34m36[0m/  [01;34m39[0m/  [01;34m41[0m/  [01;34m44[0m/  [01;34m5[0m/  [01;34m8[0m/
[01;34m1[0m/   [01;34m12[0m/  [01;34m15[0m/  [01;34m18[0m/  [01;34m20[0m/  [01;34m23[0m/  [01;34m26[0m/  [01;34m29[0m/  [01;34m31[0m/  [01;34m34[0m/  [01;34m37[0m/  [01;34m4[0m/   [01;34m42[0m/  [01;34m45[0m/  [01;34m6[0m/  [01;34m9[0m/
[01;34m10[0m/  [01;34m13[0m/  [01;34m16[0m/  [01;34m19[0m/  [01;34m21[0m/  [01;34m24[0m/  [01;34m27[0m/  [01;34m3[0m/   [01;34m32[0m/  [01;34m35[0m/  [01;34m38[0m/  [01;34m40[0m/  [01;34m43[0m/  [01;34m46[0m/  [01;34m7[0m/


## **Imports**

In [0]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from random import shuffle
import pickle

import keras
from keras.utils import to_categorical
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

### Global Variables

In [0]:
IMG_SIZE = 150
LR = 1e-3
batch_size = 32
num_classes = 47
epochs = 10
data_augmentation = False
num_predictions = 20
model_name = 'trained_model.h5'

# Data Preprocessing



In [0]:
#%rm train_data.dat
path = "dataAll"

def create_train_data():
  training_data = []

  if os.path.exists("train_data.dat"):
    file = open('train_data.dat', 'rb')
    training_data = pickle.load(file)
    file.close()
    return training_data

  #img_count = 0
  for folder in tqdm(os.listdir(path)):
    p = path + "/" + folder
    files = os.listdir(p)
    for i in files:
      label = folder
      img_loc = p + "/" + i
      img = cv2.imread(img_loc,cv2.IMREAD_GRAYSCALE)
      img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
      training_data.append([np.array(img),np.array(label)])
      #img_count += 1
      #if img_count > 6000 : break

  shuffle(training_data)

  file = open('train_data.dat', 'wb')
  pickle.dump(training_data, file)  
  file.close()

  return training_data

train_data = create_train_data()
%ls

100%|██████████| 47/47 [00:10<00:00,  4.40it/s]


[0m[01;34mdataAll[0m/  data.tar.xz  [01;34msample_data[0m/  train_data.dat


In [0]:
%cp /content/train_data.dat /gdrive/'My Drive'/dataset/GujaratiML/
%ls /gdrive/'My Drive'/dataset/GujaratiML

 dataold.tar.xz  'ML assign using Keras preprocessed.7z'   trained_model.h5
 data.tar.xz      train_data.dat


## Data Split

In [0]:
train = train_data[:-6000]
test = train_data[-6000:]
# Training Data
x_train = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
y_train = [i[1] for i in train]

y_train = to_categorical(y_train,47)
print(y_train)

# Testing Data
x_test = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)

y_test = [i[1] for i in test]
print(y_test)
y_test = to_categorical(y_test,47)
print(y_test)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]]
[array('3', dtype='<U1'), array('4', dtype='<U1'), array('1', dtype='<U1'), array('27', dtype='<U2'), array('6', dtype='<U1'), array('45', dtype='<U2'), array('14', dtype='<U2'), array('30', dtype='<U2'), array('34', dtype='<U2'), array('9', dtype='<U1'), array('18', dtype='<U2'), array('38', dtype='<U2'), array('38', dtype='<U2'), array('33', dtype='<U2'), array('42', dtype='<U2'), array('1', dtype='<U1'), array('35', dtype='<U2'), array('10', dtype='<U2'), array('44', dtype='<U2'), array('1', dtype='<U1'), array('1', dtype='<U1'), array('15', dtype='<U2'), array('24', dtype='<U2'), array('46', dtype='<U2'), array('38', dtype='<U2'), array('3', dtype='<U1'), array('19', dtype='<U2'), array('8', dtype='<U1'), array('21', dtype='<U2'), array('29', dtype='<U2'), array('3', dtype='<U1'), array('2', dtype='<U1'), array('18', dtype='<U2'

## Design Model

In [0]:
model = Sequential()
model.add(Conv2D(64, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Initiate RMSprop optimizer
#opt = keras.optimizers.rmsprop(lr=0.001, decay=1e-6)
opt = keras.optimizers.Adam(0.0005, beta_1=0.9, beta_2=0.999, amsgrad=True)
#opt = keras.optimizers.SGD(lr=0.01, momentum=0.0, nesterov=False)






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


# Model Train

In [0]:
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4)



Not using data augmentation.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 27735 samples, validate on 6000 samples
Epoch 1/10





Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Results

In [0]:
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 0.6315880841414133
Test accuracy: 0.8595


## Saving the model

In [0]:
# Save model and weights
model.save(model_name)
%cp /content/trained_model.h5 /gdrive/'My Drive'/dataset/GujaratiML/
%ls /gdrive/'My Drive'/dataset/GujaratiML

 dataold.tar.xz  'ML assign using Keras preprocessed.7z'   trained_model.h5
 data.tar.xz      train_data.dat


# Model Check

## Data Intake

In [0]:
def create_alientest_data():
    alien_test = []
    for i in os.listdir(test_path):
        img_loc = test_path+ "/" + i
        print(img_loc)
        try:
            img = cv2.imread(img_loc,cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
            alien_test.append([np.array(img),np.array("NULL")])
        except:
            print("Error at :"+ img_loc)
    return alien_test

## Labelling

In [0]:
def labelling(result):
  #print(result)
  for i in range(result.shape[0]):
    #print(i)
    answer = 0
    for j in range(result[i].shape[0]):
      if(result[i][j]==1):
        #print(answer)
        if answer == 0:
          print("ru")
        elif answer == 1:
          print("a")
        elif answer == 2:
          print("Aa")
        elif answer == 3:
          print("i")
        elif answer == 4:
          print("I")
        elif answer == 5:
          print("u")
        elif answer == 6:
          print("U")
        elif answer == 7:
          print("e")
        elif answer == 8:
          print("ai")
        elif answer == 9:
          print("o")
        elif answer == 10:
          print("au")
        elif answer == 11:
          print("am")
        elif answer == 12:
          print("ah")
        elif answer == 13:
          print("ka")
        elif answer == 14:
          print("kha")
        elif answer == 15:
          print("g")
        elif answer == 16:
          print("gh")
        elif answer == 17:
          print("ch")
        elif answer == 18:
          print("chh")
        elif answer == 19:
          print("j")
        elif answer == 20:
          print("jh")
        elif answer == 21:
          print("T")
        elif answer == 22:
          print("Th")
        elif answer == 23:
          print("D")
        elif answer == 24:
          print("Dh")
        elif answer == 25:
          print("N")
        elif answer == 27:
          print("th")
        elif answer == 28:
          print("d")
        elif answer == 29:
          print("dh")
        elif answer == 30:
          print("n")
        elif answer == 31:
          print("p")
        elif answer == 32:
          print("ph")
        elif answer == 33:
          print("b")
        elif answer == 34:
          print("bh")
        elif answer == 35:
          print("m")
        elif answer == 36:
          print("y")
        elif answer == 37:
          print("r")
        elif answer == 38:
          print("l")
        elif answer == 39:
          print("v")
        elif answer == 40:
          print("S")
        elif answer == 41:
          print("s")
        elif answer == 42:
          print("sh")
        elif answer == 43:
          print("h")
        elif answer == 44:
          print("al")
        elif answer == 45:
          print("ksh")
        elif answer == 46:
          print("gy")
        else:
          print("Other")
      answer += 1

## Model Setup


In [0]:
from keras.models import Sequential, load_model

IMG_SIZE = 150

model_path = 'trained_model.h5'
model = load_model(model_path)
test_path = '/gdrive/My Drive/dataset/GujaratiML/alien_test'

#opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
opt = keras.optimizers.Adam(0.0005, beta_1=0.9, beta_2=0.999, amsgrad=True)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

## Model.Predict

In [0]:
train = create_alientest_data()
x_train = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
array = model.predict(x_train)
labelling(array)

/gdrive/My Drive/dataset/GujaratiML/alien_test/20f-2.png
/gdrive/My Drive/dataset/GujaratiML/alien_test/001a-3.png
/gdrive/My Drive/dataset/GujaratiML/alien_test/9t-20.png
/gdrive/My Drive/dataset/GujaratiML/alien_test/003e-4.png
ph
a
T
i
