In [1]:
from IPython.display import Image
import csv
from PIL import Image as pil_image
import tensorflow as tf
from keras.preprocessing import image
import random
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import keras.callbacks
import keras.datasets
import joblib
import pickle

In [2]:
imgs = []
classes = []
with open("dataset/hasy-data-labels.csv") as csvfile:
    csvreader = csv.reader(csvfile)
    i = 0
    for row in csvreader:
        if i > 0:
            img = image.img_to_array(pil_image.open("dataset/"+row[0]))
            img /= 255.0
            imgs.append((row[0], row[2], img))
            classes.append(row[2])
        i += 1   

In [3]:
random.shuffle(imgs)
split_index = int(0.8*len(imgs))
train = imgs[:split_index]
test = imgs[split_index:]

In [4]:
train_input = np.asarray(list(map(lambda row: row[2], train)))
test_input = np.asarray(list(map(lambda row: row[2], test)))

train_output = np.asarray(list(map(lambda row: row[1], train)))
test_output = np.asarray(list(map(lambda row: row[1],test)))

In [5]:
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(classes)

onehot_encoder = OneHotEncoder(sparse_output=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoder.fit(integer_encoded)

train_output_int = label_encoder.transform(train_output)
train_output = onehot_encoder.transform(train_output_int.reshape(len(train_output_int),1))
test_output_int = label_encoder.transform(test_output)
test_output = onehot_encoder.transform(test_output_int.reshape(len(test_output_int), 1))

num_classes = len(label_encoder.classes_)
print("Number of classes %d " % num_classes)

Number of classes 369 


In [6]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=np.shape(train_input[0])))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(128, activation='tanh'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 15, 15, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 6, 6, 32)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 1152)              0         
                                                                 
 dense (Dense)               (None, 128)               1

In [7]:
# tensorboard = keras.callbacks.TensorBoard(log_dir='./logs/mnist-style')

In [8]:
model.fit(np.concatenate((train_input, test_input)),
          np.concatenate((train_output, test_output)),
          batch_size=32, epochs=10, 
          verbose=2)

# score = model.evaluate(test_input, test_output, verbose=1)
# print(score)

Epoch 1/10
5258/5258 - 79s - loss: 1.8263 - accuracy: 0.5778 - 79s/epoch - 15ms/step
Epoch 2/10
5258/5258 - 77s - loss: 1.0992 - accuracy: 0.7027 - 77s/epoch - 15ms/step
Epoch 3/10
5258/5258 - 77s - loss: 0.9843 - accuracy: 0.7259 - 77s/epoch - 15ms/step
Epoch 4/10
5258/5258 - 77s - loss: 0.9217 - accuracy: 0.7396 - 77s/epoch - 15ms/step
Epoch 5/10
5258/5258 - 81s - loss: 0.8835 - accuracy: 0.7471 - 81s/epoch - 15ms/step
Epoch 6/10
5258/5258 - 82s - loss: 0.8549 - accuracy: 0.7526 - 82s/epoch - 16ms/step
Epoch 7/10
5258/5258 - 84s - loss: 0.8324 - accuracy: 0.7585 - 84s/epoch - 16ms/step
Epoch 8/10
5258/5258 - 83s - loss: 0.8159 - accuracy: 0.7614 - 83s/epoch - 16ms/step
Epoch 9/10
5258/5258 - 80s - loss: 0.7993 - accuracy: 0.7648 - 80s/epoch - 15ms/step
Epoch 10/10
5258/5258 - 76s - loss: 0.7898 - accuracy: 0.7669 - 76s/epoch - 14ms/step


<keras.src.callbacks.History at 0x1f49b88e5d0>

In [9]:
model.save("mathsymbols.model")
np.save('classes.npy', label_encoder.classes_)

INFO:tensorflow:Assets written to: mathsymbols.model\assets


INFO:tensorflow:Assets written to: mathsymbols.model\assets


In [10]:
# with open('model.pkl', 'wb') as file:
#     pickle.dump(model, file)