In [243]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator

## Preparing Data

In [244]:
# Dataset locations: the loader reads one sub-folder per class inside each split directory.
train_dir = "Training/train"
eval_dir = "Training/eval"

# Class names are the sub-folder names of the training split; sorting fixes the
# index -> class mapping across runs. Hidden entries (e.g. ".DS_Store",
# ".ipynb_checkpoints") and stray files are skipped so they neither shift the
# class indices nor crash the loader when it tries to list them as folders.
classes = sorted(
    entry
    for entry in os.listdir(train_dir)
    if not entry.startswith(".") and os.path.isdir(os.path.join(train_dir, entry))
)
num_classes = len(classes)

# Target size handed to cv2.resize for every loaded image.
image_size = (28, 28)

In [245]:
def load_images_from_folder(folder, label_offset=10):
    """Load every readable image under ``folder`` as an inverted grayscale array.

    ``folder`` is expected to contain one sub-folder for each entry of the
    module-level ``classes`` list. Every image is read as grayscale,
    bitwise-inverted, and resized to the module-level ``image_size``.

    Args:
        folder: Path of the split directory (one sub-folder per class).
        label_offset: Label assigned to the first class; class ``i`` receives
            ``label_offset + i``. The default of 10 reproduces the original
            labels 10..13 for the first four classes (presumably chosen so the
            symbols follow digit labels 0-9 -- confirm against ``train.csv``).
            Unlike the original if/elif chain, classes beyond the fourth get
            a real integer label instead of ``None``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays holding one entry per
        successfully loaded image.
    """
    import warnings

    images = []
    labels = []
    for class_index, cls in enumerate(classes):
        cls_dir = os.path.join(folder, cls)
        label = label_offset + class_index
        # Sorted so the sample order does not depend on filesystem enumeration order.
        for image_name in sorted(os.listdir(cls_dir)):
            image_path = os.path.join(cls_dir, image_name)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable / non-image file by returning
                # None; passing that on to cv2.bitwise_not would raise.
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue
            inverted_image = cv2.bitwise_not(image)  # swap dark and light pixels
            images.append(cv2.resize(inverted_image, image_size))
            labels.append(label)
    return np.array(images), np.array(labels)

# Run the loader once per split: training first, then evaluation
(train_images, train_labels), (eval_images, eval_labels) = (
    load_images_from_folder(split_dir) for split_dir in (train_dir, eval_dir)
)

In [246]:
def getData(images, labels):
    """Build a 2-D table with one row per sample: the label followed by its pixels.

    Args:
        images: Array whose first axis indexes samples; all remaining axes
            are flattened into a single row of pixel values.
        labels: Array holding one label per sample.

    Returns:
        Array with ``images.shape[0]`` rows; column 0 is the label and the
        remaining columns are the flattened image.
    """
    sample_count = images.shape[0]
    pixel_rows = images.reshape(sample_count, -1)
    label_column = labels.reshape(-1, 1)
    return np.concatenate((label_column, pixel_rows), axis=1)

In [247]:
# Wrap each split's (label + flattened pixels) table in its own DataFrame
train_df, eval_df = (
    pd.DataFrame(getData(split_images, split_labels))
    for split_images, split_labels in (
        (train_images, train_labels),
        (eval_images, eval_labels),
    )
)

In [248]:
# Stack the training rows on top of the evaluation rows; ignore_index
# renumbers the result 0..n-1 in the same step
merged_df = pd.concat([train_df, eval_df], axis=0, ignore_index=True)


In [249]:

# Add column headers: the label first, then one column per pixel of the flattened image.
# The pixel count is derived from image_size instead of being hard-coded as 784,
# so the headers stay in sync with the resize target used by the loader.
column_names = ['label'] + [f'pixel{i}' for i in range(image_size[0] * image_size[1])]
merged_df.columns = column_names


In [250]:
# Shuffle the rows: frac=1 samples every row exactly once, and the fixed
# random_state makes the resulting order repeatable
randomized_df = merged_df.sample(random_state=22, frac=1)

In [251]:
# Save the shuffled table with its header row; index=False leaves the row index out of the file
randomized_df.to_csv(path_or_buf='symbols.csv', index=False)

## Preprocessing

In [252]:
# Load the symbol table written above and the digit table from train.csv
symbols_data, digits_data = map(pd.read_csv, ('symbols.csv', 'train.csv'))

In [253]:
# Stack the symbol rows on top of the digit rows; np.concatenate converts both frames to one plain array
combined_data = np.concatenate((symbols_data, digits_data), axis=0)

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
train_data, val_data = train_test_split(combined_data, random_state=42, test_size=0.2)

# Column 0 carries the label, the remaining columns carry the pixels
train_labels, train_pixels = train_data[:, 0], train_data[:, 1:]
val_labels, val_pixels = val_data[:, 0], val_data[:, 1:]

# Restore the layout the network takes as input: (samples, 28, 28, 1 channel)
train_images = train_pixels.reshape((-1, 28, 28, 1))
val_images = val_pixels.reshape((-1, 28, 28, 1))

In [254]:
# The generator only rescales pixel values by 1/255 (assumes 0-255 inputs);
# no augmentation transforms are configured
datagen = ImageDataGenerator(rescale=1.0/255.0)

# Shuffled mini-batches of 32 for training
training_set = datagen.flow(x=train_images, y=train_labels, batch_size=32, shuffle=True)

# Validation batches come from the same generator, so they receive the same rescaling
validation_set = datagen.flow(x=val_images, y=val_labels, batch_size=32, shuffle=True)


In [255]:
# Small CNN: two convolution + max-pooling stages followed by a dense classifier head
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    # 14 outputs: the symbol loader assigns labels 10-13, so labels 0-9 are
    # presumably the digit classes from train.csv -- confirm
    Dense(14, activation='softmax'),
])

In [256]:
# Labels are plain integers (not one-hot vectors), hence the sparse cross-entropy loss
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 25 epochs, evaluating on the validation generator after each one
history = model.fit(training_set, epochs=25, validation_data=validation_set)

Epoch 1/25


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [257]:
# Write the trained model to disk as an HDF5 (.h5) file
model.save(filepath="digits_symbols_model.h5")