In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [2]:
df= pd.read_csv('Alphabets_data.csv')
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


## Exploratory Data Analysis

In [4]:
df.shape

(20000, 17)

In [5]:
df.isnull().sum()   # no missing values

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

In [6]:
df.duplicated().sum()

1332

In [7]:
df.drop_duplicates(inplace=True)  # no duplicates
df.duplicated().sum()

0

In [8]:
df.dtypes

letter    object
xbox       int64
ybox       int64
width      int64
height     int64
onpix      int64
xbar       int64
ybar       int64
x2bar      int64
y2bar      int64
xybar      int64
x2ybar     int64
xy2bar     int64
xedge      int64
xedgey     int64
yedge      int64
yedgex     int64
dtype: object

In [9]:
x= df.drop(columns=['letter'])  # features
y= df['letter']   # target

In [10]:
le= LabelEncoder()
y= le.fit_transform(y)   # Ecoding Target labels

In [11]:
features= df.drop('letter',axis=1)  # features
target= df['letter']   # target

In [12]:
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=42)

In [13]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(14934, 16)
(3734, 16)
(14934,)
(3734,)


## ANN Model Implementation

In [15]:
pip install scikeras

Note: you may need to restart the kernel to use updated packages.


In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [17]:
model= Sequential([Dense(32,activation='relu',input_shape=(16,)),  # first hidden layer
                   Dense(16,activation='relu'),                    # second hidden layer                   
                   Dense(len(le.classes_),activation='softmax')])   # output layer

In [18]:
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [55]:
# Training model
history= model.fit(x_train,y_train,epochs=20,batch_size=32,validation_data=(x_test,y_test),verbose=1)

Epoch 1/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7981 - loss: 0.6719 - val_accuracy: 0.7954 - val_loss: 0.7137
Epoch 2/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8068 - loss: 0.6578 - val_accuracy: 0.8005 - val_loss: 0.6924
Epoch 3/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8048 - loss: 0.6480 - val_accuracy: 0.7946 - val_loss: 0.7083
Epoch 4/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8025 - loss: 0.6579 - val_accuracy: 0.7906 - val_loss: 0.7016
Epoch 5/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8058 - loss: 0.6426 - val_accuracy: 0.7994 - val_loss: 0.6882
Epoch 6/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8055 - loss: 0.6403 - val_accuracy: 0.7970 - val_loss: 0.7008
Epoch 7/20
[1m467/467[0m 

## Hyperparameter Tuning

In [20]:
pip install keras_tuner

Note: you may need to restart the kernel to use updated packages.


In [21]:
import keras_tuner as kt

In [22]:
# defining function for hyperparameter tuning
def build_model(hp):
    model = Sequential()
    model.add(Dense(hp.Int('units_input', min_value=16, max_value=128, step=16), activation='relu', input_shape=(16,)))
    
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(hp.Int(f'units_{i}', min_value=16, max_value=128, step=16), activation=hp.Choice(f'activation_{i}', ['relu', 'tanh'])))
    
    model.add(Dense(len(le.classes_), activation='softmax'))
    
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp.Choice('learning_rate', [0.001, 0.0001])),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [23]:
# using KerasTuner for hyperparameter search
tuner= kt.RandomSearch(build_model,objective='val_accuracy',max_trials=10,directory='kt_tuning',
                       project_name='alphabet_recognition')

Reloading Tuner from kt_tuning\alphabet_recognition\tuner0.json


In [24]:
tuner.search(x_train,y_train,epochs=10,validation_data=(x_test,y_test)) #perform the search

In [25]:
# retrieving the best model
best_hps= tuner.get_best_hyperparameters(num_trials=1)[0]
print('Best Hyperparameter:',best_hps.values)

Best Hyperparameter: {'units_input': 112, 'num_layers': 2, 'units_0': 96, 'activation_0': 'relu', 'learning_rate': 0.001, 'units_1': 48, 'activation_1': 'tanh', 'units_2': 96, 'activation_2': 'relu'}


In [26]:
# Train the best model
best_model= tuner.hypermodel.build(best_hps)
best_model.fit(x_train,y_train,epochs=20,validation_data= (x_test,y_test))

Epoch 1/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.3723 - loss: 2.3612 - val_accuracy: 0.7076 - val_loss: 1.0867
Epoch 2/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7299 - loss: 0.9960 - val_accuracy: 0.7517 - val_loss: 0.8387
Epoch 3/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7978 - loss: 0.7188 - val_accuracy: 0.8278 - val_loss: 0.6326
Epoch 4/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8401 - loss: 0.5629 - val_accuracy: 0.8396 - val_loss: 0.5399
Epoch 5/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8620 - loss: 0.4839 - val_accuracy: 0.8618 - val_loss: 0.4658
Epoch 6/20
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8816 - loss: 0.4086 - val_accuracy: 0.8629 - val_loss: 0.4333
Epoch 7/20
[1m467/467[0m 

<keras.src.callbacks.history.History at 0x20f70b84e90>

In [27]:
# Evaluate best model
test_loss,test_acc= best_model.evaluate(x_test,y_test)
print('Test Accuracy:',test_acc)

[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9379 - loss: 0.2041
Test Accuracy: 0.9359935522079468
