In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.base import BaseEstimator, ClassifierMixin


In [18]:

data = pd.read_csv('Alphabets_data.csv')
data.head()


Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [19]:

num_samples = data.shape[0]
num_features = data.shape[1] - 1  
classes = data['letter'].unique()
num_classes = len(classes)

# Check for missing values
missing_values = data.isnull().sum()

# Basic statistics for each feature
stats = data.describe()

(num_samples, num_features, num_classes, classes, missing_values, stats)


(20000,
 16,
 26,
 array(['T', 'I', 'D', 'N', 'G', 'S', 'B', 'A', 'J', 'M', 'X', 'O', 'R',
        'F', 'C', 'H', 'W', 'L', 'P', 'E', 'V', 'Y', 'Q', 'U', 'K', 'Z'],
       dtype=object),
 letter    0
 xbox      0
 ybox      0
 width     0
 height    0
 onpix     0
 xbar      0
 ybar      0
 x2bar     0
 y2bar     0
 xybar     0
 x2ybar    0
 xy2bar    0
 xedge     0
 xedgey    0
 yedge     0
 yedgex    0
 dtype: int64,
                xbox          ybox         width       height         onpix  \
 count  20000.000000  20000.000000  20000.000000  20000.00000  20000.000000   
 mean       4.023550      7.035500      5.121850      5.37245      3.505850   
 std        1.913212      3.304555      2.014573      2.26139      2.190458   
 min        0.000000      0.000000      0.000000      0.00000      0.000000   
 25%        3.000000      5.000000      4.000000      4.00000      2.000000   
 50%        4.000000      7.000000      5.000000      6.00000      3.000000   
 75%        5.000000    

 # Data Preprocessing

In [20]:


# Separate features and target variable
X = data.drop('letter', axis=1)
y = data['letter']

# Normalize the features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Encode the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Display the first few rows of the preprocessed data
(X_normalized[:5], y_encoded[:5])


(array([[-1.0576983 ,  0.29187713, -1.05327668, -0.16470367, -1.14401317,
          0.54413045,  2.36509711, -1.71435955,  0.34499439, -0.91707055,
          1.34777427,  0.03412531, -1.30594761, -0.21908163, -1.4381527 ,
          0.12291107],
        [ 0.51038497,  1.5023577 , -1.05327668,  0.71973007, -0.6874762 ,
          1.53130471, -1.07532563,  0.13756063, -0.4950723 ,  1.89596765,
         -1.31280664,  0.51476353, -0.4484921 , -0.21908163,  0.12008142,
          1.35944092],
        [-0.01230945,  1.19973756,  0.43590966,  1.16194694,  1.13867169,
          1.53130471, -0.64527279, -0.97359148,  0.34499439,  0.69037985,
         -1.31280664, -0.4465129 , -0.01976435, -0.8656262 , -0.26947711,
          0.74117599],
        [ 1.55577381,  1.19973756,  0.43590966,  0.2775132 , -0.23093923,
         -0.93663094,  0.64488574, -0.23282341,  0.34499439, -1.72079575,
         -0.93272365,  0.99540174,  1.26641891,  1.07400752, -0.65903564,
          0.12291107],
        [-1.0576983 

# Model Implementation

In [21]:


# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_encoded, test_size=0.2, random_state=42)

# Convert target variable to categorical
y_train_categorical = to_categorical(y_train, num_classes=26)
y_test_categorical = to_categorical(y_test, num_classes=26)

# Define a basic ANN model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(26, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train_categorical, epochs=20, batch_size=32, validation_data=(X_test, y_test_categorical))

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)

(test_loss, test_accuracy)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.2884 - loss: 2.5339 - val_accuracy: 0.6900 - val_loss: 1.1192
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7035 - loss: 1.0256 - val_accuracy: 0.7828 - val_loss: 0.7872
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7774 - loss: 0.7656 - val_accuracy: 0.8148 - val_loss: 0.6523
Epoch 4/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8169 - loss: 0.6301 - val_accuracy: 0.8438 - val_loss: 0.5609
Epoch 5/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8404 - loss: 0.5455 - val_accuracy: 0.8537 - val_loss: 0.5093
Epoch 6/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8571 - loss: 0.4818 - val_accuracy: 0.8635 - val_loss: 0.4641
Epoch 7/20
[1m500/500[0m 

(0.23449911177158356, 0.9269999861717224)

In [22]:


# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_encoded, test_size=0.2, random_state=42)

# Convert target variable to categorical
y_train_categorical = to_categorical(y_train, num_classes=26)
y_test_categorical = to_categorical(y_test, num_classes=26)


In [23]:

# Define a basic ANN model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(26, activation='softmax'))


In [24]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train_categorical, epochs=20, batch_size=32, validation_data=(X_test, y_test_categorical))


Epoch 1/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.3175 - loss: 2.5018 - val_accuracy: 0.7243 - val_loss: 1.0038
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7365 - loss: 0.9324 - val_accuracy: 0.7878 - val_loss: 0.7359
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7901 - loss: 0.7121 - val_accuracy: 0.8175 - val_loss: 0.6209
Epoch 4/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8210 - loss: 0.6043 - val_accuracy: 0.8403 - val_loss: 0.5397
Epoch 5/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8542 - loss: 0.5044 - val_accuracy: 0.8595 - val_loss: 0.4800
Epoch 6/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8675 - loss: 0.4561 - val_accuracy: 0.8763 - val_loss: 0.4361
Epoch 7/20
[1m500/500[0m 

In [25]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)
print(f'Test loss: {test_loss}, Test accuracy: {test_accuracy}')


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9261 - loss: 0.2318
Test loss: 0.2263203263282776, Test accuracy: 0.9290000200271606
