In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [3]:
# Load dataset
file_path = "Alphabets_data.csv"
df = pd.read_csv(file_path)

In [6]:
# Display basic info
print(df.info())
print('*'*100)
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB
None
****************************************************************************************************
  letter  xbox  ybox  width  height  onpix  xbar  

In [7]:
# Check for missing values
print(df.isnull().sum())

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64


In [8]:
# Handle missing values (if any)
df.dropna(inplace=True)

In [9]:
# Separate target (first column) and features (remaining columns)
y = df.iloc[:, 0]  # "letter" column as target
X = df.iloc[:, 1:] # Remaining columns as features

In [10]:
# Encode categorical labels (letters)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Convert letters to numerical values

In [11]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [15]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [16]:
# Define ANN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # Input layer
    Dense(32, activation='relu'),  # Hidden layer
    Dense(len(np.unique(y_encoded)), activation='softmax')  # Output layer (multi-class classification)
])

In [17]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [18]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [19]:
# Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=1)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9245
              precision    recall  f1-score   support

           0       0.96      0.97      0.97       149
           1       0.82      0.94      0.88       153
           2       0.97      0.88      0.92       137
           3       0.86      0.95      0.90       156
           4       0.85      0.97      0.90       141
           5       0.87      0.93      0.90       140
           6       0.93      0.89      0.91       160
           7       0.88      0.76      0.82       144
           8       0.92      0.95      0.93       146
           9       0.99      0.91      0.95       149
          10       0.80      0.92      0.86       130
          11       0.99      0.92      0.95       155
          12       0.98      0.93      0.96       168
          13       0.94      0.96      0.95       151
          14       0.94      0.92      0.93       145
          15       0.96      0.83      0.89       173
          16       0.96      0.93      0.95       166
          

In [21]:
# Function to create ANN model for hyperparameter tuning
def create_model(learning_rate=0.001, neurons=64):
    model = Sequential([
        Dense(neurons, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(neurons//2, activation='relu'),
        Dense(len(np.unique(y_encoded)), activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [23]:
# Wrap model for scikit-learn compatibility
model_wrapper = KerasClassifier(build_fn=create_model, epochs=20, batch_size=32, verbose=0)

  


In [24]:
# Define hyperparameter grid
param_grid = {
    'learning_rate': [0.01, 0.001, 0.0001],
    'neurons': [32, 64, 128]
}

# Perform grid search
grid = GridSearchCV(estimator=model_wrapper, param_grid=param_grid, scoring='accuracy', cv=3)
grid_result = grid.fit(X_train, y_train)

# Display best hyperparameters
print("Best parameters:", grid_result.best_params_)

Best parameters: {'learning_rate': 0.001, 'neurons': 128}


In [25]:
# Train the best model
best_model = create_model(learning_rate=grid_result.best_params_['learning_rate'], neurons=grid_result.best_params_['neurons'])
best_model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate the final model
y_pred_tuned = np.argmax(best_model.predict(X_test), axis=1)
print("Tuned Model Accuracy:", accuracy_score(y_test, y_pred_tuned))
print(classification_report(y_test, y_pred_tuned))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Tuned Model Accuracy: 0.94525
              precision    recall  f1-score   support

           0       0.94      0.99      0.97       149
           1       0.88      0.93      0.91       153
           2       0.97      0.91      0.94       137
           3       0.93      0.95      0.94       156
           4       0.94      0.94      0.94       141
           5       0.88      0.96      0.92       140
           6       0.88      0.96      0.92       160
           7       0.96      0.81      0.88       144
           8       0.99      0.92      0.95       146
           9       0.95      0.95      0.95       149
          10       0.88      0.92      0.90       130
          11       0.97      0.98      0.97       155
          12       1.00      0.93   