In [1]:
# Step 1: Import Libraries and Load Data


# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv('Alphabets_data.csv')



In [2]:
# Step 2: Explore and Preprocess Data


# Explore the data
print(df.head())
print(df.info())
print(df.describe())

# Preprocess the data
X = df.drop('letter', axis=1)
y = df['letter']


  letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0      T     2     8      3       5      1     8    13      0      6      6   
1      I     5    12      3       7      2    10     5      5      4     13   
2      D     4    11      6       8      6    10     6      2      6     10   
3      N     7    11      6       6      3     5     9      4      6      4   
4      G     2     1      3       1      1     8     6      6      6      6   

   x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
0      10       8      0       8      0       8  
1       3       9      2       8      4      10  
2       3       7      3       7      3       9  
3       4      10      6      10      2       8  
4       5       9      1       7      5      10  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbo

In [5]:
# step 3: Convert String Labels to Integer Labels

# Convert string labels to integer labels
unique_labels = np.unique(y)
label_to_int = {label: i for i, label in enumerate(unique_labels)}
y_int = np.array([label_to_int[label] for label in y])

print("Unique labels:", unique_labels)
print("Label to integer mapping:", label_to_int)
print("Integer labels:", y_int)

# One-hot encode the target variable
y_encoded = to_categorical(y_int)

print("One-hot encoded labels:", y_encoded)


Unique labels: ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R'
 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z']
Label to integer mapping: {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}
Integer labels: [19  8  3 ... 19 18  0]
One-hot encoded labels: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [9]:
#step 4: Split Data into Training and Testing Sets


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
# Print the shapes of the training and testing sets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (16000, 16)
X_test shape: (4000, 16)
y_train shape: (16000, 26)
y_test shape: (4000, 26)


In [14]:
# Step 5: Scale Data using StandardScaler


# Scale the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)                                
# Print the shapes of the scaled training and testing sets
print("X_train_scaled shape:", X_train_scaled.shape)
print("X_test_scaled shape:", X_test_scaled.shape)

# Print the first few rows of the scaled training and testing sets
print("X_train_scaled:\n", X_train_scaled[:5])
print("X_test_scaled:\n", X_test_scaled[:5])


X_train_scaled shape: (16000, 16)
X_test_scaled shape: (4000, 16)
X_train_scaled:
 [[-0.01291349 -0.01275171 -0.06013429 -0.16638781  0.22545989 -0.44203264
  -0.21256131 -0.60314772  0.76100746  1.08879289  0.59047067  0.51394072
  -0.01948363 -0.21675086  0.12044468  0.12215017]
 [-0.01291349 -0.01275171  0.43581861 -0.16638781  0.68208751 -0.44203264
   0.21739836 -0.60314772  0.76100746  1.08879289  0.59047067  0.51394072
  -0.01948363 -0.21675086  0.12044468 -0.49633169]
 [-0.53599158 -0.618174   -0.55608718 -1.04893543 -0.23116773  0.05041425
   0.21739836  0.13461945 -0.07532191 -0.51676332  0.2096754  -0.92625745
   0.83811784  0.43123377 -0.65897489 -1.11481354]
 [-0.01291349  0.89538173  0.43581861  0.71615981  1.13871514 -0.93447953
  -0.21256131  0.13461945  0.76100746 -0.91815237 -0.17111987  1.95413889
  -0.01948363 -0.21675086  0.89986424  0.74063202]
 [-0.01291349  0.89538173  0.43581861  1.15743362  0.22545989  0.54286114
   1.50727738 -0.9720313  -0.91165129 -1.720930

In [16]:
# Step 6: Define ANN Model Architecture


# Define the ANN model architecture
model = Sequential()
model.add(Input(shape=(X_train_scaled.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

# Print the model summary
print("Model Summary:")
model.summary()



Model Summary:


In [18]:
# Step 7: Compile ANN Model


# Compile the ANN model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Print a message to confirm that the model has been compiled
print("Model compiled successfully!")



Model compiled successfully!


In [19]:
# Step 8: Train ANN Model


# Train the ANN model
model.fit(X_train_scaled, y_train, epochs=10, batch_size=128, validation_data=(X_test_scaled, y_test))



Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.1593 - loss: 2.9647 - val_accuracy: 0.5238 - val_loss: 1.8626
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5733 - loss: 1.6159 - val_accuracy: 0.6852 - val_loss: 1.1389
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6992 - loss: 1.0827 - val_accuracy: 0.7460 - val_loss: 0.9165
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7420 - loss: 0.8942 - val_accuracy: 0.7755 - val_loss: 0.7956
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7742 - loss: 0.7866 - val_accuracy: 0.7937 - val_loss: 0.7187
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8011 - loss: 0.7071 - val_accuracy: 0.8145 - val_loss: 0.6487
Epoch 7/10
[1m125/125[0m 

<keras.src.callbacks.history.History at 0x24176ffe570>

In [20]:
# Step 9: Evaluate ANN Model


# Make predictions
predictions = model.predict(X_test_scaled)

# Evaluate the ANN model
predicted_classes = np.argmax(predictions, axis=1)
actual_classes = np.argmax(y_test, axis=1)
accuracy = accuracy_score(actual_classes, predicted_classes)
print(f'Test accuracy: {accuracy:.2f}')


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Test accuracy: 0.87
