In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical


In [2]:
filename = "letter-recognition.data"
column_names = ['letter'] + [f'feature_{i}' for i in range(1, 17)]
df = pd.read_csv(filename, names=column_names)
df.head()

Unnamed: 0,letter,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [3]:
X = df.iloc[:, 1:].values
y = df['letter'].values

# Encode the labels (A–Z → 0–25)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# One-hot encoding for categorical output
y_categorical = to_categorical(y_encoded)

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_categorical, test_size=0.2, random_state=42)

In [5]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(16,)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(26, activation='softmax')  # 26 classes (A-Z)
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)

Epoch 1/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.2584 - loss: 2.5958 - val_accuracy: 0.6712 - val_loss: 1.1263
Epoch 2/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6167 - loss: 1.2611 - val_accuracy: 0.7575 - val_loss: 0.8477
Epoch 3/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6640 - loss: 1.0612 - val_accuracy: 0.7850 - val_loss: 0.7263
Epoch 4/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7104 - loss: 0.9255 - val_accuracy: 0.8069 - val_loss: 0.6471
Epoch 5/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7327 - loss: 0.8458 - val_accuracy: 0.8300 - val_loss: 0.5859
Epoch 6/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7512 - loss: 0.7910 - val_accuracy: 0.8469 - val_loss: 0.5325
Epoch 7/20
[1m450/450[0m 

In [7]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9086 - loss: 0.3077
Test Accuracy: 0.9060


In [8]:
# Let's take the first 5 samples from the test set
sample_X = X_test[:5]
sample_y = y_test[:5]

# Predict probabilities and convert to class labels
predictions = model.predict(sample_X)
predicted_classes = np.argmax(predictions, axis=1)
actual_classes = np.argmax(sample_y, axis=1)

# Convert numerical class indices back to letters
predicted_letters = le.inverse_transform(predicted_classes)
actual_letters = le.inverse_transform(actual_classes)

# Show results
for i in range(5):
    print(f"Sample {i+1}: Predicted = {predicted_letters[i]}, Actual = {actual_letters[i]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
Sample 1: Predicted = X, Actual = T
Sample 2: Predicted = L, Actual = L
Sample 3: Predicted = A, Actual = A
Sample 4: Predicted = E, Actual = E
Sample 5: Predicted = Q, Actual = Q
