In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [9]:
# Location of the letter-recognition data file on the UCI repository; it is read
# further down with pd.read_csv using explicitly supplied column names.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"

In [10]:
# Column labels for the data file: the target ('letter') comes first,
# followed by the 16 numeric feature columns, in file order.
names = [
    'letter',
    'x-box', 'y-box', 'width', 'height',
    'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br',
    'x-ege', 'xegvy', 'y-ege', 'yegvx',
]


In [11]:
# Read the data file; it has no header row, so the column labels come from `names`.
data = pd.read_csv(url, header=None, names=names)

In [12]:
# Separate the target column from the feature columns
y = data['letter']
X = data.drop(columns='letter')

In [13]:
# Display the feature matrix (every column except 'letter')
X

Unnamed: 0,x-box,y-box,width,height,onpix,x-bar,y-bar,x2bar,y2bar,xybar,x2ybr,xy2br,x-ege,xegvy,y-ege,yegvx
0,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8


In [28]:
# Display the target column ('letter')
y

0        T
1        I
2        D
3        N
4        G
        ..
19995    D
19996    C
19997    T
19998    S
19999    A
Name: letter, Length: 20000, dtype: object

In [14]:
# Encode the letter labels as integer class indices.
# The fitted encoder is kept so predictions can be mapped back to letters later.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [15]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [16]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [17]:
# Define the model
# Imported here so this cell is self-contained: declaring the input with an
# explicit Input layer replaces the `input_shape=` argument on the first Dense
# layer, which Keras warns about when used inside a Sequential model.
from tensorflow.keras import Input

model = Sequential([
    # Input layer: one value per feature column of the training data
    Input(shape=(X_train.shape[1],)),
    # First hidden layer with 64 neurons and ReLU activation function
    Dense(64, activation='relu'),
    # Second hidden layer with 64 neurons and ReLU activation function
    Dense(64, activation='relu'),
    # Output layer with one neuron per class known to the label encoder
    # and softmax activation function
    Dense(len(label_encoder.classes_), activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Compile the model.
# The sparse categorical loss matches the integer-encoded labels in y_train/y_test.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [19]:
# Train the model for 20 epochs in mini-batches of 32,
# holding back 10% of the training data for per-epoch validation.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.1,
)

Epoch 1/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.3008 - loss: 2.5269 - val_accuracy: 0.7000 - val_loss: 1.1281
Epoch 2/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 985us/step - accuracy: 0.7047 - loss: 1.0456 - val_accuracy: 0.7331 - val_loss: 0.9115
Epoch 3/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7625 - loss: 0.8497 - val_accuracy: 0.7725 - val_loss: 0.7815
Epoch 4/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 998us/step - accuracy: 0.7967 - loss: 0.7213 - val_accuracy: 0.8050 - val_loss: 0.7015
Epoch 5/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8132 - loss: 0.6466 - val_accuracy: 0.8238 - val_loss: 0.6244
Epoch 6/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8342 - loss: 0.5663 - val_accuracy: 0.8281 - val_loss: 0.5892
Epoch 7/20
[1m450/450[

<keras.src.callbacks.history.History at 0x1741e397f10>

In [20]:
# Save the trained model to disk; this same file name is read back by load_model further down.
# NOTE(review): the ".h5" suffix presumably selects Keras' legacy HDF5 format — confirm whether
# the native ".keras" format is preferable (the load cell would need the same path change).
model.save("letter_recognition_model.h5")



In [21]:
# Evaluate the model on the held-out test split; evaluate() returns the loss
# followed by the metrics configured at compile time (here: accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print('Test Accuracy: ', test_acc)

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 854us/step - accuracy: 0.9157 - loss: 0.2686
Test Accuracy:  0.9154999852180481


In [22]:
# Save the label encoder's class array so predicted indices can be decoded later
np.save(file="label_encoder_classes.npy", arr=label_encoder.classes_)

In [23]:
from tensorflow.keras.models import load_model

In [24]:
# Function to preprocess input dimensions
def preprocess_input(dimensions, expected_features=None):
  """Shape raw feature values into a single-row 2-D array for the model.

  Args:
    dimensions: Sequence (or array) of feature values for one sample.
    expected_features: Optional number of values the model expects. When
      given, a mismatch raises ValueError here instead of surfacing later
      as a shape error inside ``model.predict``.

  Returns:
    A NumPy array of shape (1, n) holding the values in their original order.

  Raises:
    ValueError: If no values are supplied, or if ``expected_features`` is
      given and the number of supplied values differs from it.
  """
  # Convert to a numpy array and reshape to one row (a batch of one sample)
  input_data = np.array(dimensions).reshape(1, -1)
  n_features = input_data.shape[1]
  # An empty input would otherwise silently produce a (1, 0) array
  if n_features == 0:
    raise ValueError("No feature values were provided.")
  if expected_features is not None and n_features != expected_features:
    raise ValueError(
        f"Expected {expected_features} feature values, got {n_features}.")
  return input_data

In [25]:
# Function to predict the output using the trained model
def predict_output(model,input_data,label_encoder):
  """Predict the label for a single preprocessed sample.

  Args:
    model: Object exposing ``predict(input_data)`` that returns per-class
      scores, one row per sample.
    input_data: Batch to pass to ``model.predict``.
    label_encoder: Object exposing ``inverse_transform`` that maps class
      indices back to the original labels.

  Returns:
    The decoded label of the first sample in the batch.
  """
  # Scores for every class, one row per input sample
  class_scores = model.predict(input_data)
  # Index of the highest-scoring class along the last axis
  best_index = np.argmax(class_scores, axis=-1)
  # Map the class index back to the original letter
  decoded_labels = label_encoder.inverse_transform(best_index)
  return decoded_labels[0]

In [26]:
# Restore the label encoder from the saved class array.
# SECURITY: allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code — only load a file this notebook wrote itself.
# NOTE(review): presumably required because the saved classes are an
# object-dtype array of letters — confirm.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load("label_encoder_classes.npy", allow_pickle=True)
# Restore the trained model saved earlier
model = load_model("letter_recognition_model.h5")



In [34]:
# Prompt the user for one sample's feature values as a comma-separated line
dimension_str = input("Enter the dimensions separated by commas: ")
# Parse every comma-separated token as an integer
dimensions = [int(token) for token in dimension_str.split(',')]

Enter the dimensions separated by commas:  5,11,6,8,2,3,7,8,2,7,6,11,4,8,2,11


In [35]:
# Preprocess the input dimensions: preprocess_input reshapes the parsed values
# into a single-row 2-D array, i.e. a batch containing one sample.
input_data = preprocess_input(dimensions)

In [36]:
# Predict the output: run the reloaded model on the single-sample batch and
# decode the winning class index back to its letter with the label encoder.
predicted_letter = predict_output(model, input_data,label_encoder)
print("Predicted Letter: ",predicted_letter)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted Letter:  K
