In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

In [2]:
# Location of the letter-recognition data file on the UCI archive.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "letter-recognition/letter-recognition.data"
)

# Column labels are supplied explicitly via `names=`, so pandas treats every
# row of the file as data: the target letter first, then 16 feature columns.
column_names = [
    "letter",
    "x-box", "y-box", "width", "height",
    "onpix", "x-bar", "y-bar", "x2bar",
    "y2bar", "xybar", "x2ybr", "xy2br",
    "x-ege", "xegvy", "y-ege", "yegvx",
]

data = pd.read_csv(url, names=column_names)

In [3]:
# Split the frame into the target column (y) and the remaining feature
# columns (X).
y = data['letter']
X = data.drop(columns='letter')

In [4]:
# Encode the letter labels as integer class ids; the fitted encoder is kept so
# predictions can be mapped back to letters with inverse_transform later on.
#
# The encoder is fitted on the original `data['letter']` column rather than on
# `y` itself: `y` is overwritten with the encoded integers below, so fitting on
# `y` would make a second run of this cell refit the encoder on integers and
# lose the letter <-> id mapping.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['letter'])

In [6]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Fully connected classifier: 16 input features -> 128 -> 64 -> 26 outputs.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(16,)))
model.add(Dense(64, activation='relu'))
# Softmax over 26 units: one probability per letter class (A-Z).
model.add(Dense(26, activation='softmax'))

In [8]:
# The sparse variant of categorical cross-entropy is used because the targets
# are integer class ids (from the label encoder), not one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Train for 10 epochs in mini-batches of 32, holding back 10% of the training
# rows for validation. The call is left as the cell's final expression, so the
# returned History object is what the cell displays.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fed3032cda0>

In [10]:
# Score the trained model on the held-out test split. evaluate() yields the
# loss followed by the metrics given at compile time (accuracy only).
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 89.32%


In [11]:
# Persist the trained model to disk so it can be reloaded without retraining.
model.save(filepath='ocr_model.keras')

In [12]:
# Reload the model previously written to 'ocr_model.keras'. This checks the
# save/load round trip and gives a model object that does not depend on the
# in-memory `model` from training.
# (`load_model` is imported with the rest of the dependencies in the first
# cell, keeping all imports in one place.)
loaded_model = load_model('ocr_model.keras')

In [13]:
# One row per sample to classify; each row holds the 16 feature values in the
# same column order used for training.
new_data = np.array([[2, 3, 5, 4, 7, 8, 3, 4, 5, 6, 7, 8, 3, 4, 5, 6]])

# Predict the class probabilities: one row of 26 values per input row.
predicted_probabilities = loaded_model.predict(new_data)

# Take the most probable class for each row. axis=1 matters: without it
# np.argmax flattens the whole array, which only gives a valid class index
# when there is exactly one input row.
predicted_class_index = np.argmax(predicted_probabilities, axis=1)

# Convert the predicted class indices back to the original letters.
predicted_letter = label_encoder.inverse_transform(predicted_class_index)
print(f"The predicted letter is: {predicted_letter[0]}")

The predicted letter is: Q
