In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Read the letter-recognition CSV into a DataFrame
df = pd.read_csv('/content/Alphabets_data.csv')

# Summarise the dataset: row count, feature count (every column except the
# 'letter' target), and the distinct class labels found in 'letter'
print(
    f"Number of samples: {df.shape[0]}",
    f"Number of features: {df.shape[1] - 1}",
    f"Classes: {df['letter'].unique()}",
    sep="\n",
)



Number of samples: 20000
Number of features: 16
Classes: ['T' 'I' 'D' 'N' 'G' 'S' 'B' 'A' 'J' 'M' 'X' 'O' 'R' 'F' 'C' 'H' 'W' 'L'
 'P' 'E' 'V' 'Y' 'Q' 'U' 'K' 'Z']


In [3]:
# Preprocessing
# Convert class labels to numeric.
# The encoded labels are stored in a separate Series instead of overwriting
# df['letter'], so this cell is idempotent: re-running it always fits the
# encoder on the original letters and label_encoder.classes_ keeps the
# letter <-> integer mapping (usable with label_encoder.inverse_transform).
label_encoder = LabelEncoder()
y = pd.Series(
    label_encoder.fit_transform(df['letter']),
    index=df.index,
    name='letter',
)

# Split the data into features and target
X = df.drop('letter', axis=1)

# Split the dataset into training and test sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data.
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [5]:
# Function to create the ANN model
def create_model(layers=[64, 32], activation='relu', learning_rate=0.001):
    """Build and compile a fully connected softmax classifier.

    The input width is read from the module-level ``X_train`` and the number
    of output classes from the module-level ``y``.

    Parameters
    ----------
    layers : sequence of int
        Number of units of each hidden Dense layer, in order. The default list
        is only iterated, never mutated. An empty sequence is accepted and
        yields a model whose softmax layer is connected directly to the input.
    activation : str
        Activation used by every hidden layer.
    learning_rate : float
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    A compiled ``Sequential`` model using sparse categorical cross-entropy
    loss (integer class labels) and tracking accuracy.
    """
    model = Sequential()
    # Declare the input shape with an explicit Input object rather than the
    # `input_dim` keyword on the first Dense layer; this also removes the
    # layers[0] special case, so every hidden layer is added the same way.
    model.add(tf.keras.Input(shape=(X_train.shape[1],)))
    for units in layers:
        model.add(Dense(units, activation=activation))
    # One output unit per distinct class label.
    model.add(Dense(len(np.unique(y)), activation='softmax'))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Manual Hyperparameter Tuning
param_grid = {
    'layers': [[64, 32], [128, 64], [128, 64, 32]],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01]
}

# Seed the Python, NumPy and TensorFlow random generators so that re-running
# this cell starts the search from the same random state.
tf.keras.utils.set_random_seed(42)

# Select hyperparameters on a validation split carved out of the training data.
# Scoring the candidates on X_test would let the test set influence model
# selection and make the final test metrics optimistically biased.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

best_params = None
# Start below any achievable accuracy so the first trial always sets best_params.
best_score = float('-inf')
for layers in param_grid['layers']:
    for activation in param_grid['activation']:
        for learning_rate in param_grid['learning_rate']:
            model = create_model(layers=layers, activation=activation, learning_rate=learning_rate)
            # NOTE(review): the search uses epochs=10 / batch_size=8 while the
            # final fit below uses epochs=50 / batch_size=32 — confirm this
            # mismatch is intentional.
            model.fit(X_tune, y_tune, epochs=10, batch_size=8, verbose=0)
            # verbose=0 keeps the predict progress bars out of the cell output.
            y_pred = np.argmax(model.predict(X_val, verbose=0), axis=1)
            accuracy = accuracy_score(y_val, y_pred)
            if accuracy > best_score:
                best_score = accuracy
                best_params = {'layers': layers, 'activation': activation, 'learning_rate': learning_rate}

print(f"Best parameters: {best_params}")

# Train the final model with the best parameters on the full training set.
# The test split is passed as validation_data only to log per-epoch metrics in
# `history`; it does not drive any decision here.
model = create_model(**best_params)
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)




Best parameters: {'layers': [128, 64], 'activation': 'relu', 'learning_rate': 0.001}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [6]:
# Step 4: Evaluation
# Predicted class = index of the highest softmax probability for each test row
y_pred = model.predict(X_test).argmax(axis=1)

# Score the predictions: plain accuracy, plus precision / recall / F1 averaged
# over classes with weights proportional to each class's support
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    sep="\n",
)


Accuracy: 0.965
Precision: 0.9655570427215572
Recall: 0.965
F1 Score: 0.9650036348756832


The final model reaches 96.5% accuracy when predicting letter classes on the test data, and its weighted precision, recall, and F1 score are all approximately 0.965, indicating consistently strong performance across all four metrics.