In [6]:
# Task 1: Data Exploration and Preprocessing

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

# Load the dataset
data = pd.read_csv('Alphabets_data.csv')
data

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8


In [5]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB
None


In [8]:
# Step 1.1: Encode the target 'letter' column
label_encoder = LabelEncoder()
data['letter'] = label_encoder.fit_transform(data['letter'])

# Step 1.2: Normalize the feature columns using MinMaxScaler
scaler = MinMaxScaler()
features = data.drop('letter', axis=1)
scaled_features = scaler.fit_transform(features)

# Prepare final feature matrix (X) and target labels (y)
X = scaled_features
y = data['letter']
X

array([[0.13333333, 0.53333333, 0.2       , ..., 0.53333333, 0.        ,
        0.53333333],
       [0.33333333, 0.8       , 0.2       , ..., 0.53333333, 0.26666667,
        0.66666667],
       [0.26666667, 0.73333333, 0.4       , ..., 0.46666667, 0.2       ,
        0.6       ],
       ...,
       [0.4       , 0.6       , 0.4       , ..., 0.8       , 0.13333333,
        0.26666667],
       [0.13333333, 0.2       , 0.26666667, ..., 0.6       , 0.33333333,
        0.53333333],
       [0.26666667, 0.6       , 0.4       , ..., 0.46666667, 0.13333333,
        0.53333333]])

In [9]:
# Task 2: Model Implementation

# Step 2.1: Split the dataset into training and test sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2.2: One-hot encode the target labels for training the ANN model
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

In [42]:
# Step 2.3: Define a basic Artificial Neural Network (ANN) model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))  # Input layer + hidden layer
model.add(Dense(y_train_cat.shape[1], activation='softmax'))  # Output layer

# Step 2.4: Compile the model (using Adam optimizer and categorical cross-entropy loss)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 2.5: Train the model
history = model.fit(X_train, y_train_cat, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.0467 - loss: 0.7148 - val_accuracy: 0.0375 - val_loss: 0.5930
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0424 - loss: 0.5536 - val_accuracy: 0.0375 - val_loss: 0.4636
Epoch 3/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0525 - loss: 0.4305 - val_accuracy: 0.0375 - val_loss: 0.3658
Epoch 4/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0466 - loss: 0.3390 - val_accuracy: 0.0375 - val_loss: 0.2950
Epoch 5/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0742 - loss: 0.2698 - val_accuracy: 0.0375 - val_loss: 0.2480
Epoch 6/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0715 - loss: 0.2285 - val_accuracy: 0.0312 - val_loss: 0.2190
Epoch 7/10
[1m20/20[0m [32m━━━━━━━━━

In [13]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [None]:
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier

In [2]:
# 3. Hyperparameter Tuning

from sklearn.model_selection import GridSearchCV
from tensorflow.keras import Sequential # Import Sequential for defining model architecture
from tensorflow.keras.layers import Dense # Import Dense layer for fully connected layers
from scikeras.wrappers import KerasClassifier  # Correct import for KerasClassifier

# Sample data (replace this with your actual data loading)
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def create_model(activation='relu', neurons=32):
    model = Sequential()
    model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

model = KerasClassifier(build_fn=create_model, verbose=0)
param_grid = {
    'model__activation': ['relu', 'tanh'],  # Pass 'activation' as an argument to create_model
    'model__neurons': [32, 64, 128],  # Pass 'neurons' as an argument to create_model
    'batch_size': [10, 20],
    'epochs': [50, 100]
}
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, y_train)

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# 4. Evaluation

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predictions
# Use the fitted grid search object to make predictions
y_pred = grid_result.predict(X_test)
y_pred_classes = [1 if prob > 0.5 else 0 for prob in y_pred]

# Evaluation metrics
accuracy = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

Accuracy: 0.81
Precision: 0.8349514563106796
Recall: 0.8037383177570093
F1-score: 0.819047619047619
