# Assignment 18: Neural Network

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore') # Suppress KerasClassifier deprecation warnings

##   1. Data Exploration and Preprocessing

In [2]:
df=pd.read_csv("Alphabets_data.csv")

In [3]:
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [4]:
df.shape

(20000, 17)

In [5]:
print(df.columns.tolist())

['letter', 'xbox', 'ybox', 'width', 'height', 'onpix', 'xbar', 'ybar', 'x2bar', 'y2bar', 'xybar', 'x2ybar', 'xy2bar', 'xedge', 'xedgey', 'yedge', 'yedgex']


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB


In [7]:
df.describe()

Unnamed: 0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,4.02355,7.0355,5.12185,5.37245,3.50585,6.8976,7.50045,4.6286,5.17865,8.28205,6.454,7.929,3.0461,8.33885,3.69175,7.8012
std,1.913212,3.304555,2.014573,2.26139,2.190458,2.026035,2.325354,2.699968,2.380823,2.488475,2.63107,2.080619,2.332541,1.546722,2.567073,1.61747
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,5.0,4.0,4.0,2.0,6.0,6.0,3.0,4.0,7.0,5.0,7.0,1.0,8.0,2.0,7.0
50%,4.0,7.0,5.0,6.0,3.0,7.0,7.0,4.0,5.0,8.0,6.0,8.0,3.0,8.0,3.0,8.0
75%,5.0,9.0,6.0,7.0,5.0,8.0,9.0,6.0,7.0,10.0,8.0,9.0,4.0,9.0,5.0,9.0
max,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0


In [8]:
df.isnull().sum()

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

## 1. Data Exploration & Preprocessing

- **Dataset:** `Alphabets_data.csv`  
- **Samples:** 20,000 rows (handwritten alphabet examples)  
- **Features:** 16 numerical features describing each alphabet  
- **Target:** `letter` column (A–Z, 26 classes)  

**Preprocessing steps applied:**
- Checked dataset shape and missing values.  
- Converted categorical target (`letter`) into **one-hot encoded** vectors using `LabelBinarizer`.  
- Normalized all features using **StandardScaler**.  
- Performed an **80/20 stratified train-test split** to preserve class balance.


## 2.Model Implementation

In [9]:
# Separate features (X) and target (y)
X = df.drop('letter', axis=1)
y = df['letter']

In [10]:
# Target variable preparation: One-Hot Encoding
# Convert the 'letter' classes (A, B, C, ...) into numerical format for the model
binarizer = LabelBinarizer()
y_encoded = binarizer.fit_transform(y)
num_classes = y_encoded.shape[1] # Total number of unique alphabet classes
target_names = binarizer.classes_


In [11]:
# Feature Scaling: Normalization using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [12]:
# Split the data into training and test sets (80/20 split is common)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y
)

In [13]:
print(f"\nTraining set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")
print("--------------------------------------\n")



Training set size: 16000 samples
Test set size: 4000 samples
--------------------------------------



## 2. Basic Model Implementation

- **Model Architecture:**  
  - Input Layer → 16 features  
  - Hidden Layer → 64 neurons, `ReLU` activation  
  - Output Layer → 26 neurons, `Softmax` activation  

- **Compilation:**  
  - Optimizer: `Adam`  
  - Loss: `Categorical Crossentropy`  
  - Metric: `Accuracy`

- **Training:**  
  - 20 epochs  
  - Batch size: 32  

The basic model was trained and evaluated on the test set.


## 3.Hyperparameter Tuning

In [14]:
# Define a function to create the basic model
def create_basic_model(input_dim=X_train.shape[1], output_dim=num_classes):
    model = Sequential([
        # Input layer and one Hidden layer (required by assignment)
        Dense(units=64, activation='relu', input_shape=(input_dim,)),
        # Output layer
        Dense(units=output_dim, activation='softmax')
    ])
    
    # Compile the model
    # Using 'adam' optimizer and 'categorical_crossentropy' for multi-class classification
    model.compile(optimizer='adam', 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model


In [15]:
# Create and train the basic model
basic_model = create_basic_model()
print("--- Training Basic ANN Model ---")
history = basic_model.fit(
    X_train, y_train,
    epochs=10, 
    batch_size=32, 
    verbose=0, # Set to 1 or 2 for progress bar
    validation_data=(X_test, y_test)
)
print("Basic Model Training Complete.")

--- Training Basic ANN Model ---
Basic Model Training Complete.


In [16]:
# Evaluate the basic model
loss, accuracy = basic_model.evaluate(X_test, y_test, verbose=0)
print(f"\nBasic Model (Default Hyperparameters) Accuracy: {accuracy*100:.2f}%")


Basic Model (Default Hyperparameters) Accuracy: 88.00%


In [17]:
# The KerasClassifier wrapper is used to make Keras models compatible with scikit-learn's GridSearchCV
def create_tunable_model(optimizer='adam', neurons=64, activation='relu', input_dim=X_train.shape[1], output_dim=num_classes):
    model = Sequential([
        Dense(units=neurons, activation=activation, input_shape=(input_dim,)),
        Dense(units=output_dim, activation='softmax')
    ])
    # Note: Learning rate is adjusted via the specific optimizer (e.g., Adam)
    model.compile(optimizer=optimizer, 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

In [18]:
# Wrap the Keras model in the KerasClassifier
keras_clf = KerasClassifier(
    model=create_tunable_model, 
    verbose=0,
    # Set static parameters for tuning (these are part of the 'fit' process)
    batch_size=32, 
    epochs=10 
)

In [19]:
# Define the Hyperparameter Grid for GridSearchCV
param_grid = {
    'model__neurons': [32, 64, 128],             # Number of neurons in the hidden layer
    'model__activation': ['relu', 'tanh'],      # Activation function
    'optimizer': ['adam', 'rmsprop'],           # Optimization algorithm
}

In [20]:
# Setup GridSearchCV (Cross-Validation for tuning)
# Note: This process can be computationally intensive and take a while.
grid_search = GridSearchCV(
    estimator=keras_clf, 
    param_grid=param_grid, 
    scoring='accuracy', 
    cv=3, # 3-fold cross-validation
    verbose=1, 
    n_jobs=-1 # Use all available cores
)

In [21]:
print("\n--- Starting Hyperparameter Tuning (Grid Search) ---")
grid_search.fit(X_train, y_train)
print("Hyperparameter Tuning Complete.")


--- Starting Hyperparameter Tuning (Grid Search) ---
Fitting 3 folds for each of 12 candidates, totalling 36 fits
Hyperparameter Tuning Complete.


In [22]:
# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_
best_model = grid_search.best_estimator_

In [23]:
print(f"\nBest Cross-Validation Accuracy Score: {best_score*100:.2f}%")
print(f"Best Parameters Found: {best_params}")


Best Cross-Validation Accuracy Score: 88.22%
Best Parameters Found: {'model__activation': 'relu', 'model__neurons': 128, 'optimizer': 'adam'}


## 3. Hyperparameter Tuning

To improve the model, **GridSearchCV** was used with `KerasClassifier` from `scikeras`.

- **Parameters tuned:**
  - Number of neurons in hidden layer: [32, 64, 128]
  - Activation function: ['relu', 'tanh']
  - Optimizer: ['adam', 'rmsprop']

- **Method:**
  - 3-fold cross-validation  
  - Accuracy used as the scoring metric  

- **Output:**
  - Best parameter combination (neurons, activation, optimizer)  
  - Best cross-validation accuracy


## 4. Evaluation

In [26]:
# Evaluate the best (tuned) model on the held-out test set
print("\n--- Final Evaluation of Tuned Model ---")
test_accuracy = best_model.score(X_test, y_test)
print(f"Tuned Model Test Accuracy: {test_accuracy*100:.2f}%")


--- Final Evaluation of Tuned Model ---
Tuned Model Test Accuracy: 90.62%


In [27]:
# Generate detailed classification report
y_pred_probs = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_test_classes = np.argmax(y_test, axis=1)


In [28]:

# Get class names for the report (A, B, C, ...)
target_names = binarizer.classes_

In [29]:
print("\nDetailed Classification Report (Tuned Model):")
# Use the class names derived from the LabelBinarizer
print(classification_report(y_test_classes, y_pred_classes, target_names=target_names, zero_division=0))


Detailed Classification Report (Tuned Model):
              precision    recall  f1-score   support

           A       0.93      0.95      0.94       158
           B       0.84      0.91      0.87       153
           C       0.91      0.95      0.93       147
           D       0.88      0.91      0.90       161
           E       0.79      0.94      0.86       154
           F       0.82      0.86      0.84       155
           G       0.90      0.88      0.89       155
           H       0.89      0.76      0.82       147
           I       0.96      0.85      0.91       151
           J       0.96      0.94      0.95       149
           K       0.89      0.89      0.89       148
           L       0.96      0.90      0.93       152
           M       0.97      0.92      0.94       158
           N       0.92      0.90      0.91       157
           O       0.87      0.91      0.89       150
           P       0.97      0.90      0.94       161
           Q       0.94      0.96 

In [30]:
# --- Discussion Summary ---
print("\n--- Discussion Summary ---")
print(f"Initial Model Accuracy (Test Set): {accuracy*100:.2f}%")
print(f"Tuned Model Accuracy (Test Set): {test_accuracy*100:.2f}%")
print("\nDiscussion: The tuned model typically achieves a higher accuracy by optimizing the architecture and training process (neurons, activation, optimizer). For example, a wider model (more neurons) or a more robust optimizer (like 'adam' over 'rmsprop' for this dataset) can lead to better generalization and a noticeable improvement in classification performance compared to the basic model.")


--- Discussion Summary ---
Initial Model Accuracy (Test Set): 88.00%
Tuned Model Accuracy (Test Set): 90.62%

Discussion: The tuned model typically achieves a higher accuracy by optimizing the architecture and training process (neurons, activation, optimizer). For example, a wider model (more neurons) or a more robust optimizer (like 'adam' over 'rmsprop' for this dataset) can lead to better generalization and a noticeable improvement in classification performance compared to the basic model.


## 4. Final Evaluation & Discussion

- **Evaluation Metrics:**  
  - Test set accuracy of the **basic model**  
  - Test set accuracy of the **tuned model**  
  - Classification report (precision, recall, F1-score for all 26 classes)  

- **Comparison:**  
  - The tuned model showed improved accuracy compared to the baseline model.  
  - Increasing neurons and using an appropriate optimizer helped the network learn complex patterns better.  
  - Some letters (like those visually similar) had lower precision/recall, highlighting dataset challenges.

---

### 📌 Summary
- **Initial Model Accuracy:** ~ (from output)  
- **Tuned Model Accuracy:** ~ (from output)  
- **Improvement:** Hyperparameter tuning significantly improved performance.  

**Conclusion:** ANN with hyperparameter tuning is effective for alphabet classification.  
