### Install required libraries

In [None]:
%pip install -r requirements.txt

### Prepare data

In [None]:
import numpy as np
from utils import onehot_encode

def stratified_split(val_ratio, data):
    """Split ``data`` into train and validation parts, one class at a time.

    For every distinct value of ``data['target']`` the matching row indices
    are shuffled with a fixed-seed generator; the first
    ``int(count * val_ratio)`` of them go to the validation split and the
    rest to the training split.

    Args:
        val_ratio: Fraction of each class assigned to the validation split.
        data: Array that can be indexed by the field name ``'target'`` and by
            a list of row indices (e.g. a NumPy structured array).

    Returns:
        A ``(train_split, val_split)`` tuple of rows taken from ``data``.
        Within each split the rows are grouped by class, in the order in
        which ``np.unique`` yields the class values.
    """
    targets = data['target']
    picked_train, picked_val = [], []

    for cls in np.unique(targets):
        members = np.nonzero(targets == cls)[0]
        print(f"cls is {cls}, cls_indices is {len(members)}")

        # The generator is rebuilt with the same seed for every class, so the
        # split is reproducible and classes of equal size receive the same
        # shuffle pattern.
        shuffled = np.random.RandomState(seed=148).permutation(members)
        cut = int(len(shuffled) * val_ratio)

        picked_val.extend(shuffled[:cut])
        picked_train.extend(shuffled[cut:])

    return data[picked_train], data[picked_val]

# The training and test files are read the same way: the first line is
# skipped and every remaining row is parsed as two floats followed by a
# one-character class label.
data, test_data = (
    np.loadtxt(
        path,
        skiprows=1,
        dtype=[('x', float), ('y', float), ('target', 'U1')],
    )
    for path in ('dataset/2d.trn.dat', 'dataset/2d.tst.dat')
)

def normalize(x, mean, std):
    """Shift ``x`` by ``mean`` and scale it by ``std``.

    Args:
        x: Value or array to transform.
        mean: Offset subtracted from ``x``.
        std: Divisor applied after the shift.

    Returns:
        ``(x - mean) / std``, using whatever broadcasting the operand types
        provide.
    """
    centered = x - mean
    return centered / std

# Hold out 20% of every class of the training file for validation.
train_data, val_data = stratified_split(0.2, data)

# Stack the 'x' and 'y' fields of each split as the two columns of a
# feature matrix.
X_train_raw, X_val_raw, X_test_raw = (
    np.column_stack((split['x'], split['y']))
    for split in (train_data, val_data, test_data)
)

# Labels are encoded per split and transposed.
# NOTE(review): presumably this yields one column per sample, matching the
# transposed feature matrices below -- confirm against utils.onehot_encode.
y_train, y_val, y_test = (
    onehot_encode(split['target']).T
    for split in (train_data, val_data, test_data)
)

# z-score normalization: the statistics come from the training split only and
# are reused for the validation and test splits.
train_mean = X_train_raw.mean(axis=0)
train_std = X_train_raw.std(axis=0)
X_train_zs, X_val_zs, X_test_zs = (
    normalize(raw, train_mean, train_std).T
    for raw in (X_train_raw, X_val_raw, X_test_raw)
)

# min-max normalization: the bounds likewise come from the training split
# only, so validation/test values can fall outside [0, 1].
min_val = X_train_raw.min(axis=0)
max_val = X_train_raw.max(axis=0)
X_train_mm, X_val_mm, X_test_mm = (
    ((raw - min_val) / (max_val - min_val)).T
    for raw in (X_train_raw, X_val_raw, X_test_raw)
)

## Grid Search

In [None]:
# Reload edited modules automatically so the notebook does not keep using stale cached imports
%load_ext autoreload
%autoreload 2

import pandas as pd
from neural_network import NeuralNetwork, Layer, ActivationFunction
from utils import compute_accuracy

def grid_search():
    """Train one network per hyperparameter combination and report the best.

    Every combination of initial learning rate, decay schedule, decay
    constant, weight initialisation, epoch count and input normalisation is
    tried. For each one a fresh three-layer network (12 and 8 ReLU units,
    then a 3-unit softmax) is trained on the training split and scored on the
    validation split with ``compute_accuracy``.

    Side effects:
        Prints one line per combination plus a final summary, and writes all
        results to ``grid_search_results.csv`` in the working directory.

    Returns:
        None.
    """
    # Imported locally so the function does not rely on a cell-level import.
    import itertools

    init_lr = [0.1, 0.05, 0.025]
    lr_decay = ['exponential', 'step']
    decay_k = [1/2, 1/4, 1/8]
    weight_init = ['he-normal', 'he-uniform']
    epochs = [25, 50, 75]
    data = ['z-score', 'min-max']

    # (training inputs, validation inputs) for each normalisation scheme.
    inputs_by_scheme = {
        'z-score': (X_train_zs, X_val_zs),
        'min-max': (X_train_mm, X_val_mm),
    }

    results = []
    # Start below any possible score so the first combination always becomes
    # the incumbent, even if its accuracy is 0.
    best_accuracy = float('-inf')
    best_params = None

    # product() varies the right-most list fastest, i.e. the same visiting
    # order as nesting the six loops in this order.
    grid = itertools.product(init_lr, lr_decay, decay_k, weight_init, epochs, data)
    for params in grid:
        ilr, lrd, dk, wi, ep, d = params
        X_fit, X_eval = inputs_by_scheme[d]

        # A new network per combination so no weights carry over between runs.
        nn = NeuralNetwork(
            layers=[
                Layer(ActivationFunction.relu, 12, input_dim=2),
                Layer(ActivationFunction.relu, 8),
                Layer(ActivationFunction.softmax, 3),
            ],
            weights_init=wi,
        )

        # The loss histories returned by train() are not needed here.
        nn.train(X_fit, y_train, lr_init=ilr, lr_decay=lrd, decay_k=dk, epochs=ep, X_val=X_eval, y_val=y_val)
        y_pred = nn.predict(X_eval)

        acc = compute_accuracy(y_val, y_pred)

        results.append({
            'init_lr': ilr,
            'lr_decay': lrd,
            'decay_k': dk,
            'weight_init': wi,
            'epochs': ep,
            'data': d,
            'val_accuracy': acc
        })

        # Strict comparison: on ties the earliest combination is kept.
        if acc > best_accuracy:
            best_accuracy = acc
            best_params = params

        print(f"Accuracy: {acc}, Params: {params}")

    df_results = pd.DataFrame(results)
    df_results.to_csv('grid_search_results.csv', index=False)
    print(f"\nBest accuracy: {best_accuracy}")
    print(f"Best params: {best_params}")

# Run the full sweep: prints one line per configuration and writes
# grid_search_results.csv.
grid_search()

## Best performing model

In [None]:
# Retrain a single network with fixed hyperparameters on the z-score inputs.
# NOTE(review): these values are presumably the winner of the grid search
# above -- confirm against grid_search_results.csv.
nn = NeuralNetwork(
    layers=[
        Layer(ActivationFunction.relu, 12, input_dim=2),
        Layer(ActivationFunction.relu, 8),
        Layer(ActivationFunction.softmax, 3),
    ],
    weights_init='he-uniform',
)

# Keep the loss histories; the loss-plot cell reads train_losses/val_losses.
train_losses, val_losses = nn.train(X_train_zs, y_train, lr_init=0.1, lr_decay='step', decay_k=1/2, epochs=75, X_val=X_val_zs, y_val=y_val)
y_pred_val = nn.predict(X_val_zs)
print(f"Validation accuracy {compute_accuracy(y_val, y_pred_val)}")
# The test inputs were normalised with the training-split statistics.
y_pred_test = nn.predict(X_test_zs)
print(f"Test accuracy {compute_accuracy(y_test, y_pred_test)}")

## Plot loss vs epochs

In [None]:
from matplotlib import pyplot as plt


# Plot the training and validation loss histories returned by nn.train
# (presumably one value per epoch -- the x axis is the list index).
plt.figure(figsize=(6, 4))

plt.plot(train_losses, label='Training Loss', color='blue', linewidth=2)
plt.plot(val_losses, label='Validation Loss', color='orange', linewidth=2)

plt.xlabel('Epochs', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.title('Training and Validation Loss vs. Epochs', fontsize=14, pad=15)

plt.legend(fontsize=12, loc='upper right')

plt.grid(True, linestyle='--', alpha=0.7)
# Apply the final layout before saving so the file on disk matches the
# figure that is displayed.
plt.tight_layout()
plt.savefig('loss_plot.png', dpi=300, bbox_inches='tight')
plt.show()

## Plot confusion matrix

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Entry [i, j] sums y_test[i, n] * y_pred_test[j, n] over the samples n.
# NOTE(review): this is a count matrix only if y_pred_test is one-hot; if
# nn.predict returns probabilities the int() below truncates soft counts --
# confirm.
confusion_matrix = y_test @ y_pred_test.T
class_names = ['A', 'B', 'C']

plt.figure(figsize=(6, 4))
plt.imshow(confusion_matrix, cmap='Blues', interpolation='nearest')

# Write each cell's value at its centre; rows are true labels (y position),
# columns are predicted labels (x position).
for i, j in np.ndindex(3, 3):
    cell_text = str(int(confusion_matrix[i, j]))
    plt.text(j, i, cell_text, ha='center', va='center', color='black')

plt.xticks(np.arange(3), class_names)
plt.yticks(np.arange(3), class_names)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion Matrix')
# plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

## Plot classification results

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Class index per test sample: the row holding the largest value in each column.
true_class = np.argmax(y_test, axis=0)
pred_class = np.argmax(y_pred_test, axis=0)
is_wrong = pred_class != true_class

# Per-sample colour flag; not used by the plotting calls in this cell.
colors = np.where(is_wrong, 'red', 'blue')

class_colors = ['green', 'orange', 'blue']

plt.figure(figsize=(6, 4))

# One scatter series per true class; X_test_zs has the two features as rows,
# so unpacking the column selection yields the x and y coordinates.
for i, color in enumerate(class_colors):
    class_indices = np.where(true_class == i)[0]
    plt.scatter(*X_test_zs[:, class_indices], color=color, label=f'Class {chr(65 + i)}')

# Draw the misclassified samples last so they sit on top of the class colours.
misclassified_indices = np.where(is_wrong)[0]
plt.scatter(*X_test_zs[:, misclassified_indices], color='red', label='Misclassified', edgecolor='black')

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Classification Results')
# plt.savefig('classification_results.png', dpi=300, bbox_inches='tight')
plt.legend()
plt.show()