<a href="https://colab.research.google.com/github/showravj2-create/Compact-NN-pipeline-/blob/main/Compact_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import os
import json
import random
from datetime import datetime
from dataclasses import dataclass, asdict

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# --- Reproducibility helpers ---
def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)

# --- Simple feed-forward neural network implemented with NumPy ---
class SimpleMLP:
    """A tiny, clear MLP implemented in NumPy (sufficient to demonstrate core ideas)."""
    def __init__(self, in_dim, hidden_dim, out_dim, seed=0):
        rng = np.random.RandomState(seed)
        # Xavier init
        self.W1 = rng.randn(in_dim, hidden_dim) * np.sqrt(2.0 / (in_dim + hidden_dim))
        self.b1 = np.zeros((hidden_dim,))
        self.W2 = rng.randn(hidden_dim, out_dim) * np.sqrt(2.0 / (hidden_dim + out_dim))
        self.b2 = np.zeros((out_dim,))

    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    @staticmethod
    def relu_deriv(x):
        return (x > 0).astype(float)

    @staticmethod
    def softmax(x):
        # numerically stable
        x = x - x.max(axis=1, keepdims=True)
        ex = np.exp(x)
        return ex / ex.sum(axis=1, keepdims=True)

    def forward(self, X):
        z1 = X.dot(self.W1) + self.b1
        a1 = self.relu(z1)
        logits = a1.dot(self.W2) + self.b2
        probs = self.softmax(logits)
        cache = {"X": X, "z1": z1, "a1": a1, "logits": logits, "probs": probs}
        return probs, cache

    def compute_loss(self, probs, y_onehot):
        # cross-entropy
        N = y_onehot.shape[0]
        eps = 1e-12
        # Convert sparse y_onehot to dense for element-wise multiplication
        if hasattr(y_onehot, 'toarray'):
            y_onehot = y_onehot.toarray()
        loss = -np.sum(y_onehot * np.log(probs + eps)) / N
        return loss

    def backward(self, cache, y_onehot):
        N = y_onehot.shape[0]
        probs = cache["probs"]
        a1 = cache["a1"]
        X = cache["X"]

        # Convert sparse y_onehot to dense if necessary
        if hasattr(y_onehot, 'toarray'):
             y_onehot = y_onehot.toarray()

        dlogits = (probs - y_onehot) / N  # shape: (N, C)
        dW2 = a1.T.dot(dlogits)  # (H, C)
        db2 = dlogits.sum(axis=0)
        da1 = dlogits.dot(self.W2.T)  # (N, H)
        dz1 = da1 * self.relu_deriv(cache["z1"])
        dW1 = X.T.dot(dz1)
        db1 = dz1.sum(axis=0)

        grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
        return grads

    def step(self, grads, lr=1e-3):
        self.W1 -= lr * grads["dW1"]
        self.b1 -= lr * grads["db1"]
        self.W2 -= lr * grads["dW2"]
        self.b2 -= lr * grads["db2"]

    def predict(self, X):
        probs, _ = self.forward(X)
        return probs.argmax(axis=1)

    def save(self, path):
        np.savez_compressed(path,
                           W1=self.W1, b1=self.b1,
                           W2=self.W2, b2=self.b2)

    @classmethod
    def load(cls, path):
        d = np.load(path)
        inst = cls(1,1,1)  # temporary shape; will replace attributes
        inst.W1 = d["W1"]
        inst.b1 = d["b1"]
        inst.W2 = d["W2"]
        inst.b2 = d["b2"]
        return inst

# --- Small utilities for plotting and reporting ---
def plot_training(history, out_dir):
    fig, ax = plt.subplots(1, 2, figsize=(10,4))
    ax[0].plot(history["train_loss"], label="train")
    ax[0].set_title("Loss")
    ax[0].set_xlabel("epoch")
    ax[0].legend()
    ax[1].plot(history["val_acc"], label="val_acc")
    ax[1].set_title("Validation Accuracy")
    ax[1].set_xlabel("epoch")
    ax[1].legend()
    plt.tight_layout()
    p = os.path.join(out_dir, "training_plots.png")
    plt.savefig(p)
    plt.close(fig)
    return p

def plot_confusion(y_true, y_pred, classes, out_dir):
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(6,5))
    im = ax.imshow(cm, interpolation='nearest')
    ax.figure.colorbar(im, ax=ax)
    ax.set_xticks(np.arange(len(classes)))
    ax.set_yticks(np.arange(len(classes)))
    ax.set_xticklabels(classes, rotation=45)
    ax.set_yticklabels(classes)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    plt.tight_layout()
    p = os.path.join(out_dir, "confusion_matrix.png")
    plt.savefig(p)
    plt.close(fig)
    return p

# --- Data loading & preprocessing ---
def load_and_preprocess(test_size=0.2, val_size=0.1, seed=0):
    digits = load_digits()
    X = digits.data.astype(np.float32)  # shape (n_samples, 64)
    y = digits.target.astype(np.int64)
    classes = [str(i) for i in range(10)]

    # train/val/test split
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=(test_size + val_size), random_state=seed, stratify=y
    )
    # separate val and test from X_temp
    val_fraction = val_size / (test_size + val_size)
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=val_fraction, random_state=seed, stratify=y_temp
    )

    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    onehot = OneHotEncoder(categories='auto')
    y_train_oh = onehot.fit_transform(y_train.reshape(-1,1))
    y_val_oh = onehot.transform(y_val.reshape(-1,1))

    return (X_train, y_train, y_train_oh,
            X_val, y_val, y_val_oh,
            X_test, y_test, classes)

# --- Training loop ---
def train(model, data, epochs=50, lr=0.01, batch_size=64, out_dir="out"):
    X_train, y_train, y_train_oh, X_val, y_val, y_val_oh, X_test, y_test, classes = data
    n = X_train.shape[0]
    history = {"train_loss": [], "val_acc": []}

    for epoch in range(1, epochs+1):
        # Shuffle
        idx = np.random.permutation(n)
        X_sh, y_sh, y_sh_oh = X_train[idx], y_train[idx], y_train_oh[idx]

        # Mini-batch training
        epoch_loss = 0.0
        for i in range(0, n, batch_size):
            Xb = X_sh[i:i+batch_size]
            yb_oh = y_sh_oh[i:i+batch_size]
            probs, cache = model.forward(Xb)
            loss = model.compute_loss(probs, yb_oh)
            grads = model.backward(cache, yb_oh)
            model.step(grads, lr=lr)
            epoch_loss += loss * Xb.shape[0]

        epoch_loss /= n
        # validation
        y_val_pred = model.predict(X_val)
        val_acc = accuracy_score(y_val, y_val_pred)
        history["train_loss"].append(epoch_loss)
        history["val_acc"].append(val_acc)

        if epoch % max(1, epochs // 10) == 0 or epoch == 1:
            print(f"[{epoch:3d}/{epochs}] loss={epoch_loss:.4f} val_acc={val_acc:.4f}")

    # Final evaluation on test set
    y_test_pred = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_test_pred)
    print(f">>> Final test accuracy: {test_acc:.4f}")

    # Save artifacts
    os.makedirs(out_dir, exist_ok=True)
    model_path = os.path.join(out_dir, "model.npz")
    model.save(model_path)
    training_plot = plot_training(history, out_dir)
    conf_plot = plot_confusion(y_test, y_test_pred, classes, out_dir)
    report = classification_report(y_test, y_test_pred, digits=4)
    with open(os.path.join(out_dir, "classification_report.txt"), "w") as f:
        f.write(report)

    metadata = {
        "trained_at": datetime.utcnow().isoformat() + "Z",
        "test_accuracy": float(test_acc),
        "n_train": int(n),
        "model_file": model_path,
        "training_plot": training_plot,
        "confusion_plot": conf_plot,
    }
    with open(os.path.join(out_dir, "metadata.json"), "w") as f:
        json.dump(metadata, f, indent=2)

    print(f"Artifacts saved to: {out_dir}")
    return history, metadata

# --- CLI & Entrypoint ---
def main():
    # Set parameters directly in the notebook
    epochs = 40
    lr = 0.01
    hidden = 128
    batch = 64
    seed = 0
    out = "out"

    set_seed(seed)
    data = load_and_preprocess(seed=seed)
    in_dim = data[0].shape[1]
    out_dim = 10
    model = SimpleMLP(in_dim=in_dim, hidden_dim=hidden, out_dim=out_dim, seed=seed)
    history, metadata = train(model, data, epochs=epochs, lr=lr, batch_size=batch, out_dir=out)
    # Print short summary for quick copy-paste in an application email
    summary = f"Model trained: test_acc={metadata['test_accuracy']:.4f}, epochs={epochs}, hidden={hidden}"
    print("\nQuick summary (copy this in an email/GitHub README):")
    print(summary)

main()

[  1/40] loss=2.4280 val_acc=0.2139
[  4/40] loss=1.4846 val_acc=0.6833
[  8/40] loss=0.9453 val_acc=0.8333
[ 12/40] loss=0.6854 val_acc=0.8639
[ 16/40] loss=0.5351 val_acc=0.8861
[ 20/40] loss=0.4383 val_acc=0.9028
[ 24/40] loss=0.3719 val_acc=0.9167
[ 28/40] loss=0.3236 val_acc=0.9167
[ 32/40] loss=0.2871 val_acc=0.9194
[ 36/40] loss=0.2582 val_acc=0.9306
[ 40/40] loss=0.2349 val_acc=0.9361
>>> Final test accuracy: 0.9389
Artifacts saved to: out

Quick summary (copy this in an email/GitHub README):
Model trained: test_acc=0.9389, epochs=40, hidden=128


  "trained_at": datetime.utcnow().isoformat() + "Z",
