# Lab 4
### kavaskar
### 2347230

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import tensorflow as tf
import tensorflow_datasets as tfds


In [4]:

# 1. Data Preparation
def load_data():
    # Load Kuzushiji-MNIST dataset
    (ds_train, ds_test), ds_info = tfds.load(
        'kmnist',
        split=['train', 'test'],
        shuffle_files=True,
        as_supervised=True,
        with_info=True,
    )
    
    def preprocess(image, label):
        image = tf.cast(image, tf.float32) / 255.0
        return tf.reshape(image, [-1]), label

    # Apply preprocessing and convert to numpy arrays
    ds_train = ds_train.map(preprocess).batch(ds_info.splits['train'].num_examples)
    ds_test = ds_test.map(preprocess).batch(ds_info.splits['test'].num_examples)

    X_train, y_train = next(iter(ds_train))
    X_test, y_test = next(iter(ds_test))

    return (X_train.numpy(), y_train.numpy()), (X_test.numpy(), y_test.numpy())



In [5]:

class RBFNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        
        self.centers = None
        self.weights = np.random.randn(hidden_dim, output_dim)
    
    def gaussian(self, x, c):
        return np.exp(-np.linalg.norm(x - c, axis=1)**2 / (2 * (self.sigma**2)))
    
    def forward(self, X):
        # RBF layer
        G = np.zeros((X.shape[0], self.hidden_dim))
        for i, center in enumerate(self.centers):
            G[:, i] = self.gaussian(X, center)
        
        # Output layer
        output = np.dot(G, self.weights)
        return self.softmax(output)
    
    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
    
    def train(self, X, y, learning_rate, epochs):
        # Use K-means to determine RBF centers
        kmeans = KMeans(n_clusters=self.hidden_dim)
        kmeans.fit(X)
        self.centers = kmeans.cluster_centers_
        
        # Compute sigma (width of Gaussian)
        distances = np.zeros((self.hidden_dim, self.hidden_dim))
        for i in range(self.hidden_dim):
            for j in range(self.hidden_dim):
                distances[i, j] = np.linalg.norm(self.centers[i] - self.centers[j])
        self.sigma = np.mean(distances) / np.sqrt(2 * self.hidden_dim)
        
        # Training loop
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)
            
            # Backward pass (gradient descent)
            error = output - np.eye(self.output_dim)[y]
            gradient = np.dot(self.gaussian(X, self.centers).T, error)
            self.weights -= learning_rate * gradient
            
            if epoch % 10 == 0:
                loss = -np.sum(np.eye(self.output_dim)[y] * np.log(output)) / X.shape[0]
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
    
    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)


In [6]:

# 3. Training
def train_model(X_train, y_train):
    model = RBFNetwork(input_dim=784, hidden_dim=100, output_dim=10)
    model.train(X_train, y_train, learning_rate=0.01, epochs=100)
    return model


In [7]:

# 4. Evaluation
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    return accuracy, conf_matrix

def visualize_results(accuracy, conf_matrix):
    print(f"Accuracy: {accuracy:.4f}")
    
    plt.figure(figsize=(10, 8))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()


In [9]:

# 5. Analysis
def analyze_rbf_units(X_train, y_train, X_test, y_test, rbf_units):
    accuracies = []
    for units in rbf_units:
        model = RBFNetwork(input_dim=784, hidden_dim=units, output_dim=10)
        model.train(X_train, y_train, learning_rate=0.01, epochs=100)
        accuracy, _ = evaluate_model(model, X_test, y_test)
        accuracies.append(accuracy)
    
    plt.figure(figsize=(10, 6))
    plt.plot(rbf_units, accuracies, marker='o')
    plt.title('RBF Units vs Accuracy')
    plt.xlabel('Number of RBF Units')
    plt.ylabel('Accuracy')
    plt.show()


In [10]:

# Main execution
if __name__ == "__main__":
    # Load and preprocess data
    (X_train, y_train), (X_test, y_test) = load_data()

    # Train the model
    model = train_model(X_train, y_train)

    # Evaluate the model
    accuracy, conf_matrix = evaluate_model(model, X_test, y_test)
    visualize_results(accuracy, conf_matrix)

    # Analyze the effect of RBF units
    rbf_units = [50, 100, 150, 200, 250]
    analyze_rbf_units(X_train, y_train, X_test, y_test, rbf_units)

    print("Strengths of RBF Network for this dataset:")
    print("1. Can capture non-linear relationships in the data.")
    print("2. Faster training compared to some other neural network architectures.")
    print("3. Good at handling localized features, which may be important for character recognition.")

    print("\nLimitations of RBF Network for this dataset:")
    print("1. Performance heavily depends on the choice of centers and number of RBF units.")
    print("2. May require more memory as the number of RBF units increases.")
    print("3. Can be sensitive to the scale of the input features.")

    print("\nEffect of number of RBF units:")
    print("Increasing the number of RBF units generally improves the model's ability to capture complex patterns,")
    print("but it also increases computational cost and the risk of overfitting. There's usually an optimal")
    print("number of units beyond which performance plateaus or degrades. The analyze_rbf_units function")
    print("helps visualize this trade-off.")

  super()._check_params_vs_input(X, default_n_init=10)


ValueError: operands could not be broadcast together with shapes (60000,784) (100,784) 