# Artificial Neural Network (ANN) Implementation

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, make_regression, load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, mean_squared_error, classification_report
import pandas as pd

# Seed NumPy's global RNG so anything drawing from np.random is reproducible
# from one run of the notebook to the next.
np.random.seed(42)

# The bundled seaborn style sheets are named 'seaborn-v0_8*' from
# matplotlib 3.6 onwards; older releases only know the legacy 'seaborn' name.
# An unknown style name raises OSError, so fall back instead of aborting.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
plt.rcParams['figure.figsize'] = (10, 6)

print("Libraries imported successfully!")
print(f"NumPy version: {np.__version__}")

# TensorFlow is optional at this point: report its version when present,
# otherwise just print a notice and carry on.
try:
    import tensorflow as tf
    print(f"TensorFlow version: {tf.__version__}")
except ImportError:
    print("TensorFlow not available. Will install later if needed.")

## Part 1: Building ANN from Scratch

In [None]:
class ActivationFunctions:
    """Namespace of element-wise activation functions and their derivatives.

    Every member is a static method taking the pre-activation value(s) ``x``.
    The ``*_derivative`` methods also expect the pre-activation ``x`` (not the
    already-activated output).
    """

    @staticmethod
    def sigmoid(x):
        """Return 1 / (1 + exp(-x)) after clipping ``x`` to [-500, 500]."""
        # Presumably the clip guards np.exp against overflow on extreme inputs.
        bounded = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-bounded))

    @staticmethod
    def sigmoid_derivative(x):
        """Return sigmoid(x) * (1 - sigmoid(x))."""
        activated = ActivationFunctions.sigmoid(x)
        complement = 1 - activated
        return activated * complement

    @staticmethod
    def relu(x):
        """Return the element-wise maximum of 0 and ``x``."""
        return np.maximum(0, x)

    @staticmethod
    def relu_derivative(x):
        """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere."""
        is_positive = x > 0
        return is_positive.astype(float)

    @staticmethod
    def tanh(x):
        """Return the hyperbolic tangent of ``x``."""
        return np.tanh(x)

    @staticmethod
    def tanh_derivative(x):
        """Return 1 - tanh(x)^2."""
        squashed = np.tanh(x)
        return 1 - squashed ** 2

    @staticmethod
    def linear(x):
        """Identity activation: hand ``x`` back unchanged."""
        return x

    @staticmethod
    def linear_derivative(x):
        """Return an array of ones with the same shape and dtype as ``x``."""
        return np.ones_like(x)

# Sample [-5, 5] and draw each activation (solid blue) together with its
# derivative (dashed red) in a 2x2 grid of panels.
x = np.linspace(-5, 5, 100)
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
fig.suptitle('Activation Functions and Their Derivatives')

# (panel, title/legend label, function, derivative) for every subplot.
activation_panels = [
    (axes[0, 0], 'Sigmoid',
     ActivationFunctions.sigmoid, ActivationFunctions.sigmoid_derivative),
    (axes[0, 1], 'ReLU',
     ActivationFunctions.relu, ActivationFunctions.relu_derivative),
    (axes[1, 0], 'Tanh',
     ActivationFunctions.tanh, ActivationFunctions.tanh_derivative),
    (axes[1, 1], 'Linear',
     ActivationFunctions.linear, ActivationFunctions.linear_derivative),
]

for panel, panel_title, func, func_derivative in activation_panels:
    panel.plot(x, func(x), 'b-', label=panel_title)
    panel.plot(x, func_derivative(x), 'r--', label='Derivative')
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()

### Neural Network Class Implementation

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward network trained by full-batch gradient descent.

    The output layer is selected from the size of the last layer:

    * ``layers[-1] == 1``: identity output scored with mean squared error
      (regression, or 0/1 labels thresholded at 0.5 by ``predict_classes``);
    * otherwise: softmax output scored with categorical cross-entropy against
      one-hot targets.

    All hidden layers share the activation chosen at construction time.
    """

    def __init__(self, layers, activation='sigmoid', learning_rate=0.01):
        """Create the network and draw its initial parameters.

        Args:
            layers: Sequence of layer sizes, input layer first, output last.
            activation: Hidden-layer activation: 'sigmoid', 'relu' or 'tanh'.
                Any other value falls back to 'sigmoid'.
            learning_rate: Step size used by ``update_parameters``.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.num_layers = len(layers)

        known_activations = {
            'sigmoid': (ActivationFunctions.sigmoid,
                        ActivationFunctions.sigmoid_derivative),
            'relu': (ActivationFunctions.relu,
                     ActivationFunctions.relu_derivative),
            'tanh': (ActivationFunctions.tanh,
                     ActivationFunctions.tanh_derivative),
        }
        # Unrecognised names keep the historical behaviour: use sigmoid.
        self.activation, self.activation_derivative = known_activations.get(
            activation, known_activations['sigmoid']
        )

        self.weights = []
        self.biases = []

        # Standard-normal draws scaled by sqrt(2 / fan_in); biases start at 0.
        # Weights are drawn layer by layer from the global NumPy RNG, so the
        # result is reproducible under np.random.seed.
        for i in range(self.num_layers - 1):
            w = np.random.randn(layers[i], layers[i + 1]) * np.sqrt(2.0 / layers[i])
            b = np.zeros((1, layers[i + 1]))
            self.weights.append(w)
            self.biases.append(b)

        # Caches filled by forward_propagation and read by backward_propagation.
        self.activations = []
        self.z_values = []

        # One cost value per epoch, appended by train().
        self.cost_history = []

    @staticmethod
    def _softmax(z):
        """Row-wise softmax; the row maximum is subtracted for numerical stability."""
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    @staticmethod
    def _as_column_targets(y, reference):
        """Reshape 1-D targets to a column when ``reference`` is an (m, 1) array.

        Without this, ``(m, 1) - (m,)`` silently broadcasts to an (m, m) matrix
        and corrupts both the cost and the gradients.
        """
        y = np.asarray(y)
        if (y.ndim == 1 and reference.ndim == 2 and reference.shape[1] == 1
                and y.shape[0] == reference.shape[0]):
            return y.reshape(-1, 1)
        return y

    def forward_propagation(self, X):
        """Run ``X`` through the network and cache the intermediate values.

        Args:
            X: Input array of shape (n_samples, layers[0]).

        Returns:
            The output-layer activations: raw linear outputs for a single
            output unit, softmax probabilities otherwise.
        """
        self.activations = [X]
        self.z_values = []

        current_input = X
        output_index = self.num_layers - 2

        for i in range(self.num_layers - 1):
            z = np.dot(current_input, self.weights[i]) + self.biases[i]
            self.z_values.append(z)

            if i < output_index:
                activation = self.activation(z)
            elif self.layers[-1] == 1:
                activation = z
            else:
                # Softmax pairs with the (a - y) output delta used in
                # backward_propagation and with the cross-entropy cost.
                activation = self._softmax(z)

            self.activations.append(activation)
            current_input = activation

        return current_input

    def compute_cost(self, y_true, y_pred):
        """Return the scalar training cost for the given predictions.

        Mean squared error for a single output unit; otherwise the
        categorical cross-entropy averaged over samples (``y_true`` one-hot).
        """
        y_true = self._as_column_targets(y_true, y_pred)

        if self.layers[-1] == 1:
            return np.mean((y_true - y_pred) ** 2)

        m = y_true.shape[0]
        epsilon = 1e-15
        # Clip so that log() never sees an exact 0.
        y_pred_clipped = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.sum(y_true * np.log(y_pred_clipped)) / m

    def backward_propagation(self, X, y):
        """Compute gradients from the values cached by the last forward pass.

        ``forward_propagation`` must have been called on the same ``X`` first.

        Returns:
            ``(dW, db)``: lists of gradients aligned with ``self.weights`` and
            ``self.biases``.
        """
        m = X.shape[0]
        y = self._as_column_targets(y, self.activations[-1])

        dW = [np.zeros_like(w) for w in self.weights]
        db = [np.zeros_like(b) for b in self.biases]

        # Output delta. For softmax + cross-entropy this is the exact gradient
        # w.r.t. the output pre-activation; for the linear/MSE case it is the
        # gradient of half the MSE (the factor 2 is left to the learning rate).
        delta = self.activations[-1] - y

        for i in range(self.num_layers - 2, -1, -1):
            dW[i] = np.dot(self.activations[i].T, delta) / m
            db[i] = np.mean(delta, axis=0, keepdims=True)

            if i > 0:
                # Push the error back through layer i's weights and the
                # hidden activation of layer i-1.
                delta = (np.dot(delta, self.weights[i].T)
                         * self.activation_derivative(self.z_values[i - 1]))

        return dW, db

    def update_parameters(self, dW, db):
        """Apply one in-place gradient-descent step to all weights and biases."""
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * dW[i]
            self.biases[i] -= self.learning_rate * db[i]

    def train(self, X, y, epochs=1000, verbose=True):
        """Train on the full batch ``(X, y)`` for ``epochs`` iterations.

        The cost of every epoch is appended to ``self.cost_history``; with
        ``verbose`` it is also printed every 100 epochs.
        """
        for epoch in range(epochs):
            y_pred = self.forward_propagation(X)

            cost = self.compute_cost(y, y_pred)
            self.cost_history.append(cost)

            dW, db = self.backward_propagation(X, y)

            self.update_parameters(dW, db)

            if verbose and epoch % 100 == 0:
                print(f"Epoch {epoch}, Cost: {cost:.6f}")

    def predict(self, X):
        """Return the raw network output for ``X`` (a forward pass)."""
        return self.forward_propagation(X)

    def predict_classes(self, X):
        """Return hard class predictions for ``X``.

        A single output unit is thresholded at 0.5 and returned as an integer
        column; otherwise the index of the largest output per row is returned.
        """
        predictions = self.predict(X)
        if self.layers[-1] == 1:
            return (predictions > 0.5).astype(int)
        return np.argmax(predictions, axis=1)

print("Neural Network class implemented successfully!")

## Part 2: Dataset Creation and Examples

Let's create some synthetic datasets to test our neural network implementation:

In [None]:
# --- Binary classification: 1000 samples, 2 informative features -----------
print("Creating Binary Classification Dataset...")
X_binary, y_binary = make_classification(
    n_samples=1000, n_features=2, n_informative=2, n_redundant=0,
    n_clusters_per_class=1, random_state=42,
)

# Standardise the features and turn the labels into an (n_samples, 1) column.
scaler_binary = StandardScaler()
X_binary = scaler_binary.fit_transform(X_binary)
y_binary = y_binary.reshape(-1, 1)

# --- Multi-class classification: same layout with three classes ------------
print("Creating Multi-class Classification Dataset...")
X_multi, y_multi = make_classification(
    n_samples=1000, n_features=2, n_informative=2, n_redundant=0,
    n_classes=3, n_clusters_per_class=1, random_state=42,
)

scaler_multi = StandardScaler()
X_multi = scaler_multi.fit_transform(X_multi)

# One-hot encode the integer labels: each row gets a single 1 in the column
# of its class.
y_multi_onehot = np.zeros((len(y_multi), 3))
for onehot_row, class_index in zip(y_multi_onehot, y_multi):
    onehot_row[class_index] = 1

# --- Regression: one feature, noisy target ---------------------------------
print("Creating Regression Dataset...")
X_reg, y_reg = make_regression(
    n_samples=1000, n_features=1, noise=10, random_state=42,
)

# Features and target are standardised with separate scalers.
scaler_reg_X = StandardScaler()
scaler_reg_y = StandardScaler()
X_reg = scaler_reg_X.fit_transform(X_reg)
y_reg = scaler_reg_y.fit_transform(y_reg.reshape(-1, 1))

# --- Visual overview of the three datasets ---------------------------------
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

classification_panels = [
    (axes[0], X_binary, y_binary.ravel(), 'Binary Classification Dataset'),
    (axes[1], X_multi, y_multi, 'Multi-class Classification Dataset'),
]
for panel, points, point_labels, panel_title in classification_panels:
    scatter = panel.scatter(points[:, 0], points[:, 1], c=point_labels,
                            cmap='viridis', alpha=0.7)
    panel.set_title(panel_title)
    panel.set_xlabel('Feature 1')
    panel.set_ylabel('Feature 2')
    plt.colorbar(scatter, ax=panel)

regression_panel = axes[2]
regression_panel.scatter(X_reg, y_reg, alpha=0.7)
regression_panel.set_title('Regression Dataset')
regression_panel.set_xlabel('Feature')
regression_panel.set_ylabel('Target')

plt.tight_layout()
plt.show()

print(f"Binary classification: {X_binary.shape[0]} samples, {X_binary.shape[1]} features")
print(f"Multi-class classification: {X_multi.shape[0]} samples, {X_multi.shape[1]} features, {len(np.unique(y_multi))} classes")
print(f"Regression: {X_reg.shape[0]} samples, {X_reg.shape[1]} features")

## Part 3: Training and Evaluating Custom ANN

### Example 1: Binary Classification

In [None]:
print("Training Neural Network for Binary Classification")
print("=" * 50)

# Hold out 20% of the binary dataset for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_binary, y_binary, test_size=0.2, random_state=42
)

# 2 inputs -> 8 -> 4 hidden units -> 1 output.
nn_binary = NeuralNetwork([2, 8, 4, 1], activation='sigmoid', learning_rate=0.1)
nn_binary.train(X_train, y_train, epochs=1000, verbose=True)

# Raw network outputs for both splits, then thresholded at 0.5 to get labels.
y_pred_train = nn_binary.predict(X_train)
y_pred_test = nn_binary.predict(X_test)
y_pred_train_classes, y_pred_test_classes = (
    (raw_output > 0.5).astype(int) for raw_output in (y_pred_train, y_pred_test)
)

train_accuracy = accuracy_score(y_train, y_pred_train_classes)
test_accuracy = accuracy_score(y_test, y_pred_test_classes)

print(f"\nBinary Classification Results:")
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
loss_panel, boundary_panel = axes

# Left panel: cost recorded at every training epoch.
loss_panel.plot(nn_binary.cost_history)
loss_panel.set_title('Training Loss Curve')
loss_panel.set_xlabel('Epochs')
loss_panel.set_ylabel('Loss')
loss_panel.grid(True)

# Right panel: evaluate the network on a dense grid (step h) that extends
# one unit beyond the data in every direction.
h = 0.02
first_feature, second_feature = X_binary[:, 0], X_binary[:, 1]
x_min, x_max = first_feature.min() - 1, first_feature.max() + 1
y_min, y_max = second_feature.min() - 1, second_feature.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

mesh_points = np.c_[xx.ravel(), yy.ravel()]
Z = nn_binary.predict(mesh_points).reshape(xx.shape)

boundary_panel.contourf(xx, yy, Z, alpha=0.3, cmap='viridis')
scatter = boundary_panel.scatter(first_feature, second_feature,
                                 c=y_binary.ravel(),
                                 cmap='viridis', edgecolors='black')
boundary_panel.set_title('Decision Boundary')
boundary_panel.set_xlabel('Feature 1')
boundary_panel.set_ylabel('Feature 2')

plt.tight_layout()
plt.show()

### Example 2: Multi-class Classification

In [None]:
print("Training Neural Network for Multi-class Classification")
print("=" * 55)

# Hold out 20% of the samples; targets are the one-hot encoded labels.
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X_multi, y_multi_onehot, test_size=0.2, random_state=42
)

# 2 inputs -> 10 -> 6 hidden units -> 3 outputs (one per class).
nn_multi = NeuralNetwork([2, 10, 6, 3], activation='sigmoid', learning_rate=0.1)
nn_multi.train(X_train_multi, y_train_multi, epochs=1000, verbose=True)

y_pred_train_multi = nn_multi.predict(X_train_multi)
y_pred_test_multi = nn_multi.predict(X_test_multi)

# Collapse network outputs and one-hot targets to class indices.
y_pred_train_multi_classes = y_pred_train_multi.argmax(axis=1)
y_pred_test_multi_classes = y_pred_test_multi.argmax(axis=1)
y_train_multi_classes = y_train_multi.argmax(axis=1)
y_test_multi_classes = y_test_multi.argmax(axis=1)

train_accuracy_multi = accuracy_score(y_train_multi_classes, y_pred_train_multi_classes)
test_accuracy_multi = accuracy_score(y_test_multi_classes, y_pred_test_multi_classes)

print(f"\nMulti-class Classification Results:")
print(f"Training Accuracy: {train_accuracy_multi:.4f}")
print(f"Test Accuracy: {test_accuracy_multi:.4f}")

print("\nClassification Report:")
print(classification_report(y_test_multi_classes, y_pred_test_multi_classes))

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
loss_panel, boundary_panel = axes

# Left panel: cost recorded at every training epoch.
loss_panel.plot(nn_multi.cost_history)
loss_panel.set_title('Multi-class Training Loss Curve')
loss_panel.set_xlabel('Epochs')
loss_panel.set_ylabel('Loss')
loss_panel.grid(True)

# Right panel: predicted class on a dense grid (step h) that extends one
# unit beyond the data in every direction.
h = 0.02
first_feature, second_feature = X_multi[:, 0], X_multi[:, 1]
x_min, x_max = first_feature.min() - 1, first_feature.max() + 1
y_min, y_max = second_feature.min() - 1, second_feature.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

mesh_points = np.c_[xx.ravel(), yy.ravel()]
Z_multi = nn_multi.predict(mesh_points)
Z_multi = np.argmax(Z_multi, axis=1).reshape(xx.shape)

boundary_panel.contourf(xx, yy, Z_multi, alpha=0.3, cmap='viridis')
scatter = boundary_panel.scatter(first_feature, second_feature, c=y_multi,
                                 cmap='viridis', edgecolors='black')
boundary_panel.set_title('Multi-class Decision Boundary')
boundary_panel.set_xlabel('Feature 1')
boundary_panel.set_ylabel('Feature 2')

plt.tight_layout()
plt.show()

### Example 3: Regression

In [None]:
print("Training Neural Network for Regression")
print("=" * 40)

# Hold out 20% of the regression samples for testing.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# 1 input -> 8 -> 4 ReLU hidden units -> 1 output.
nn_reg = NeuralNetwork([1, 8, 4, 1], activation='relu', learning_rate=0.01)
nn_reg.train(X_train_reg, y_train_reg, epochs=1000, verbose=True)

y_pred_train_reg = nn_reg.predict(X_train_reg)
y_pred_test_reg = nn_reg.predict(X_test_reg)

# Errors are measured on the standardised target, not the original units.
train_mse = mean_squared_error(y_train_reg, y_pred_train_reg)
test_mse = mean_squared_error(y_test_reg, y_pred_test_reg)
train_rmse, test_rmse = np.sqrt(train_mse), np.sqrt(test_mse)

print(f"\nRegression Results:")
print(f"Training RMSE: {train_rmse:.4f}")
print(f"Test RMSE: {test_rmse:.4f}")

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
loss_panel, fit_panel = axes

# Left panel: cost recorded at every training epoch.
loss_panel.plot(nn_reg.cost_history)
loss_panel.set_title('Regression Training Loss Curve')
loss_panel.set_xlabel('Epochs')
loss_panel.set_ylabel('Loss (MSE)')
loss_panel.grid(True)

# Order the test points by feature value so the prediction is drawn as one
# continuous line rather than a zig-zag.
sort_idx = np.argsort(X_test_reg.ravel())
X_test_sorted, y_test_sorted, y_pred_sorted = (
    values[sort_idx] for values in (X_test_reg, y_test_reg, y_pred_test_reg)
)

# Right panel: held-out targets versus the fitted curve.
fit_panel.scatter(X_test_reg, y_test_reg, alpha=0.6, label='Actual', color='blue')
fit_panel.plot(X_test_sorted, y_pred_sorted, color='red', linewidth=2, label='Predicted')
fit_panel.set_title('Regression: Actual vs Predicted')
fit_panel.set_xlabel('Feature')
fit_panel.set_ylabel('Target')
fit_panel.legend()
fit_panel.grid(True)

plt.tight_layout()
plt.show()

## Part 4: ANN Implementation using TensorFlow/Keras

Let's solve the same problems using TensorFlow/Keras for comparison:

In [None]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) logging and is read
# when the library is loaded, so it must be set BEFORE `import tensorflow`.
# NOTE: if TensorFlow was already imported earlier in this session, the
# setting arrives too late to change the native log level.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    print(f"TensorFlow version: {tf.__version__}")
except ImportError:
    # TensorFlow is missing: install it into the interpreter running this
    # notebook, then retry the imports.
    print("Installing TensorFlow...")
    import importlib
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tensorflow"])
    # Packages installed while the interpreter is running may be hidden by
    # the import system's cached directory listings; drop those caches first.
    importlib.invalidate_caches()
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    print(f"TensorFlow installed successfully! Version: {tf.__version__}")

# Silence everything below ERROR from TensorFlow's Python-side logger.
tf.get_logger().setLevel('ERROR')

### Binary Classification with Keras

In [None]:
print("Training Keras Neural Network for Binary Classification")
print("=" * 55)

# 2 inputs -> 8 -> 4 sigmoid hidden units -> 1 sigmoid output.
binary_layers = [
    layers.Dense(8, activation='sigmoid', input_shape=(2,)),
    layers.Dense(4, activation='sigmoid'),
    layers.Dense(1, activation='sigmoid'),
]
model_binary = keras.Sequential(binary_layers)

model_binary.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=['accuracy'])

model_binary.summary()

# 20% of the training data is set aside by Keras for validation.
history_binary = model_binary.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=0,
)

train_loss, train_acc = model_binary.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc = model_binary.evaluate(X_test, y_test, verbose=0)

print(f"\nKeras Binary Classification Results:")
print(f"Training Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One panel per recorded metric:
# (panel, training key, validation key, training label, validation label,
#  title, y-axis label)
history_panels = [
    (axes[0], 'loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Keras Binary Classification - Loss', 'Loss'),
    (axes[1], 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Keras Binary Classification - Accuracy', 'Accuracy'),
]
for panel, train_key, val_key, train_label, val_label, panel_title, y_label in history_panels:
    panel.plot(history_binary.history[train_key], label=train_label)
    panel.plot(history_binary.history[val_key], label=val_label)
    panel.set_title(panel_title)
    panel.set_xlabel('Epochs')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()

### Multi-class Classification with Keras

In [None]:
print("Training Keras Neural Network for Multi-class Classification")
print("=" * 60)

# 2 inputs -> 10 -> 6 ReLU hidden units -> 3-way softmax output.
multi_layers = [
    layers.Dense(10, activation='relu', input_shape=(2,)),
    layers.Dense(6, activation='relu'),
    layers.Dense(3, activation='softmax'),
]
model_multi = keras.Sequential(multi_layers)

# categorical_crossentropy is paired here with the one-hot encoded targets.
model_multi.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

model_multi.summary()

# 20% of the training data is set aside by Keras for validation.
history_multi = model_multi.fit(
    X_train_multi,
    y_train_multi,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=0,
)

train_loss_multi, train_acc_multi = model_multi.evaluate(X_train_multi, y_train_multi, verbose=0)
test_loss_multi, test_acc_multi = model_multi.evaluate(X_test_multi, y_test_multi, verbose=0)

print(f"\nKeras Multi-class Classification Results:")
print(f"Training Accuracy: {train_acc_multi:.4f}")
print(f"Test Accuracy: {test_acc_multi:.4f}")

fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One panel per recorded metric:
# (panel, training key, validation key, training label, validation label,
#  title, y-axis label)
history_panels = [
    (axes[0], 'loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Keras Multi-class Classification - Loss', 'Loss'),
    (axes[1], 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Keras Multi-class Classification - Accuracy', 'Accuracy'),
]
for panel, train_key, val_key, train_label, val_label, panel_title, y_label in history_panels:
    panel.plot(history_multi.history[train_key], label=train_label)
    panel.plot(history_multi.history[val_key], label=val_label)
    panel.set_title(panel_title)
    panel.set_xlabel('Epochs')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()

## Part 5: Real-world Example - Iris Dataset

Let's apply our ANN implementations to the classic Iris dataset for flower classification:

In [None]:
print("Loading Iris Dataset")
print("=" * 25)

iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# Tabular view of the features with a readable species column, used only
# for the printed summaries below.
iris_df = pd.DataFrame(X_iris, columns=iris.feature_names)
iris_df['species'] = [iris.target_names[class_id] for class_id in y_iris]

print("Dataset Shape:", X_iris.shape)
print("Number of classes:", len(iris.target_names))
print("Class names:", iris.target_names)
print("\nFirst 5 rows:")
print(iris_df.head())

print("\nDataset Statistics:")
print(iris_df.describe())

# One histogram panel per feature, with the species overlaid in each panel.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
features = iris.feature_names

for i, (ax, feature_name) in enumerate(zip(axes.flat, features)):
    for j, species in enumerate(iris.target_names):
        mask = y_iris == j
        ax.hist(X_iris[mask, i], alpha=0.7, label=species, bins=20)
    ax.set_title(feature_name)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

# Scatter of the first two features, one colour per species.
plt.figure(figsize=(12, 8))
colors = ['red', 'green', 'blue']
for i, (species, species_color) in enumerate(zip(iris.target_names, colors)):
    mask = y_iris == i
    plt.scatter(X_iris[mask, 0], X_iris[mask, 1],
                c=species_color, label=species, alpha=0.7, s=50)

plt.xlabel(features[0])
plt.ylabel(features[1])
plt.title('Iris Dataset: Sepal Length vs Sepal Width')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Standardise the four Iris features.
scaler_iris = StandardScaler()
X_iris_scaled = scaler_iris.fit_transform(X_iris)

# One-hot encode the labels: each row gets a single 1 in its class column.
y_iris_onehot = np.zeros((len(y_iris), 3))
for onehot_row, class_index in zip(y_iris_onehot, y_iris):
    onehot_row[class_index] = 1

# 70/30 split, stratified on the integer labels.
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris_scaled, y_iris_onehot, test_size=0.3, random_state=42, stratify=y_iris
)

# Integer labels recovered from the one-hot rows; used for scoring and for
# the Keras model below.
y_train_iris_labels = y_train_iris.argmax(axis=1)
y_test_iris_labels = y_test_iris.argmax(axis=1)

print("Iris Dataset Preparation:")
print(f"Training set: {X_train_iris.shape}")
print(f"Test set: {X_test_iris.shape}")
print(f"Features: {X_iris.shape[1]}")
print(f"Classes: {len(iris.target_names)}")

# --- Custom implementation ---------------------------------------------------
print("\nTraining Custom Neural Network on Iris Dataset")
print("=" * 50)

nn_iris = NeuralNetwork([4, 8, 6, 3], activation='sigmoid', learning_rate=0.1)
nn_iris.train(X_train_iris, y_train_iris, epochs=1000, verbose=False)

y_pred_iris = nn_iris.predict(X_test_iris)
y_pred_iris_classes = y_pred_iris.argmax(axis=1)

custom_accuracy = accuracy_score(y_test_iris_labels, y_pred_iris_classes)
print(f"Custom Neural Network Accuracy: {custom_accuracy:.4f}")

# --- Keras implementation ----------------------------------------------------
print("\nTraining Keras Neural Network on Iris Dataset")
print("=" * 48)

iris_layers = [
    layers.Dense(8, activation='relu', input_shape=(4,)),
    layers.Dense(6, activation='relu'),
    layers.Dense(3, activation='softmax'),
]
model_iris = keras.Sequential(iris_layers)

# The sparse loss is paired with integer class labels rather than one-hot rows.
model_iris.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

history_iris = model_iris.fit(
    X_train_iris,
    y_train_iris_labels,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
    verbose=0,
)

keras_loss, keras_accuracy = model_iris.evaluate(X_test_iris, y_test_iris_labels, verbose=0)
print(f"Keras Neural Network Accuracy: {keras_accuracy:.4f}")

# --- Side-by-side summary ------------------------------------------------------
print(f"\nComparison on Iris Dataset:")
print(f"Custom Implementation: {custom_accuracy:.4f}")
print(f"Keras Implementation:  {keras_accuracy:.4f}")

print(f"\nDetailed Classification Report (Custom NN):")
print(classification_report(y_test_iris_labels, y_pred_iris_classes,
                            target_names=iris.target_names))

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
custom_loss_panel, keras_loss_panel = axes[0, 0], axes[0, 1]
accuracy_panel, confusion_panel = axes[1, 0], axes[1, 1]

# Top-left: per-epoch cost of the custom network.
custom_loss_panel.plot(nn_iris.cost_history)
custom_loss_panel.set_title('Custom NN - Training Loss (Iris)')
custom_loss_panel.set_xlabel('Epochs')
custom_loss_panel.set_ylabel('Loss')
custom_loss_panel.grid(True)

# Top-right: training and validation loss recorded by Keras.
keras_loss_panel.plot(history_iris.history['loss'], label='Training Loss')
keras_loss_panel.plot(history_iris.history['val_loss'], label='Validation Loss')
keras_loss_panel.set_title('Keras NN - Training Loss (Iris)')
keras_loss_panel.set_xlabel('Epochs')
keras_loss_panel.set_ylabel('Loss')
keras_loss_panel.legend()
keras_loss_panel.grid(True)

# Bottom-left: test accuracy of both models, with the value written just
# above each bar.
models = ['Custom NN', 'Keras NN']
accuracies = [custom_accuracy, keras_accuracy]
colors = ['skyblue', 'lightcoral']

accuracy_panel.bar(models, accuracies, color=colors)
accuracy_panel.set_title('Model Accuracy Comparison (Iris)')
accuracy_panel.set_ylabel('Accuracy')
accuracy_panel.set_ylim(0, 1)
for bar_index, accuracy in enumerate(accuracies):
    accuracy_panel.text(bar_index, accuracy + 0.01, f'{accuracy:.3f}',
                        ha='center', va='bottom')

# Bottom-right: confusion matrix of the custom network on the test split.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test_iris_labels, y_pred_iris_classes)
im = confusion_panel.imshow(cm, interpolation='nearest', cmap='Blues')
confusion_panel.set_title('Confusion Matrix (Custom NN)')
confusion_panel.set_xlabel('Predicted')
confusion_panel.set_ylabel('Actual')

# Write each count into its cell (x = predicted column, y = actual row).
for i, row_counts in enumerate(cm):
    for j, count in enumerate(row_counts):
        confusion_panel.text(j, i, str(count), ha='center', va='center')

class_ticks = range(len(iris.target_names))
confusion_panel.set_xticks(class_ticks)
confusion_panel.set_yticks(class_ticks)
confusion_panel.set_xticklabels(iris.target_names, rotation=45)
confusion_panel.set_yticklabels(iris.target_names)

plt.tight_layout()
plt.show()