# Q3: Perceptron From Scratch

This notebook implements a single-neuron logistic model (perceptron) from scratch using pure NumPy to classify fruits (apples vs bananas).

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better plots.
# Matplotlib >= 3.6 ships the seaborn look as 'seaborn-v0_8'; older releases
# only know it as 'seaborn'. Use whichever name this installation provides so
# the cell does not raise on a different Matplotlib version; if neither is
# available the default style is simply left in place.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Seed NumPy's global generator so any later np.random.* call is reproducible.
np.random.seed(42)

## 1. Load and Explore the Dataset

In [None]:
# Read the fruit measurements from disk; every later cell works off `df`.
df = pd.read_csv('fruit.csv')

# Structural overview: dimensions first, then the column names.
column_names = df.columns.tolist()
print("Dataset Info:", f"Shape: {df.shape}", f"\nColumns: {column_names}", sep="\n")

# Rich (HTML) preview of the first ten rows.
print("\nFirst few rows:")
display(df.head(10))

# Class balance, followed by the legend for the numeric labels.
label_counts = df['label'].value_counts()
print("\nLabel distribution:")
print(label_counts)
print("0 = Apple, 1 = Banana")

In [None]:
# Visualize the dataset: one histogram per feature plus a length/weight scatter
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Per-class colour and display name, indexed by the numeric label (0, 1).
colors = ['red', 'yellow']
labels = ['Apple', 'Banana']

# Feature distributions fill the grid row by row:
# top-left, top-right, bottom-left. The fourth panel is left for the scatter.
histogram_features = ['length_cm', 'weight_g', 'yellow_score']
for ax, feature in zip(axes.flat, histogram_features):
    for label, fruit_name in enumerate(labels):
        class_values = df.loc[df['label'] == label, feature]
        ax.hist(class_values, alpha=0.7, label=fruit_name, bins=8)
    ax.set_title(f'Distribution of {feature}')
    ax.set_xlabel(feature)
    ax.set_ylabel('Count')
    ax.legend()

# Scatter plot of the two size-related features in the bottom-right panel
scatter_ax = axes[1, 1]
for label, (color, fruit_name) in enumerate(zip(colors, labels)):
    class_rows = df[df['label'] == label]
    scatter_ax.scatter(
        class_rows['length_cm'],
        class_rows['weight_g'],
        c=color,
        label=fruit_name,
        alpha=0.7,
        s=60,
    )
scatter_ax.set_title('Length vs Weight')
scatter_ax.set_xlabel('Length (cm)')
scatter_ax.set_ylabel('Weight (g)')
scatter_ax.legend()

plt.tight_layout()
plt.show()

## 2. Perceptron Implementation

In [None]:
class Perceptron:
    """Single-neuron logistic classifier trained with batch gradient descent.

    The model computes ``sigmoid(X @ weights + bias)`` and is optimised against
    binary cross-entropy. Labels are expected to be 0/1.

    Attributes:
        learning_rate: Step size applied to every gradient update.
        random_seed: Seed used to draw the initial weights and bias.
        weights: 1-D array of per-feature weights (``None`` until initialised).
        bias: Scalar bias term (``None`` until initialised).
        cost_history: Loss recorded at the start of each epoch of the most
            recent ``fit`` call (i.e. before that epoch's update).
        accuracy_history: Accuracy recorded alongside ``cost_history``.
    """

    def __init__(self, learning_rate=0.01, random_seed=42):
        """
        Store hyper-parameters; weights and bias are drawn later, in ``fit``.

        The seed is only remembered here. It is consumed by a private
        generator in ``initialize_parameters`` so that merely constructing a
        model does not reseed NumPy's global random state.
        """
        self.learning_rate = learning_rate
        self.random_seed = random_seed
        self.weights = None
        self.bias = None
        self.cost_history = []
        self.accuracy_history = []

    def sigmoid(self, z):
        """Sigmoid activation function with clipping to prevent overflow"""
        # exp(500) is still representable in float64, so clipping keeps the
        # exponential finite without changing results in the useful range.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def initialize_parameters(self, n_features, verbose=True):
        """Draw initial weights and bias from N(0, 0.1).

        Args:
            n_features: Number of weights to create.
            verbose: When true, print the drawn values.

        A fresh ``RandomState`` seeded with ``self.random_seed`` is used, so
        the values are the same ones ``np.random.seed(seed)`` followed by
        ``np.random.normal`` would give, every call is reproducible, and the
        global generator is left untouched.
        """
        rng = np.random.RandomState(self.random_seed)
        self.weights = rng.normal(0, 0.1, n_features)
        self.bias = rng.normal(0, 0.1)
        if verbose:
            print(f"Initialized weights: {self.weights}")
            print(f"Initialized bias: {self.bias}")

    def forward(self, X):
        """Forward pass: return the predicted probability of class 1 per row."""
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def compute_cost(self, y_true, y_pred):
        """Compute the mean binary cross-entropy loss."""
        # Keep probabilities strictly inside (0, 1) so log() never sees 0.
        y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
        cost = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return cost

    def compute_accuracy(self, y_true, y_pred):
        """Compute accuracy after thresholding probabilities at 0.5."""
        predictions = (y_pred >= 0.5).astype(int)
        return np.mean(predictions == y_true)

    def fit(self, X, y, epochs=500, verbose=True, target_cost=0.05):
        """Train the perceptron using batch gradient descent.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like of n_samples binary labels (0 or 1).
            epochs: Maximum number of full-batch updates.
            verbose: When false, nothing is printed.
            target_cost: Training stops as soon as the epoch's loss falls
                below this value. Pass ``None`` to always run all epochs.

        Returns:
            dict with ``initial_cost``, ``initial_accuracy``, ``final_cost``,
            ``final_accuracy`` and ``epochs_trained``.

        Raises:
            ValueError: If ``X`` is not 2-D or ``y`` does not have one label
                per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (n, 1) column vector cannot broadcast against the
        # (n,) prediction vector into an (n, n) matrix.
        y = np.asarray(y).reshape(-1)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.initialize_parameters(n_features, verbose=verbose)

        # Start from a clean slate so refitting the same instance does not
        # append to the previous run's curves (which would also inflate
        # 'epochs_trained').
        self.cost_history = []
        self.accuracy_history = []

        # Metrics of the untrained (randomly initialised) model
        initial_predictions = self.forward(X)
        initial_cost = self.compute_cost(y, initial_predictions)
        initial_accuracy = self.compute_accuracy(y, initial_predictions)

        if verbose:
            print(f"\nInitial random predictions:")
            print(f"Initial cost: {initial_cost:.4f}")
            print(f"Initial accuracy: {initial_accuracy:.4f}")

        # Training loop
        for epoch in range(epochs):
            # Forward pass
            y_pred = self.forward(X)

            # Metrics are measured before this epoch's parameter update
            cost = self.compute_cost(y, y_pred)
            accuracy = self.compute_accuracy(y, y_pred)
            self.cost_history.append(cost)
            self.accuracy_history.append(accuracy)

            # Gradients of the mean cross-entropy w.r.t. weights and bias
            error = y_pred - y
            dw = np.dot(X.T, error) / n_samples
            db = np.mean(error)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Print progress
            if verbose and (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch + 1}/{epochs} - Cost: {cost:.4f}, Accuracy: {accuracy:.4f}")

            # Early stopping
            if target_cost is not None and cost < target_cost:
                if verbose:
                    print(f"Early stopping at epoch {epoch + 1} - Target cost reached: {cost:.4f}")
                break

        # Final results, measured after the last update
        final_predictions = self.forward(X)
        final_cost = self.compute_cost(y, final_predictions)
        final_accuracy = self.compute_accuracy(y, final_predictions)

        if verbose:
            print(f"\nTraining completed!")
            print(f"Final cost: {final_cost:.4f}")
            print(f"Final accuracy: {final_accuracy:.4f}")
            print(f"Final weights: {self.weights}")
            print(f"Final bias: {self.bias}")

        return {
            'initial_cost': initial_cost,
            'initial_accuracy': initial_accuracy,
            'final_cost': final_cost,
            'final_accuracy': final_accuracy,
            'epochs_trained': len(self.cost_history)
        }

    def predict(self, X):
        """Return hard 0/1 class predictions (probability >= 0.5 maps to 1)."""
        probabilities = self.forward(X)
        return (probabilities >= 0.5).astype(int)

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of X."""
        return self.forward(X)

## 3. Data Preprocessing

In [None]:
# Prepare features and labels
feature_columns = ['length_cm', 'weight_g', 'yellow_score']
X = df[feature_columns].values
y = df['label'].values

# Column statistics are computed once: they are reported below and then
# reused for scaling, so the printed numbers are exactly the ones applied.
feature_means = X.mean(axis=0)
feature_stds = X.std(axis=0)

print("Original features:")
print(f"X shape: {X.shape}")
print(f"Feature means: {feature_means}")
print(f"Feature stds: {feature_stds}")

# Normalize features (z-score) for better convergence.
# A constant column has std == 0; dividing by it would fill the column with
# NaN/inf and silently break training. Divide such columns by 1 instead, which
# leaves them as all zeros after centring.
safe_stds = np.where(feature_stds == 0, 1.0, feature_stds)
X_normalized = (X - feature_means) / safe_stds

print("\nNormalized features:")
print(f"Feature means: {np.mean(X_normalized, axis=0)}")
print(f"Feature stds: {np.std(X_normalized, axis=0)}")

print(f"\nLabels: {y}")
print(f"Label distribution: {np.bincount(y)}")

## 4. Train the Perceptron

In [None]:
# Instantiate the model, then run batch gradient descent on the scaled features.
# `results` keeps the before/after metrics for the reflection section.
perceptron = Perceptron(random_seed=42, learning_rate=0.1)
results = perceptron.fit(X_normalized, y, verbose=True, epochs=500)

## 5. Visualize Training Progress

In [None]:
# Plot training metrics: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# (axes, series, line format, legend label, title, y-axis label) per panel
panels = [
    (ax1, perceptron.cost_history, 'b-', 'Training Loss',
     'Training Loss Over Epochs', 'Binary Cross-Entropy Loss'),
    (ax2, perceptron.accuracy_history, 'r-', 'Training Accuracy',
     'Training Accuracy Over Epochs', 'Accuracy'),
]
for ax, series, line_format, legend_label, title, y_label in panels:
    ax.plot(series, line_format, linewidth=2, label=legend_label)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.grid(True, alpha=0.3)
    ax.legend()

# Accuracy is a fraction, so pin its axis to the full [0, 1] range.
ax2.set_ylim(0, 1)

plt.tight_layout()
plt.show()

# Text summary: the last entries are the metrics recorded in the final epoch.
epochs_run = len(perceptron.cost_history)
last_loss = perceptron.cost_history[-1]
last_accuracy = perceptron.accuracy_history[-1]
print(f"Training completed in {epochs_run} epochs")
print(f"Final loss: {last_loss:.4f}")
print(f"Final accuracy: {last_accuracy:.4f}")

## 6. Evaluate Predictions

In [None]:
# Score every sample: hard 0/1 labels plus the underlying probabilities
predictions = perceptron.predict(X_normalized)
probabilities = perceptron.predict_proba(X_normalized)
is_correct = predictions == y

# Build the results table on a copy of the data so `df` itself stays untouched.
fruit_names = {0: 'Apple', 1: 'Banana'}
results_df = df.assign(
    probability=probabilities,
    prediction=predictions,
    correct=is_correct,
    fruit_name=lambda frame: frame['label'].map(fruit_names),
    pred_name=lambda frame: frame['prediction'].map(fruit_names),
)

print("Prediction Results:")
display(results_df[['fruit_name', 'pred_name', 'probability', 'correct']])

# Headline numbers: fraction correct and the raw count
n_correct = np.sum(is_correct)
n_total = len(y)
print(f"\nOverall Accuracy: {np.mean(is_correct):.4f}")
print(f"Correct predictions: {n_correct}/{n_total}")

## 7. Learning Rate Impact Analysis

In [None]:
# Sweep over several learning rates; each one gets its own colour in every panel
learning_rates = [0.001, 0.01, 0.1, 1.0]
colors = ['blue', 'green', 'red', 'orange']
results_comparison = {}

fig, axes_grid = plt.subplots(2, 2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes_grid

# Top row: loss and accuracy curves, one line per learning rate
for lr, color in zip(learning_rates, colors):
    print(f"\nTesting learning rate: {lr}")
    perceptron_test = Perceptron(learning_rate=lr, random_seed=42)
    test_results = perceptron_test.fit(X_normalized, y, epochs=500, verbose=False)
    results_comparison[lr] = test_results

    curve_style = dict(color=color, label=f'LR={lr}', linewidth=2)
    ax1.plot(perceptron_test.cost_history, **curve_style)
    ax2.plot(perceptron_test.accuracy_history, **curve_style)

    print(f"Epochs trained: {test_results['epochs_trained']}")
    print(f"Final accuracy: {test_results['final_accuracy']:.4f}")

curve_panels = (
    (ax1, 'Loss vs Epochs for Different Learning Rates', 'Loss'),
    (ax2, 'Accuracy vs Epochs for Different Learning Rates', 'Accuracy'),
)
for ax, title, y_label in curve_panels:
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True, alpha=0.3)
ax2.set_ylim(0, 1)

# Bottom row: per-learning-rate summary bars
lrs = list(results_comparison)
epochs_to_converge = [results_comparison[rate]['epochs_trained'] for rate in lrs]
final_accuracies = [results_comparison[rate]['final_accuracy'] for rate in lrs]

bar_positions = range(len(lrs))
bar_panels = (
    (ax3, epochs_to_converge, 'Epochs to Convergence', 'Epochs'),
    (ax4, final_accuracies, 'Final Accuracy', 'Accuracy'),
)
for ax, heights, title, y_label in bar_panels:
    ax.bar(bar_positions, heights, color=colors)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Learning Rate')
    ax.set_ylabel(y_label)
    # Bars sit at integer positions; label them with the actual rates.
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(lrs)
    ax.grid(True, alpha=0.3)
ax4.set_ylim(0, 1)

plt.tight_layout()
plt.show()

## 8. Decision Boundary Visualization

In [None]:
# Visualize decision boundary (using first two features)
def plot_decision_boundary(X, y, perceptron, feature_names):
    """Draw the model's probability surface over the first two features.

    The third feature is held at its mean for every grid point, so the plot is
    a 2-D slice through the three-feature model. ``feature_names`` supplies the
    x- and y-axis labels; ``perceptron`` must expose ``predict_proba``.
    """
    # Evaluation grid: data range padded by 1 on every side, sampled every 0.02
    step = 0.02
    padding = 1
    first_feature, second_feature = X[:, 0], X[:, 1]
    grid_x = np.arange(first_feature.min() - padding, first_feature.max() + padding, step)
    grid_y = np.arange(second_feature.min() - padding, second_feature.max() + padding, step)
    xx, yy = np.meshgrid(grid_x, grid_y)

    # One row per grid point, with the third feature fixed at its average
    third_feature = np.full(xx.size, X[:, 2].mean())
    mesh_points = np.column_stack((xx.ravel(), yy.ravel(), third_feature))
    Z = perceptron.predict_proba(mesh_points).reshape(xx.shape)

    # Probability surface with its colour scale
    fig, ax = plt.subplots(figsize=(10, 8))
    surface = ax.contourf(xx, yy, Z, levels=50, alpha=0.8, cmap=plt.cm.RdYlBu)
    fig.colorbar(surface, ax=ax, label='Prediction Probability')

    # Overlay the actual samples, one scatter series per class
    colors = ['red', 'yellow']
    labels = ['Apple', 'Banana']
    for class_id, (color, name) in enumerate(zip(colors, labels)):
        members = y == class_id
        ax.scatter(X[members, 0], X[members, 1],
                   c=color, label=name,
                   edgecolors='black', s=100, alpha=0.9)

    ax.set_xlabel(feature_names[0])
    ax.set_ylabel(feature_names[1])
    ax.set_title('Decision Boundary (Yellow Score = Average)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.show()

# Plot decision boundary for the trained model on the normalized data
boundary_axis_labels = ['Length (normalized)', 'Weight (normalized)']
plot_decision_boundary(X_normalized, y, perceptron, boundary_axis_labels)

## 9. Reflection Analysis

In [None]:
print("=== REFLECTION ANALYSIS ===")

# 1. Before/after comparison for the main training run (`results` from fit)
accuracy_gain = results['final_accuracy'] - results['initial_accuracy']
print("\n1. Initial Random Predictions vs Final Results:")
print(f"   - Initial accuracy: {results['initial_accuracy']:.4f} ({results['initial_accuracy']*100:.1f}%)")
print(f"   - Final accuracy: {results['final_accuracy']:.4f} ({results['final_accuracy']*100:.1f}%)")
print(f"   - Improvement: {accuracy_gain*100:.1f} percentage points")
print(f"   - Initial loss: {results['initial_cost']:.4f}")
print(f"   - Final loss: {results['final_cost']:.4f}")

# 2. Summary of the learning-rate sweep stored in `results_comparison`
print("\n2. Learning Rate Impact on Convergence:")
for lr in learning_rates:
    res = results_comparison[lr]
    print(f"   - LR {lr:5.3f}: {res['epochs_trained']:3d} epochs, "
          f"accuracy {res['final_accuracy']:.4f}")

print("\n3. DJ-Knob / Child-Learning Analogy:")
print("   The learning rate acts like a 'DJ knob' controlling how quickly")
print("   our perceptron learns from mistakes:")
print("   - Too low (0.001): Like a timid child, learns very slowly")
print("   - Just right (0.01-0.1): Like an attentive student, steady progress")
print("   - Too high (1.0): Like an impulsive child, may overshoot the target")

# Show the learning curves for different LRs side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Only the first 50 epochs are shown, where the curves differ the most, so
# train for exactly that many. The sweep reuses `learning_rates` and `colors`
# from the learning-rate analysis cell, so both views always cover the same
# rates with the same colour coding.
n_epochs_shown = 50
for lr, color in zip(learning_rates, colors):
    perceptron_viz = Perceptron(learning_rate=lr, random_seed=42)
    perceptron_viz.fit(X_normalized, y, epochs=n_epochs_shown, verbose=False)

    ax1.plot(perceptron_viz.cost_history,
             color=color, label=f'LR={lr}', linewidth=2, marker='o', markersize=3)
    ax2.plot(perceptron_viz.accuracy_history,
             color=color, label=f'LR={lr}', linewidth=2, marker='s', markersize=3)

ax1.set_title(f'Learning Speed Comparison (First {n_epochs_shown} Epochs)', fontsize=14)
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.grid(True, alpha=0.3)

ax2.set_title(f'Accuracy Improvement (First {n_epochs_shown} Epochs)', fontsize=14)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.legend()
ax2.grid(True, alpha=0.3)
ax2.set_ylim(0, 1)

plt.tight_layout()
plt.show()

## Summary

This notebook demonstrates:
1. **Dataset Loading & Exploration**: 16 fruit samples (loaded from `fruit.csv`) with length, weight, and yellowness features
2. **Pure NumPy Implementation**: Single-neuron logistic regression from scratch
3. **Batch Gradient Descent**: Training for up to 500 epochs, stopping early once the loss drops below 0.05
4. **Visualization**: Training metrics and decision boundary plots
5. **Learning Rate Analysis**: Impact on convergence speed and stability
6. **Child Learning Analogy**: How learning rate affects the learning process

The perceptron successfully learned to distinguish apples from bananas using their physical characteristics!