# Perceptron From Scratch

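In this notebook, we'll implement a single-neuron logistic model (a perceptron with a sigmoid activation, which is equivalent to logistic regression) using pure NumPy, and train it with batch gradient descent on our fruit dataset to classify apples (label 0) and bananas (label 1).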

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set random seed for reproducibility: the model's initial weights are drawn
# from NumPy's global RNG (np.random.randn), so seeding it here gives the same
# starting weights on every fresh Restart & Run All.
np.random.seed(42)

## Load and Prepare Data

In [None]:
# Load the fruit dataset (read from 'fruit.csv', resolved relative to the
# kernel's current working directory)
data = pd.read_csv('fruit.csv')

# Display the first few rows
print(data.head())
# The blank line before the count must be written as the "\n" escape sequence:
# a raw line break inside a single-quoted f-string is a SyntaxError.
print(f'\nTotal samples: {len(data)}')

In [None]:
# Prepare features (X) and target (y)
# Cast to float so integer-valued columns are standardized with float arithmetic.
X = data[['length_cm', 'weight_g', 'yellow_score']].values.astype(float)
# Keep y as a column vector so it lines up element-wise with the model's
# (n_samples, 1) output instead of broadcasting against it.
y = data['label'].values.reshape(-1, 1)

# Normalize features for better convergence (zero mean, unit variance per column)
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
# A constant column has std == 0; dividing by it would fill that column with
# NaN and poison the loss and gradients. Scale such columns by 1 instead, which
# leaves them as all zeros after mean-centering.
X_std = np.where(X_std == 0, 1.0, X_std)
X_normalized = (X - X_mean) / X_std

# Add bias term (intercept) as a leading column of ones
X_normalized = np.hstack((np.ones((X.shape[0], 1)), X_normalized))

print(f'Features shape: {X_normalized.shape}')
print(f'Target shape: {y.shape}')

## Implement Logistic Regression Model

In [None]:
class LogisticPerceptron:
    """Single sigmoid neuron trained with full-batch gradient descent.

    The model computes ``sigmoid(X @ weights)``. There is no separate bias
    parameter: callers that want an intercept must include a column of ones
    in ``X`` (and count it in ``input_size``).

    Attributes:
        weights: Array of shape ``(input_size, 1)``.
    """

    def __init__(self, input_size):
        """Create the model with small random weights.

        Args:
            input_size: Number of columns in the input matrix (including the
                bias column, if one is used).
        """
        # Initialize weights randomly (drawn from NumPy's global RNG, scaled to
        # be small so the initial predictions sit close to 0.5)
        self.weights = np.random.randn(input_size, 1) * 0.01

    def sigmoid(self, z):
        """Numerically stable logistic function ``1 / (1 + exp(-z))``.

        Args:
            z: Scalar or array of pre-activations.

        Returns:
            Array of the same shape as ``z`` with values in ``[0, 1]``.
        """
        z = np.asarray(z, dtype=float)
        # exp(-|z|) is always in (0, 1], so it can never overflow. The naive
        # form exp(-z) overflows (RuntimeWarning, inf) for large negative z.
        exp_neg_abs = np.exp(-np.abs(z))
        return np.where(
            z >= 0,
            1 / (1 + exp_neg_abs),            # z >= 0: exp(-|z|) == exp(-z)
            exp_neg_abs / (1 + exp_neg_abs),  # z < 0: exp(z) / (1 + exp(z))
        )

    def forward(self, X):
        """Return the predicted probability for every row of ``X``.

        Args:
            X: Array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, 1)``.
        """
        z = np.dot(X, self.weights)
        return self.sigmoid(z)

    def compute_loss(self, y_true, y_pred):
        """Mean binary cross-entropy between labels and predicted probabilities.

        Args:
            y_true: Labels (0 or 1), same shape as ``y_pred``.
            y_pred: Predicted probabilities.

        Returns:
            The loss averaged over ``y_true.shape[0]`` samples.
        """
        m = y_true.shape[0]
        # The 1e-10 offset keeps log() finite when a prediction is exactly 0 or 1.
        loss = -np.sum(y_true * np.log(y_pred + 1e-10) + (1 - y_true) * np.log(1 - y_pred + 1e-10)) / m
        return loss

    def compute_accuracy(self, y_true, y_pred):
        """Fraction of samples whose thresholded prediction matches the label.

        A probability strictly greater than 0.5 is classified as 1, otherwise 0.
        """
        predictions = (y_pred > 0.5).astype(int)
        return np.mean(predictions == y_true)

    def compute_gradients(self, X, y_true, y_pred):
        """Gradient of the mean cross-entropy loss with respect to the weights.

        Returns:
            Array with the same shape as ``self.weights``.
        """
        m = y_true.shape[0]
        dw = np.dot(X.T, (y_pred - y_true)) / m
        return dw

    def update_weights(self, gradients, learning_rate):
        """Take one gradient-descent step, modifying ``self.weights`` in place."""
        self.weights -= learning_rate * gradients

    def train(self, X, y, learning_rate=0.1, epochs=1000, early_stopping_loss=0.05):
        """Run batch gradient descent and record the metrics of every epoch.

        Args:
            X: Input matrix of shape ``(n_samples, input_size)``.
            y: Labels of shape ``(n_samples, 1)``.
            learning_rate: Step size of each weight update.
            epochs: Maximum number of passes over the data.
            early_stopping_loss: Stop as soon as an epoch's loss falls below
                this value. Pass ``None`` to always run all ``epochs``.

        Returns:
            ``(losses, accuracies)``: two lists with one entry per executed
            epoch. Each entry is measured *before* that epoch's weight update.
        """
        losses = []
        accuracies = []

        for epoch in range(epochs):
            # Forward pass
            y_pred = self.forward(X)

            # Compute loss and accuracy
            loss = self.compute_loss(y, y_pred)
            accuracy = self.compute_accuracy(y, y_pred)

            # Store metrics
            losses.append(loss)
            accuracies.append(accuracy)

            # Compute gradients
            gradients = self.compute_gradients(X, y, y_pred)

            # Update weights
            self.update_weights(gradients, learning_rate)

            # Print progress every 100 epochs
            if epoch % 100 == 0:
                print(f'Epoch {epoch}/{epochs}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}')

            # Early stopping if loss is below threshold
            if early_stopping_loss is not None and loss < early_stopping_loss:
                # "\n" must be an escape sequence: a raw line break inside a
                # single-quoted f-string is a SyntaxError.
                print(f'\nEarly stopping at epoch {epoch}. Loss: {loss:.4f} < {early_stopping_loss}')
                break

        return losses, accuracies

## Train the Model

In [None]:
# Create and train the model
input_size = X_normalized.shape[1]  # Number of features including bias
model = LogisticPerceptron(input_size)

# Store initial weights for comparison (copy, because training updates
# model.weights in place)
initial_weights = model.weights.copy()

# Baseline metrics of the untrained model
initial_predictions = model.forward(X_normalized)
initial_loss = model.compute_loss(y, initial_predictions)
initial_accuracy = model.compute_accuracy(y, initial_predictions)

print(f'Initial random model - Loss: {initial_loss:.4f}, Accuracy: {initial_accuracy:.4f}')

# Train the model
losses, accuracies = model.train(X_normalized, y, learning_rate=0.1, epochs=1000, early_stopping_loss=0.05)

# Metrics after training (evaluated on the same data the model was trained on)
final_predictions = model.forward(X_normalized)
final_loss = model.compute_loss(y, final_predictions)
final_accuracy = model.compute_accuracy(y, final_predictions)

# "\n" must be an escape sequence: a raw line break inside a single-quoted
# f-string is a SyntaxError.
print(f'\nFinal model - Loss: {final_loss:.4f}, Accuracy: {final_accuracy:.4f}')

## Visualize Training Progress

In [None]:
# Training curves side by side: loss on the left, accuracy on the right
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: loss recorded at each epoch
ax_loss.plot(losses)
ax_loss.set_title('Loss per Epoch')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Binary Cross-Entropy Loss')
ax_loss.grid(True)

# Right panel: accuracy recorded at each epoch
ax_acc.plot(accuracies)
ax_acc.set_title('Accuracy per Epoch')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.grid(True)

fig.tight_layout()
plt.show()

## Experiment with Different Learning Rates

In [None]:
# Function to train model with different learning rates
def train_with_lr(X, y, learning_rate, epochs=500, early_stopping_loss=0.01):
    """Train a fresh LogisticPerceptron and return its per-epoch losses.

    A new model is created on every call, so runs with different learning
    rates do not share weights.

    Args:
        X: Input matrix; its column count sets the model's input size.
        y: Labels passed straight through to ``LogisticPerceptron.train``.
        learning_rate: Step size for gradient descent.
        epochs: Maximum number of training epochs.
        early_stopping_loss: Loss threshold below which training stops early.

    Returns:
        List of loss values, one per executed epoch.
    """
    model = LogisticPerceptron(X.shape[1])
    losses, _ = model.train(
        X,
        y,
        learning_rate=learning_rate,
        epochs=epochs,
        early_stopping_loss=early_stopping_loss,
    )
    return losses

# Try different learning rates
learning_rates = [0.001, 0.01, 0.1, 0.5]
# Build the list directly instead of assigning each run to `losses`: that name
# already holds the main model's loss history (plotted in the training-progress
# cell), and overwriting it here would silently change that plot on a re-run.
all_losses = [train_with_lr(X_normalized, y, lr) for lr in learning_rates]

# Plot comparison: one curve per learning rate. Curves can differ in length
# because a run may stop early.
plt.figure(figsize=(10, 6))
for lr, lr_losses in zip(learning_rates, all_losses):
    plt.plot(lr_losses, label=f'LR = {lr}')

plt.title('Impact of Learning Rate on Convergence')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

## Visualize Decision Boundary

In [None]:
# Decision boundary in the (length_cm, yellow_score) plane.
# These two features are used as they're likely the most discriminative.
# Only they vary across the grid; every other feature column stays at 0,
# which is that feature's mean after standardization.

# Column positions inside X_normalized (column 0 is the bias term)
feature1_idx = 1  # length_cm (after normalization)
feature2_idx = 3  # yellow_score (after normalization)

# Axis limits: the observed range of each feature, padded by 0.5 on both sides
feature1_values = X_normalized[:, feature1_idx]
feature2_values = X_normalized[:, feature2_idx]
x_min = feature1_values.min() - 0.5
x_max = feature1_values.max() + 0.5
y_min = feature2_values.min() - 0.5
y_max = feature2_values.max() + 0.5

# 100 x 100 mesh covering the plotting area
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, 100),
    np.linspace(y_min, y_max, 100),
)

# One model-input row per mesh point
grid = np.zeros((xx.size, X_normalized.shape[1]))
grid[:, 0] = 1  # Bias term
grid[:, feature1_idx] = xx.ravel()
grid[:, feature2_idx] = yy.ravel()

# Predicted probability at every mesh point, reshaped back onto the mesh
Z = model.forward(grid).reshape(xx.shape)

# Filled probability contours with a colorbar
plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)
plt.colorbar(label='Probability of being a banana')

# Overlay the training samples, coloured by their true label
scatter = plt.scatter(
    feature1_values,
    feature2_values,
    c=y.ravel(),
    cmap=plt.cm.coolwarm,
    edgecolors='k',
)

plt.title('Decision Boundary')
plt.xlabel('Normalized Length (cm)')
plt.ylabel('Normalized Yellow Score')
plt.legend(*scatter.legend_elements(), title='Fruit')
plt.grid(True)
plt.show()