In [1]:
import numpy as np

def load_data(filename):
    """Read a comma-delimited numeric file into a design matrix and label vector.

    filename: passed straight to np.genfromtxt (comma delimiter).

    Returns a tuple (X_with_bias, y):
      X_with_bias -- every column except the last, with a column of ones
                     inserted in front as the bias term
      y           -- the last column transformed by 2*y - 1, which sends
                     0 to -1 and 1 to +1
    """
    table = np.genfromtxt(filename, delimiter=',')

    # Final column holds the labels; rescale them from {0, 1} to {-1, +1}
    labels = table[:, -1] * 2 - 1

    # Everything before the final column is a feature
    features = table[:, :-1]

    # Prepend a constant column so the first weight acts as the bias
    bias_column = np.ones(len(features))
    return np.column_stack([bias_column, features]), labels

def perceptron(X, y, T=10):
    """
    Train a linear classifier with the classic perceptron update rule.

    X: 2-D sample matrix, one row per sample; the weight vector has one
       entry per column of X (the caller is expected to have already
       included the bias column)
    y: labels, expected to be -1 or 1
    T: maximum number of passes (epochs) over the samples

    Samples are visited in row order. The mistake count is printed after
    every epoch, and training stops early after an epoch with no mistakes.
    Returns the weight vector, which starts at all zeros.
    """
    row_count, dim = X.shape
    w = np.zeros(dim)

    for epoch in range(T):
        mistakes = 0
        for idx in range(row_count):
            sample, label = X[idx], y[idx]
            # A sign of 0 (score exactly on the boundary) also counts as a miss
            misclassified = np.sign(w.dot(sample)) * label <= 0
            if not misclassified:
                continue
            # Nudge the weights toward the correct side for this sample
            w += label * sample
            mistakes += 1

        print(f"Epoch {epoch + 1}, mistakes: {mistakes}")
        if mistakes == 0:
            # A clean pass means every sample is classified correctly
            print(f"Converged at epoch {epoch + 1}")
            break

    return w

def evaluate(X, y, w):
    """Return the fraction of samples whose predicted sign differs from its label.

    X: sample matrix (rows are samples)
    y: labels to compare against
    w: weight vector used to score each row of X

    A score of exactly zero gives a predicted sign of 0, which does not
    match a -1/+1 label and is therefore counted as an error.
    """
    scores = np.dot(X, w)
    mismatched = np.sign(scores) != y
    return np.sum(mismatched) / len(y)


In [5]:
# --- Training split: read it and fit the perceptron ---
train_data, train_labels = load_data(filename='train.csv')

print("Training Perceptron...")
w = perceptron(X=train_data, y=train_labels, T=10)

# --- Test split: read it and measure the error rate of the learned weights ---
test_data, test_labels = load_data(filename='test.csv')
test_error = evaluate(X=test_data, y=test_labels, w=w)

# --- Report ---
print("\nFinal weight vector:", w)
print(f"Test error rate: {test_error:.4f}")

# Pair each weight with a readable label; 'bias' comes first because
# load_data puts the column of ones in front of the features
feature_names = ['bias', 'variance', 'skewness', 'curtosis', 'entropy']
print("\nWeights by feature:")
for name, weight in zip(feature_names, w):
    print(f"{name}: {weight:.4f}")


Training Perceptron...
Epoch 1, mistakes: 53
Epoch 2, mistakes: 30
Epoch 3, mistakes: 22
Epoch 4, mistakes: 25
Epoch 5, mistakes: 20
Epoch 6, mistakes: 21
Epoch 7, mistakes: 22
Epoch 8, mistakes: 25
Epoch 9, mistakes: 17
Epoch 10, mistakes: 24

Final weight vector: [ 53.       -61.086591 -42.70582  -40.30786   -3.146269]
Test error rate: 0.0200

Weights by feature:
bias: 53.0000
variance: -61.0866
skewness: -42.7058
curtosis: -40.3079
entropy: -3.1463
