In [1]:
import numpy as np

def load_data(filename):
    data = np.genfromtxt(filename, delimiter=',', dtype=str)
    
    X = data[:, :-1]
    y = data[:, -1]
    
    X_encoded = np.zeros(X.shape, dtype=float)
    for i in range(X.shape[1]):
        try:
            X_encoded[:, i] = X[:, i].astype(float)
        except ValueError:
            unique_values = np.unique(X[:, i])
            X_encoded[:, i] = np.array([np.where(unique_values == val)[0][0] for val in X[:, i]])
    
    y = (y == 'yes').astype(int) * 2 - 1
    
    return X_encoded, y

def perceptron(X, y, T=10):
    """
    Standard Perceptron implementation
    X: features with bias term (n_samples, n_features + 1)
    y: labels (-1 or 1)
    T: maximum number of epochs
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)  # Initialize weights to zero
    
    for epoch in range(T):
        mistakes = 0
        for i in range(n_samples):
            prediction = np.sign(np.dot(w, X[i]))
            if prediction * y[i] <= 0:  # Mistake was made
                w += y[i] * X[i]  # Update weights
                mistakes += 1
        
        print(f"Epoch {epoch + 1}, mistakes: {mistakes}")
        if mistakes == 0:  # If no mistakes, we've converged
            print(f"Converged at epoch {epoch + 1}")
            break
    
    return w

def evaluate(X, y, w):
    """Calculate prediction error"""
    predictions = np.sign(np.dot(X, w))
    errors = np.sum(predictions != y)
    return errors / len(y)


In [2]:
# Load training data
train_data, train_labels = load_data('train.csv')

# Train perceptron
print("Training Perceptron...")
w = perceptron(train_data, train_labels, T=10)

# Load and evaluate on test data
test_data, test_labels = load_data('test.csv')
test_error = evaluate(test_data, test_labels, w)

print("\nFinal weight vector:", w)
print(f"Test error rate: {test_error:.4f}")

# Print weights with feature names for interpretability
feature_names = ['bias', 'variance', 'skewness', 'curtosis', 'entropy']
print("\nWeights by feature:")
for name, weight in zip(feature_names, w):
    print(f"{name}: {weight:.4f}")


Training Perceptron...
Epoch 1, mistakes: 160
Epoch 2, mistakes: 148
Epoch 3, mistakes: 159
Epoch 4, mistakes: 143
Epoch 5, mistakes: 150
Epoch 6, mistakes: 148
Epoch 7, mistakes: 155
Epoch 8, mistakes: 141
Epoch 9, mistakes: 146
Epoch 10, mistakes: 155

Final weight vector: [-2.3029648 -4.746074  -6.529434  14.276519 ]
Test error rate: 0.1660

Weights by feature:
bias: -2.3030
variance: -4.7461
skewness: -6.5294
curtosis: 14.2765
