In [3]:
import numpy as np

def load_data(filename):
    # Load data directly as float
    data = np.genfromtxt(filename, delimiter=',')
    
    # Split into features and labels
    X = data[:, :-1]
    y = data[:, -1]
    
    # Add bias term
    X_with_bias = np.column_stack([np.ones(len(X)), X])
    
    # Convert 0/1 labels to -1/1
    y = 2 * y - 1
    
    return X_with_bias, y

def average_perceptron(X, y, T=10):
    """
    Average Perceptron implementation
    X: features with bias term (n_samples, n_features + 1)
    y: labels (-1 or 1)
    T: maximum number of epochs
    """
    n_samples, n_features = X.shape
    
    # Initialize weights and accumulated weights
    w = np.zeros(n_features)
    a = np.zeros(n_features)  # accumulated weights
    
    # Count total updates for averaging
    count = 1
    
    # Training loop
    for epoch in range(T):
        mistakes = 0
        for i in range(n_samples):
            prediction = np.sign(np.dot(w, X[i]))
            
            if prediction * y[i] <= 0:  # Mistake was made
                mistakes += 1
                # Update weights
                w = w + y[i] * X[i]
            
            # Accumulate weights
            a = a + w
            count += 1
                
        print(f"Epoch {epoch + 1}, mistakes: {mistakes}")
    
    # Calculate average weights
    a = a / count
    
    return a

def predict(X, w):
    """Make predictions using weight vector"""
    return np.sign(np.dot(X, w))

def evaluate(y_true, y_pred):
    """Calculate prediction error"""
    return np.mean(y_true != y_pred)


In [4]:
# Load data
print("Loading data...")
train_data, train_labels = load_data('train.csv')
test_data, test_labels = load_data('test.csv')

# Train average perceptron
print("\nTraining Average Perceptron...")
w_avg = average_perceptron(train_data, train_labels, T=10)

# Make predictions
train_pred = predict(train_data, w_avg)
test_pred = predict(test_data, w_avg)

# Calculate errors
train_error = evaluate(train_labels, train_pred)
test_error = evaluate(test_labels, test_pred)

# Report results
print("\nResults:")
print("Average weight vector:")
feature_names = ['bias', 'variance', 'skewness', 'curtosis', 'entropy']
for name, weight in zip(feature_names, w_avg):
    print(f"{name}: {weight:.4f}")

print(f"\nTraining error rate: {train_error:.4f}")
print(f"Test error rate: {test_error:.4f}")

Loading data...

Training Average Perceptron...
Epoch 1, mistakes: 53
Epoch 2, mistakes: 30
Epoch 3, mistakes: 22
Epoch 4, mistakes: 25
Epoch 5, mistakes: 20
Epoch 6, mistakes: 21
Epoch 7, mistakes: 22
Epoch 8, mistakes: 25
Epoch 9, mistakes: 17
Epoch 10, mistakes: 24

Results:
Average weight vector:
bias: 36.3779
variance: -46.5954
skewness: -29.0640
curtosis: -30.1413
entropy: -8.9411

Training error rate: 0.0161
Test error rate: 0.0140
