In [1]:
import numpy as np

def load_data(filename):
    """
    Read a comma-separated numeric file into a design matrix and +/-1 labels.

    filename: source handed straight to np.genfromtxt (e.g. a path to a CSV)

    Returns:
    X_with_bias: every column except the last, with a column of ones prepended
    y: the last column mapped through 2*y - 1 (so 0 -> -1 and 1 -> 1)
    """
    table = np.genfromtxt(filename, delimiter=',')

    # The final column holds the label; everything before it is a feature
    features, raw_labels = table[:, :-1], table[:, -1]

    # Prepend the constant column that acts as the bias input
    ones = np.ones(len(features))
    X_with_bias = np.column_stack([ones, features])

    # Relabel {0, 1} as {-1, 1}, the convention the perceptron update expects
    return X_with_bias, 2 * raw_labels - 1

def voted_perceptron(X, y, T=10, verbose=True):
    """
    Voted Perceptron implementation
    X: features with bias term (n_samples, n_features + 1)
    y: labels (-1 or 1)
    T: maximum number of epochs
    verbose: if True, print the running number of stored weight vectors
             after every epoch

    Returns:
    w_list: 2-D array of weight vectors, shape (n_vectors, X.shape[1]);
            n_vectors is 0 when nothing was learned (T == 0 or no samples)
    c_list: integer array of counts (survival times), one per weight vector
    """
    # n_dims already includes the bias column supplied by the caller
    n_samples, n_dims = X.shape

    # Retired weight vectors and the number of examples each one survived
    w_list = []
    c_list = []

    # Current weight vector; c == 0 marks the initial all-zero vector, which
    # is never stored because it always errs on the first example
    w = np.zeros(n_dims)
    c = 0

    for epoch in range(T):
        for i in range(n_samples):
            prediction = np.sign(np.dot(w, X[i]))

            # sign(0) == 0, so a point lying on the boundary counts as a mistake
            if prediction * y[i] <= 0:
                # Retire the current vector together with its survival count
                if c > 0:
                    w_list.append(w.copy())
                    c_list.append(c)

                # Perceptron update; builds a new array, so stored vectors
                # are never modified afterwards
                w = w + y[i] * X[i]
                c = 1
            else:
                c += 1  # Current weights survived one more example

        if verbose:
            print(f"Epoch {epoch + 1}, distinct weight vectors: {len(w_list)}")

    # The vector still in use when training stops also gets a vote
    if c > 0:
        w_list.append(w)
        c_list.append(c)

    # Force a (n_vectors, n_dims) shape so that an empty result is still 2-D
    # and can be transposed and multiplied like a non-empty one
    weights = np.array(w_list).reshape(len(w_list), n_dims)
    counts = np.array(c_list, dtype=int)
    return weights, counts

def predict_voted(X, w_list, c_list):
    """
    Classify samples by a count-weighted vote over all stored weight vectors.

    X: samples, one per row, with the same columns used in training
    w_list: array of weight vectors, one per row
    c_list: count (vote weight) for each row of w_list

    Returns:
    final_predictions: sign of the weighted vote for each sample; a tied
    vote yields 0 rather than -1 or 1
    """
    # Raw score of every sample under every weight vector: (n_samples, n_weights)
    scores = np.dot(X, w_list.T)

    # Each weight vector casts a -1/0/+1 ballot per sample
    ballots = np.sign(scores)

    # Scale each ballot by its vector's count and total the ballots per sample
    tally = np.sum(ballots * c_list, axis=1)

    return np.sign(tally)

def evaluate(y_true, y_pred):
    """
    Calculate prediction error

    y_true: true labels (array or plain sequence)
    y_pred: predicted labels (array, plain sequence, or a scalar to broadcast)

    Returns:
    fraction of positions where y_true and y_pred differ
    """
    # Coerce first: comparing two plain lists with != yields a single bool
    # (whole-list inequality) instead of an element-wise comparison
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true != y_pred)


In [6]:
# Load data
print("Loading data...")
train_data, train_labels = load_data('train.csv')
test_data, test_labels = load_data('test.csv')

# Train voted perceptron
print("\nTraining Voted Perceptron...")
w_list, c_list = voted_perceptron(train_data, train_labels, T=10)

# Evaluate on training set
train_pred = predict_voted(train_data, w_list, c_list)
train_error = evaluate(train_labels, train_pred)
# Vectorized count of matches (avoids iterating the array with built-in sum)
train_correct = np.count_nonzero(train_pred == train_labels)

# Evaluate on test set
test_pred = predict_voted(test_data, w_list, c_list)
test_error = evaluate(test_labels, test_pred)

# Report results
print("\nResults:")
print(f"Number of distinct weight vectors: {len(w_list)}")
print(f"Number of correctly predicted training examples: {train_correct}")
print(f"Training error rate: {train_error:.4f}")
print(f"Test error rate: {test_error:.4f}")

# Labels for the weight columns, bias first. zip() below stops at the shorter
# sequence, so any weights beyond these five names would be silently omitted.
feature_names = ['bias', 'variance', 'skewness', 'curtosis', 'entropy']

print("\nWeight vectors and their counts:")
for i, (w, c) in enumerate(zip(w_list, c_list)):
    print(f"\nWeight vector {i+1}:")
    print(f"Count (survival time): {c}")
    print("Weights by feature:")
    for name, weight in zip(feature_names, w):
        print(f"{name}: {weight:.4f}")

Loading data...

Training Voted Perceptron...
Epoch 1, distinct weight vectors: 52
Epoch 2, distinct weight vectors: 82
Epoch 3, distinct weight vectors: 104
Epoch 4, distinct weight vectors: 129
Epoch 5, distinct weight vectors: 149
Epoch 6, distinct weight vectors: 170
Epoch 7, distinct weight vectors: 192
Epoch 8, distinct weight vectors: 217
Epoch 9, distinct weight vectors: 234
Epoch 10, distinct weight vectors: 258

Results:
Number of distinct weight vectors: 259
Number of correctly predicted training examples: 859
Training error rate: 0.0149
Test error rate: 0.0140

Weight vectors and their counts:

Weight vector 1:
Count (survival time): 2
Weights by feature:
bias: -1.0000
variance: -3.8481
skewness: -10.1539
curtosis: 3.8561
entropy: 4.2228

Weight vector 2:
Count (survival time): 1
Weights by feature:
bias: -2.0000
variance: -3.8001
skewness: -8.5502
curtosis: -4.6195
entropy: 3.4672

Weight vector 3:
Count (survival time): 8
Weights by feature:
bias: -1.0000
variance: -5.066