In [6]:
import numpy as np

def load_data(csv_content):
    """Load and preprocess data from CSV content string"""
    # Parse CSV content into lines
    lines = csv_content.strip().split('\n')
    
    # Convert each line to numpy arrays
    data = []
    labels = []
    for line in lines:
        # Skip empty lines
        if not line.strip():
            continue
        values = list(map(float, line.split(',')))
        # Add bias term
        features = np.array([1.0] + values[:-1])  # Add 1.0 as first element for bias
        label = 1 if values[-1] == 1 else -1  # Convert 0 to -1 for perceptron
        data.append(features)
        labels.append(label)
    
    return np.array(data), np.array(labels)

def perceptron(X, y, T=10):
    """
    Standard Perceptron implementation
    X: features with bias term (n_samples, n_features + 1)
    y: labels (-1 or 1)
    T: maximum number of epochs
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)  # Initialize weights to zero
    
    for epoch in range(T):
        mistakes = 0
        for i in range(n_samples):
            prediction = np.sign(np.dot(w, X[i]))
            if prediction * y[i] <= 0:  # Mistake was made
                w += y[i] * X[i]  # Update weights
                mistakes += 1
        
        print(f"Epoch {epoch + 1}, mistakes: {mistakes}")
        if mistakes == 0:  # If no mistakes, we've converged
            print(f"Converged at epoch {epoch + 1}")
            break
    
    return w

def evaluate(X, y, w):
    """Calculate prediction error"""
    predictions = np.sign(np.dot(X, w))
    errors = np.sum(predictions != y)
    return errors / len(y)


In [None]:
# Load training data
train_data, train_labels = load_data('train.csv')

# Train perceptron
print("Training Perceptron...")
w = perceptron(train_data, train_labels, T=10)

# Load and evaluate on test data
test_data, test_labels = load_data('test.csv')
test_error = evaluate(test_data, test_labels, w)

print("\nFinal weight vector:", w)
print(f"Test error rate: {test_error:.4f}")

# Print weights with feature names for interpretability
feature_names = ['bias', 'variance', 'skewness', 'curtosis', 'entropy']
print("\nWeights by feature:")
for name, weight in zip(feature_names, w):
    print(f"{name}: {weight:.4f}")


NameError: name 'train' is not defined