In [5]:
import numpy as np
import pandas as pd

# Load the data
train_data = pd.read_csv("datasets/bank-note/train.csv", header=None)
test_data = pd.read_csv("datasets/bank-note/test.csv", header=None)

# Extract features and labels
X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values
X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values

T = 10  # Maximum number of epochs

distinct_weight_vectors = []
weight_vector_counts = []

for epoch in range(1, T + 1):
    # Initialize the weight vector for this epoch
    weight_vector = np.zeros(X_train.shape[1])
    correctly_predicted_count = 0

    # Shuffle the training data
    shuffle_indices = np.random.permutation(len(X_train))
    X_train_shuffled = X_train[shuffle_indices]
    y_train_shuffled = y_train[shuffle_indices]

    for i in range(len(X_train_shuffled)):
        # Perceptron update
        prediction = np.sign(np.dot(weight_vector, X_train_shuffled[i]))
        if prediction * y_train_shuffled[i] <= 0:
            weight_vector += y_train_shuffled[i] * X_train_shuffled[i]
        else:
            correctly_predicted_count += 1

    # Store the distinct weight vectors and their counts
    if not any(np.array_equal(weight_vector, w) for w in distinct_weight_vectors):
        distinct_weight_vectors.append(weight_vector)
        weight_vector_counts.append(correctly_predicted_count)

# Testing
test_errors = 0
for i in range(len(X_test)):
    votes = [np.sign(np.dot(weight, X_test[i])) for weight in distinct_weight_vectors]
    predicted_label = max(set(votes), key=votes.count)
    if predicted_label != y_test[i]:
        test_errors += 1

# Calculate the average test error
average_test_error = test_errors / len(X_test) * 100

print("Distinct Weight Vectors and Their Counts:")
for weight, count in zip(distinct_weight_vectors, weight_vector_counts):
    print(f"Weight Vector: {weight}, Correctly Predicted: {count}")

print(f"Average Test Error: {average_test_error:.2f}%")


Distinct Weight Vectors and Their Counts:
Weight Vector: [-12.2665776  -7.8265     -6.865495   -6.1982902], Correctly Predicted: 328
Weight Vector: [ -9.7568017 -12.000472   -6.018925   -7.0182302], Correctly Predicted: 319
Weight Vector: [-12.9323656  -9.606395   -3.28569   -15.1659302], Correctly Predicted: 322
Weight Vector: [-13.4807966 -10.611955   -3.620055  -12.7522272], Correctly Predicted: 320
Weight Vector: [-13.0163407 -13.318698   -0.743488   -3.8173032], Correctly Predicted: 318
Weight Vector: [-10.0841294  -5.763855   -5.178865   -8.3548716], Correctly Predicted: 315
Weight Vector: [-13.4950024 -10.220625   -4.5937606 -11.0487506], Correctly Predicted: 322
Weight Vector: [-12.7151467 -15.127045    0.579005   -8.1113342], Correctly Predicted: 314
Weight Vector: [-14.5986646  -8.019115   -7.088168  -14.9544252], Correctly Predicted: 317
Weight Vector: [-16.0233367  -4.528274   -8.8793916 -23.7619252], Correctly Predicted: 312
Average Test Error: 62.00%
