In [None]:
# Number of boosting rounds requested from adaboost below.
T = 500

# Load the two splits. load_data is defined elsewhere in the notebook; each
# call is unpacked into a (features, labels) pair.
X_train, y_train = load_data('train_bank.csv')
X_test, y_test = load_data('test_bank.csv')

# Report both shapes with a single print call (one line per split).
print(
    f"Training data shape: {X_train.shape}",
    f"Test data shape: {X_test.shape}",
    sep="\n",
)

# Run AdaBoost on the training split. The call returns two values: the
# learned stumps and a per-round stump error series (plotted further down).
stumps, stump_errors = adaboost(X_train, y_train, T)

# Error curves: misclassification rate of the ensemble truncated to its
# first t stumps, for every t from 1 to T, on both data splits.
# NOTE(review): every iteration re-scores the whole prefix from scratch, so
# total work presumably grows quadratically in T; an incremental update would
# depend on adaboost_predict's internals, which are not visible here.
train_errors, test_errors = [], []

for t in range(1, T + 1):
    # Prefix of the learned stumps; the same slice is scored on both splits.
    ensemble = stumps[:t]
    train_preds = adaboost_predict(X_train, ensemble)
    test_preds = adaboost_predict(X_test, ensemble)

    # Fraction of samples whose prediction differs from the true label.
    train_error = np.mean(train_preds != y_train)
    train_errors.append(train_error)

    test_error = np.mean(test_preds != y_test)
    test_errors.append(test_error)

    # Emit a progress line on every 50th round only.
    if not t % 50:
        print(f"Iteration {t}, Train Error: {train_error:.4f}, Test Error: {test_error:.4f}")

# Plot results: ensemble error curves (left panel) and the per-round stump
# error series returned by adaboost (right panel).
plt.figure(figsize=(12, 5))

# Size each x-axis from the series it accompanies instead of from T. When a
# series has exactly T entries this is identical to range(1, T + 1); if it
# holds a different number of entries (e.g. should adaboost ever return fewer
# rounds than requested -- not verifiable from this cell), the plot still
# renders instead of failing on mismatched x/y lengths.
ensemble_rounds = range(1, len(train_errors) + 1)
stump_rounds = range(1, len(stump_errors) + 1)

# Left panel: training vs. test error as the ensemble grows.
plt.subplot(1, 2, 1)
plt.plot(ensemble_rounds, train_errors, label='Training Error')
plt.plot(ensemble_rounds, test_errors, label='Test Error')
plt.xlabel('Number of Iterations (T)')
plt.ylabel('Error Rate')
plt.title('AdaBoost Performance')
plt.legend()

# Right panel: error recorded for the individual stump at each round.
plt.subplot(1, 2, 2)
plt.plot(stump_rounds, stump_errors)
plt.xlabel('Iteration')
plt.ylabel('Stump Error')
plt.title('Decision Stump Errors')

plt.tight_layout()
plt.show()

Training data shape: (4999, 42)
Test data shape: (4999, 42)
Iteration 1/500, Train Error: 0.1088
Iteration 11/500, Train Error: 0.1050
Iteration 21/500, Train Error: 0.1004
Iteration 31/500, Train Error: 0.1020
Iteration 41/500, Train Error: 0.0982
Iteration 51/500, Train Error: 0.0994
Iteration 61/500, Train Error: 0.1008
Iteration 71/500, Train Error: 0.1004
Iteration 81/500, Train Error: 0.0994
Iteration 91/500, Train Error: 0.0992
Iteration 101/500, Train Error: 0.0990
Iteration 111/500, Train Error: 0.0984
Iteration 121/500, Train Error: 0.0990
Iteration 131/500, Train Error: 0.0968
Iteration 141/500, Train Error: 0.0974
Iteration 151/500, Train Error: 0.0964
Iteration 161/500, Train Error: 0.0976
Iteration 171/500, Train Error: 0.0964
Iteration 181/500, Train Error: 0.0956
Iteration 191/500, Train Error: 0.0952
Iteration 201/500, Train Error: 0.0956
Iteration 211/500, Train Error: 0.0946
Iteration 221/500, Train Error: 0.0938
Iteration 231/500, Train Error: 0.0948
Iteration 241/5