In [1]:
from tools import *
from data import *
from ensemble import *
import json
import matplotlib.pyplot as plt

In [2]:
def _format_result(rank, result):
    """Render one ranked ensemble result as a single summary line."""
    # float() first so numpy scalars print as plain Python floats.
    rounded_weights = [round(float(w), 2) for w in result['weights']]
    return (f"{rank}. Weights: {rounded_weights} | "
            f"Mean Accuracy: {result['mean_accuracy']:.3f} | "
            f"Std Accuracy: {result['std_accuracy']:.3f} | "
            f"Mean Log Loss: {result['mean_log_loss']:.3f} | "
            f"Std Log Loss: {result['std_log_loss']:.3f}")


def show_top_weights(final_results, n_top=5):
    """Print the best weight combinations by accuracy and by log loss.

    Args:
        final_results: Iterable of dicts, each providing the keys
            'weights', 'mean_accuracy', 'std_accuracy', 'mean_log_loss'
            and 'std_log_loss'.
        n_top: Maximum number of entries listed in each ranking.

    Returns:
        None. Two rankings are written to stdout: the ``n_top`` entries with
        the highest 'mean_accuracy', then the ``n_top`` entries with the
        lowest 'mean_log_loss'.
    """
    # Highest mean accuracy first.
    best_by_accuracy = sorted(
        final_results, key=lambda r: r['mean_accuracy'], reverse=True
    )[:n_top]

    # Lowest mean log loss first.
    best_by_log_loss = sorted(
        final_results, key=lambda r: r['mean_log_loss']
    )[:n_top]

    print(f"Top {n_top} Accuracies:")
    for rank, result in enumerate(best_by_accuracy, 1):
        print(_format_result(rank, result))

    print(f"\nBottom {n_top} Log Losses:")
    for rank, result in enumerate(best_by_log_loss, 1):
        print(_format_result(rank, result))


In [3]:
# Load the XGBoost parameters stored in model_xgb_params.json.
with open('model_xgb_params.json', mode='r') as params_file:
    xgb_params = json.loads(params_file.read())

# Run train_ensemble with mean_type='arithmetic', then list the ten best
# weight combinations by accuracy and by log loss.
results, best_result = train_ensemble(
    X_train,
    y_train,
    mean_type='arithmetic',
    xgb_params=xgb_params,
)
show_top_weights(results, n_top=10)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Top 10 Accuracies:
1. Weights: [0.45, 0.1, 0.45] | Mean Accuracy: 0.697 | Std Accuracy: 0.014 | Mean Log Loss: 0.694 | Std Log Loss: 0.012
2. Weights: [0.55, 0.15, 0.3] | Mean Accuracy: 0.697 | Std Accuracy: 0.016 | Mean Log Loss: 0.694 | Std Log Loss: 0.011
3. Weights: [0.5, 0.1, 0.4] | Mean Accuracy: 0.696 | Std Accuracy: 0.016 | Mean Log Loss: 0.694 | Std Log Loss: 0.012
4. Weights: [0.5, 0.15, 0.35] | Mean Accuracy: 0.696 | Std Accuracy: 0.016 | Mean Log Loss: 0.695 | Std Log Loss: 0.012
5. Weights: [0.55, 0.25, 0.2] | Mean Accuracy: 0.696 | Std Accuracy: 0.012 | Mean Log Loss: 0.697 | Std Log Loss: 0.011
6. Weights: [0.

In [4]:
# Same call as the previous cell but with mean_type='geometric'
# (presumably the way per-model predictions are averaged; confirm in train_ensemble).
# This overwrites `results` and `best_result` from the arithmetic run.
results, best_result = train_ensemble(
    X_train,
    y_train,
    mean_type='geometric',
    xgb_params=xgb_params,
)
show_top_weights(results, n_top=10)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


KeyboardInterrupt: 

In [None]:
# Scatter every model weight against the ensemble metrics, one point per
# entry in `results`. `results` is whatever the most recent completed
# train_ensemble(...) call returned.

# Extract weights and performance metrics.
# The result dicts expose the metrics as 'mean_log_loss' / 'mean_accuracy'
# (the keys show_top_weights reads), so those are used here instead of
# 'log_loss' / 'accuracy'.
weights_xgb = [res['weights'][0] for res in results]
weights_gnb = [res['weights'][1] for res in results]
weights_nn = [res['weights'][2] for res in results]
log_losses = [res['mean_log_loss'] for res in results]
accuracies = [res['mean_accuracy'] for res in results]

# (legend label, x values, marker, colour) for each weight series.
model_series = [
    ('XGB Weight', weights_xgb, 'o', 'blue'),
    ('GNB Weight', weights_gnb, 's', 'green'),
    ('NN Weight', weights_nn, '^', 'red'),
]

# (y-axis label, y values, panel title) for each subplot, left to right.
panels = [
    ("Log Loss", log_losses, "Ensemble Weights vs Log Loss"),
    ("Accuracy", accuracies, "Ensemble Weights vs Accuracy"),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

for ax, (metric_label, metric_values, panel_title) in zip(axes, panels):
    for series_label, series_weights, marker, color in model_series:
        ax.scatter(series_weights, metric_values, label=series_label,
                   alpha=0.6, marker=marker, color=color)
    ax.set_xlabel("Model Weights")
    ax.set_ylabel(metric_label)
    ax.legend()
    ax.set_title(panel_title)

fig.tight_layout()
plt.show()


In [None]:
# Display the `best_result` value unpacked from the most recent train_ensemble(...) call.
best_result

In [None]:
# Entry of `results` with the highest mean accuracy. The result dicts store
# that score under 'mean_accuracy' (the key show_top_weights sorts on), so
# that key is used here instead of 'accuracy'.
max(results, key=lambda res: res['mean_accuracy'])

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection on older matplotlib
import numpy as np

# 3D surfaces of the ensemble metrics over the (XGBoost weight, GNB weight)
# plane. The third weight is implied because the weights sum to 1.

# Extract weights and performance metrics.
# The result dicts expose the metrics as 'mean_log_loss' / 'mean_accuracy'
# (the keys show_top_weights reads), so those are used here instead of
# 'log_loss' / 'accuracy'.
weights_xgb = np.array([res['weights'][0] for res in results], dtype=float)
weights_gnb = np.array([res['weights'][1] for res in results], dtype=float)
log_losses = [res['mean_log_loss'] for res in results]
accuracies = [res['mean_accuracy'] for res in results]

# Create grid for plotting.
# indexing='ij' makes X[i, j] == weight_options[i] (XGBoost weight) and
# Y[i, j] == weight_options[j] (GNB weight), matching how Z[i, j] is filled
# below. The default 'xy' indexing would transpose the surface relative to
# the axis labels.
weight_options = np.linspace(0, 1, 21)
X, Y = np.meshgrid(weight_options, weight_options, indexing='ij')
Z_accuracy = np.full(X.shape, np.nan)
Z_log_loss = np.full(X.shape, np.nan)

# Fill the grid with accuracy and log loss values
for i, w_xgb in enumerate(weight_options):
    for j, w_gnb in enumerate(weight_options):
        weight_sum = w_xgb + w_gnb
        # Tolerant comparison: float round-off can push a sum that should be
        # exactly 1 slightly above it, which would drop boundary points.
        if weight_sum > 1 and not np.isclose(weight_sum, 1):
            continue
        matches = np.flatnonzero(
            np.isclose(weights_xgb, w_xgb) & np.isclose(weights_gnb, w_gnb)
        )
        if matches.size == 0:
            # No result for this weight combination: leave the cell as NaN
            # instead of raising StopIteration.
            continue
        idx = matches[0]  # first matching result, as before
        Z_accuracy[i, j] = accuracies[idx]
        Z_log_loss[i, j] = log_losses[idx]

# Plotting accuracy
fig = plt.figure(figsize=(14, 7))

# Plot Accuracy surface
ax1 = fig.add_subplot(121, projection='3d')
ax1.plot_surface(X, Y, Z_accuracy, cmap='viridis')
ax1.set_xlabel('XGBoost Weight')
ax1.set_ylabel('GNB Weight')
ax1.set_zlabel('Accuracy')
ax1.set_title('Accuracy vs Model Weights')

# Plot Log Loss surface
ax2 = fig.add_subplot(122, projection='3d')
ax2.plot_surface(X, Y, Z_log_loss, cmap='plasma')
ax2.set_xlabel('XGBoost Weight')
ax2.set_ylabel('GNB Weight')
ax2.set_zlabel('Log Loss')
ax2.set_title('Log Loss vs Model Weights')

plt.tight_layout()
plt.show()
