In [1]:
import torch
import pandas as pd
import numpy as np
import glob
import os
from collections import Counter
from ogb.nodeproppred import PygNodePropPredDataset

# Fix for PyTorch 2.6+ compatibility with OGB dataset loading
# Patch torch.load to use weights_only=False for compatibility with torch_geometric
_original_torch_load = torch.load
def _patched_torch_load(*args, **kwargs):
    # Set weights_only=False if not explicitly provided (for PyTorch 2.6+)
    if 'weights_only' not in kwargs:
        kwargs['weights_only'] = False
    return _original_torch_load(*args, **kwargs)
torch.load = _patched_torch_load

# Configuration
dataset_name = 'products'
predictions_dir = 'predictions'
models_dir = 'models'
labels_csv = 'gcn_predictions_2.csv'
gcn_pt_file = 'gcn_outputs_2.pt'
cs_dir = 'ogbn-products_final'


  from pkg_resources import parse_version


In [2]:
# Load dataset and get split indices
print("Loading dataset and split indices...")
dataset = PygNodePropPredDataset(name=f'ogbn-{dataset_name}')
data = dataset[0]
split_idx = dataset.get_idx_split()
test_idx = split_idx['test'].numpy()
print(f"Test set size: {len(test_idx)} nodes")

# Load true labels
print("\nLoading true labels...")
labels_df = pd.read_csv(labels_csv)
labels_df = labels_df.sort_values('node_id').reset_index(drop=True)
true_labels = labels_df['true_label'].values
num_nodes = len(true_labels)
print(f"Loaded {num_nodes} total nodes")

# Get test set labels
test_labels = true_labels[test_idx]


Loading dataset and split indices...
Test set size: 2213091 nodes

Loading true labels...
Loaded 2449029 total nodes


In [3]:
# Define the 5 best models (from model_aggregation analysis)
best_models = {
    'plain_run1': 'plain',
    'linear_run3': 'linear',
    'mlp_run2': 'mlp',
    'gamlp_cs_09883da0': 'gamlp_cs',
    'gamlp_09883da0': 'gamlp'
}

print("Loading predictions for the 5 best models...")
print("Best models:", list(best_models.keys()))

# Dictionary to store predictions (top 3) and probabilities for each model
model_predictions = {}  # {model_name: DataFrame with prediction_1, prediction_2, prediction_3}
model_probabilities = {}  # {model_name: tensor of shape [num_nodes, num_classes]}

# Load predictions for each model
for model_name in best_models.keys():
    print(f"\nLoading {model_name}...")
    
    if model_name.startswith('plain_run'):
        run_num = int(model_name.replace('plain_run', ''))
        pred_file = f'{predictions_dir}/{dataset_name}_plain/run{run_num}_predictions.csv'
        df = pd.read_csv(pred_file)
        df = df.sort_values('node_id').reset_index(drop=True)
        model_predictions[model_name] = df
        # Load probabilities from .pt file
        pt_file = f'{predictions_dir}/{dataset_name}_plain/run{run_num}.pt'
        if os.path.exists(pt_file):
            probs = torch.load(pt_file, map_location='cpu')
            model_probabilities[model_name] = probs
        
    elif model_name.startswith('linear_run'):
        run_num = int(model_name.replace('linear_run', ''))
        pred_file = f'{predictions_dir}/{dataset_name}_linear/run{run_num}_predictions.csv'
        df = pd.read_csv(pred_file)
        df = df.sort_values('node_id').reset_index(drop=True)
        model_predictions[model_name] = df
        # Load probabilities from .pt file
        pt_file = f'{predictions_dir}/{dataset_name}_linear/run{run_num}.pt'
        if os.path.exists(pt_file):
            probs = torch.load(pt_file, map_location='cpu')
            model_probabilities[model_name] = probs
            
    elif model_name.startswith('mlp_run'):
        run_num = int(model_name.replace('mlp_run', ''))
        pred_file = f'{predictions_dir}/{dataset_name}_mlp/run{run_num}_predictions.csv'
        df = pd.read_csv(pred_file)
        df = df.sort_values('node_id').reset_index(drop=True)
        model_predictions[model_name] = df
        # Load probabilities from .pt file
        pt_file = f'{predictions_dir}/{dataset_name}_mlp/run{run_num}.pt'
        if os.path.exists(pt_file):
            probs = torch.load(pt_file, map_location='cpu')
            model_probabilities[model_name] = probs
            
    elif model_name.startswith('gcn_run'):
        # GCN from fallback file
        if os.path.exists(gcn_pt_file):
            probs = torch.load(gcn_pt_file, map_location='cpu')
            model_probabilities[model_name] = probs
            # Create DataFrame from probabilities
            top3_values, top3_indices = torch.topk(probs, k=3, dim=1)
            df = pd.DataFrame({
                'node_id': np.arange(len(probs)),
                'prediction_1': top3_indices[:, 0].numpy(),
                'prediction_2': top3_indices[:, 1].numpy(),
                'prediction_3': top3_indices[:, 2].numpy(),
                'truth': true_labels
            })
            model_predictions[model_name] = df
            
    elif model_name.startswith('gamlp_cs_'):
        # Extract hash from model name
        hash_part = model_name.replace('gamlp_cs_', '')
        # Find matching file
        cs_files = glob.glob(f'{cs_dir}/*_cs.pt')
        for cs_file in cs_files:
            if hash_part in os.path.basename(cs_file):
                probs = torch.load(cs_file, map_location='cpu')
                model_probabilities[model_name] = probs
                # Create DataFrame from probabilities
                top3_values, top3_indices = torch.topk(probs, k=3, dim=1)
                df = pd.DataFrame({
                    'node_id': np.arange(len(probs)),
                    'prediction_1': top3_indices[:, 0].numpy(),
                    'prediction_2': top3_indices[:, 1].numpy(),
                    'prediction_3': top3_indices[:, 2].numpy(),
                    'truth': true_labels
                })
                model_predictions[model_name] = df
                break
                
    elif model_name.startswith('gamlp_'):
        # Extract hash from model name (without cs)
        hash_part = model_name.replace('gamlp_', '')
        # Find matching file (non-CS)
        gamlp_files = glob.glob(f'{cs_dir}/*.pt')
        gamlp_files = [f for f in gamlp_files if '_cs.pt' not in f]
        for gamlp_file in gamlp_files:
            if hash_part in os.path.basename(gamlp_file):
                probs = torch.load(gamlp_file, map_location='cpu')
                model_probabilities[model_name] = probs
                # Create DataFrame from probabilities
                top3_values, top3_indices = torch.topk(probs, k=3, dim=1)
                df = pd.DataFrame({
                    'node_id': np.arange(len(probs)),
                    'prediction_1': top3_indices[:, 0].numpy(),
                    'prediction_2': top3_indices[:, 1].numpy(),
                    'prediction_3': top3_indices[:, 2].numpy(),
                    'truth': true_labels
                })
                model_predictions[model_name] = df
                break

print(f"\nLoaded {len(model_predictions)} models with predictions")
print(f"Loaded {len(model_probabilities)} models with probabilities")


Loading predictions for the 5 best models...
Best models: ['plain_run1', 'linear_run3', 'mlp_run2', 'gamlp_cs_09883da0', 'gamlp_09883da0']

Loading plain_run1...

Loading linear_run3...

Loading mlp_run2...

Loading gamlp_cs_09883da0...

Loading gamlp_09883da0...

Loaded 5 models with predictions
Loaded 5 models with probabilities


## Voting Method 1: Basic Voting (1 vote per model for top prediction)


In [4]:
print("=" * 60)
print("VOTING METHOD 1: BASIC VOTING")
print("=" * 60)
print("Each model gets 1 vote for their top prediction")
print()

# Get predictions for test nodes only
ensemble_predictions = []

for node_idx in test_idx:
    # Collect votes from each model
    votes = []
    for model_name in best_models.keys():
        df = model_predictions[model_name]
        top_pred = df.loc[node_idx, 'prediction_1']
        votes.append(int(top_pred))
    
    # Count votes and select most common (with tie-breaking)
    vote_counts = Counter(votes)
    # Get the class with most votes, break ties by selecting the first one
    most_common = vote_counts.most_common(1)[0][0]
    ensemble_predictions.append(most_common)

ensemble_predictions = np.array(ensemble_predictions)

# Calculate accuracy
correct = (ensemble_predictions == test_labels).sum()
accuracy = correct / len(test_idx)

print(f"Ensemble accuracy (Basic Voting): {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 1: BASIC VOTING
Each model gets 1 vote for their top prediction

Ensemble accuracy (Basic Voting): 0.8457 (84.57%)
Correct predictions: 1871600 / 2213091


## Voting Method 2: Ranked Choice Voting (3 points for 1st, 2 for 2nd, 1 for 3rd)


In [5]:
print("=" * 60)
print("VOTING METHOD 2: RANKED CHOICE VOTING")
print("=" * 60)
print("Each model gets 3 votes: 3 points for 1st choice, 2 for 2nd, 1 for 3rd")
print()

# Get number of classes (from first model's probabilities)
num_classes = model_probabilities[list(best_models.keys())[0]].shape[1]

ensemble_predictions_ranked = []

for node_idx in test_idx:
    # Initialize score array for all classes
    class_scores = np.zeros(num_classes)
    
    # Collect votes from each model
    for model_name in best_models.keys():
        df = model_predictions[model_name]
        pred1 = int(df.loc[node_idx, 'prediction_1'])
        pred2 = int(df.loc[node_idx, 'prediction_2'])
        pred3 = int(df.loc[node_idx, 'prediction_3'])
        
        # Add points: 3 for 1st, 2 for 2nd, 1 for 3rd
        class_scores[pred1] += 3
        class_scores[pred2] += 2
        class_scores[pred3] += 1
    
    # Select class with highest score
    predicted_class = np.argmax(class_scores)
    ensemble_predictions_ranked.append(predicted_class)

ensemble_predictions_ranked = np.array(ensemble_predictions_ranked)

# Calculate accuracy
correct = (ensemble_predictions_ranked == test_labels).sum()
accuracy = correct / len(test_idx)

print(f"Ensemble accuracy (Ranked Choice Voting): {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 2: RANKED CHOICE VOTING
Each model gets 3 votes: 3 points for 1st choice, 2 for 2nd, 1 for 3rd

Ensemble accuracy (Ranked Choice Voting): 0.8485 (84.85%)
Correct predictions: 1877803 / 2213091


## Voting Method 3: Probability Sum Voting (sum all probabilities, highest wins)


In [6]:
print("=" * 60)
print("VOTING METHOD 3: PROBABILITY SUM VOTING")
print("=" * 60)
print("Sum all output probabilities across models, highest sum wins")
print()

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"Warning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    ensemble_predictions_prob = []
    
    for node_idx in test_idx:
        # Sum probabilities across all models
        summed_probs = None
        
        for model_name in best_models.keys():
            probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
            
            if summed_probs is None:
                summed_probs = probs.clone() if isinstance(probs, torch.Tensor) else probs.copy()
            else:
                summed_probs += probs
        
        # Convert to numpy if needed
        if isinstance(summed_probs, torch.Tensor):
            summed_probs = summed_probs.numpy()
        
        # Select class with highest summed probability
        predicted_class = np.argmax(summed_probs)
        ensemble_predictions_prob.append(predicted_class)

    ensemble_predictions_prob = np.array(ensemble_predictions_prob)

    # Calculate accuracy
    correct = (ensemble_predictions_prob == test_labels).sum()
    accuracy = correct / len(test_idx)

    print(f"Ensemble accuracy (Probability Sum Voting): {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 3: PROBABILITY SUM VOTING
Sum all output probabilities across models, highest sum wins

Ensemble accuracy (Probability Sum Voting): 0.8518 (85.18%)
Correct predictions: 1885028 / 2213091


## Voting Method 4: Proportional Top-5 Voting (5 points split proportionally among top 5)


In [7]:
print("=" * 60)
print("VOTING METHOD 4: PROPORTIONAL TOP-5 VOTING")
print("=" * 60)
print("Each model gets 5 points, split proportionally among top 5 probabilities")
print()

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"Warning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    ensemble_predictions_prop5 = []
    
    for node_idx in test_idx:
        # Initialize score array for all classes
        class_scores = np.zeros(num_classes)
        
        # Collect votes from each model
        for model_name in best_models.keys():
            probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
            
            # Convert to numpy if needed
            if isinstance(probs, torch.Tensor):
                probs = probs.numpy()
            
            # Get top 5 probabilities and their indices
            top5_indices = np.argsort(probs)[-5:][::-1]  # Indices of top 5 (descending)
            top5_probs = probs[top5_indices]  # Top 5 probabilities
            
            # Sum of top 5 probabilities
            top5_sum = np.sum(top5_probs)
            
            # Allocate 5 points proportionally
            if top5_sum > 0:
                # Normalize by sum and multiply by 5
                point_allocations = (top5_probs / top5_sum) * 5.0
                
                # Add points to class scores
                for idx, points in zip(top5_indices, point_allocations):
                    class_scores[idx] += points
        
        # Select class with highest score
        predicted_class = np.argmax(class_scores)
        ensemble_predictions_prop5.append(predicted_class)

    ensemble_predictions_prop5 = np.array(ensemble_predictions_prop5)

    # Calculate accuracy
    correct = (ensemble_predictions_prop5 == test_labels).sum()
    accuracy = correct / len(test_idx)

    print(f"Ensemble accuracy (Proportional Top-5 Voting): {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 4: PROPORTIONAL TOP-5 VOTING
Each model gets 5 points, split proportionally among top 5 probabilities

Ensemble accuracy (Proportional Top-5 Voting): 0.8524 (85.24%)
Correct predictions: 1886520 / 2213091


## Voting Method 5: Performance-Weighted Probability Sum (weight by individual model accuracy)


In [8]:
print("=" * 60)
print("VOTING METHOD 5: PERFORMANCE-WEIGHTED PROBABILITY SUM")
print("=" * 60)
print("Sum probabilities weighted by each model's individual accuracy")
print()

# Calculate individual model accuracies for weighting
model_weights = {}
for model_name in best_models.keys():
    df = model_predictions[model_name]
    test_preds = df.loc[test_idx, 'prediction_1'].values
    correct = (test_preds == test_labels).sum()
    acc = correct / len(test_idx)
    model_weights[model_name] = acc

# Normalize weights to sum to number of models (so average weight = 1)
total_weight = sum(model_weights.values())
num_models = len(model_weights)
for model_name in model_weights:
    model_weights[model_name] = (model_weights[model_name] / total_weight) * num_models

print("Model weights (normalized):")
for model_name, weight in model_weights.items():
    print(f"  {model_name:25s}: {weight:.4f}")

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"\nWarning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    ensemble_predictions_weighted = []
    
    for node_idx in test_idx:
        # Sum weighted probabilities across all models
        summed_probs = None
        
        for model_name in best_models.keys():
            probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
            weight = model_weights[model_name]
            
            if summed_probs is None:
                summed_probs = (probs * weight).clone() if isinstance(probs, torch.Tensor) else (probs * weight).copy()
            else:
                summed_probs += probs * weight
        
        # Convert to numpy if needed
        if isinstance(summed_probs, torch.Tensor):
            summed_probs = summed_probs.numpy()
        
        # Select class with highest summed probability
        predicted_class = np.argmax(summed_probs)
        ensemble_predictions_weighted.append(predicted_class)

    ensemble_predictions_weighted = np.array(ensemble_predictions_weighted)

    # Calculate accuracy
    correct = (ensemble_predictions_weighted == test_labels).sum()
    accuracy = correct / len(test_idx)

    print(f"\nEnsemble accuracy (Performance-Weighted): {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 5: PERFORMANCE-WEIGHTED PROBABILITY SUM
Sum probabilities weighted by each model's individual accuracy

Model weights (normalized):
  plain_run1               : 0.9826
  linear_run3              : 0.9878
  mlp_run2                 : 1.0026
  gamlp_cs_09883da0        : 1.0142
  gamlp_09883da0           : 1.0128

Ensemble accuracy (Performance-Weighted): 0.8518 (85.18%)
Correct predictions: 1885022 / 2213091


## Voting Method 6: Confidence-Weighted Voting (weight by model's max probability/confidence)


In [9]:
print("=" * 60)
print("VOTING METHOD 6: CONFIDENCE-WEIGHTED VOTING")
print("=" * 60)
print("Weight each model's probabilities by its confidence (max probability) for that node")
print()

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"Warning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    ensemble_predictions_conf = []
    
    for node_idx in test_idx:
        # Sum confidence-weighted probabilities across all models
        summed_probs = None
        total_confidence = 0.0
        
        for model_name in best_models.keys():
            probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
            
            # Convert to numpy if needed
            if isinstance(probs, torch.Tensor):
                probs_np = probs.numpy()
            else:
                probs_np = probs
            
            # Confidence = max probability for this node
            confidence = np.max(probs_np)
            total_confidence += confidence
            
            # Weight probabilities by confidence
            weighted_probs = probs * confidence
            
            if summed_probs is None:
                summed_probs = weighted_probs.clone() if isinstance(weighted_probs, torch.Tensor) else weighted_probs.copy()
            else:
                summed_probs += weighted_probs
        
        # Convert to numpy if needed
        if isinstance(summed_probs, torch.Tensor):
            summed_probs = summed_probs.numpy()
        
        # Select class with highest summed probability
        predicted_class = np.argmax(summed_probs)
        ensemble_predictions_conf.append(predicted_class)

    ensemble_predictions_conf = np.array(ensemble_predictions_conf)

    # Calculate accuracy
    correct = (ensemble_predictions_conf == test_labels).sum()
    accuracy = correct / len(test_idx)

    print(f"Ensemble accuracy (Confidence-Weighted): {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 6: CONFIDENCE-WEIGHTED VOTING
Weight each model's probabilities by its confidence (max probability) for that node

Ensemble accuracy (Confidence-Weighted): 0.8510 (85.10%)
Correct predictions: 1883371 / 2213091


## Voting Method 7: Geometric Mean Voting (product of probabilities, more robust to outliers)


In [10]:
print("=" * 60)
print("VOTING METHOD 7: GEOMETRIC MEAN VOTING")
print("=" * 60)
print("Use geometric mean (product) of probabilities across models")
print("Note: Geometric mean emphasizes agreement between models")
print()

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"Warning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    ensemble_predictions_geom = []
    
    for node_idx in test_idx:
        # Multiply probabilities across all models (geometric mean = product^(1/n))
        product_probs = None
        
        for model_name in best_models.keys():
            probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
            
            # Convert to numpy if needed
            if isinstance(probs, torch.Tensor):
                probs = probs.numpy()
            
            # Add small epsilon to avoid zeros
            probs = probs + 1e-10
            
            if product_probs is None:
                product_probs = probs.copy()
            else:
                product_probs *= probs
        
        # Take nth root (geometric mean), but argmax is same for product vs geometric mean
        # Select class with highest product
        predicted_class = np.argmax(product_probs)
        ensemble_predictions_geom.append(predicted_class)

    ensemble_predictions_geom = np.array(ensemble_predictions_geom)

    # Calculate accuracy
    correct = (ensemble_predictions_geom == test_labels).sum()
    accuracy = correct / len(test_idx)

    print(f"Ensemble accuracy (Geometric Mean): {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 7: GEOMETRIC MEAN VOTING
Use geometric mean (product) of probabilities across models
Note: Geometric mean emphasizes agreement between models

Ensemble accuracy (Geometric Mean): 0.8518 (85.18%)
Correct predictions: 1885081 / 2213091


## Voting Method 8: Exponential Weighted Voting (raise probabilities to power before summing)


In [11]:
print("=" * 60)
print("VOTING METHOD 8: EXPONENTIAL WEIGHTED VOTING")
print("=" * 60)
print("Raise probabilities to a power (e.g., 2) before summing to emphasize high-confidence predictions")
print()

# Try different powers
powers_to_try = [1.5, 2.0, 2.5, 3.0]
best_power = None
best_acc = 0
best_predictions = None

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"Warning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    for power in powers_to_try:
        ensemble_predictions_exp = []
        
        for node_idx in test_idx:
            # Sum exponentiated probabilities across all models
            summed_probs = None
            
            for model_name in best_models.keys():
                probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
                
                # Convert to numpy if needed
                if isinstance(probs, torch.Tensor):
                    probs = probs.numpy()
                
                # Raise to power
                exp_probs = np.power(probs, power)
                
                if summed_probs is None:
                    summed_probs = exp_probs.copy()
                else:
                    summed_probs += exp_probs
            
            # Select class with highest summed exponentiated probability
            predicted_class = np.argmax(summed_probs)
            ensemble_predictions_exp.append(predicted_class)

        ensemble_predictions_exp = np.array(ensemble_predictions_exp)

        # Calculate accuracy
        correct = (ensemble_predictions_exp == test_labels).sum()
        accuracy = correct / len(test_idx)
        
        print(f"Power {power:.1f}: {accuracy:.4f} ({accuracy*100:.2f}%)")
        
        if accuracy > best_acc:
            best_acc = accuracy
            best_power = power
            best_predictions = ensemble_predictions_exp
    
    if best_predictions is not None:
        ensemble_predictions_exp = best_predictions
        print(f"\nBest power: {best_power:.1f}")
        print(f"Ensemble accuracy (Exponential Weighted, power={best_power:.1f}): {best_acc:.4f} ({best_acc*100:.2f}%)")
        print(f"Correct predictions: {(best_predictions == test_labels).sum()} / {len(test_idx)}")


VOTING METHOD 8: EXPONENTIAL WEIGHTED VOTING
Raise probabilities to a power (e.g., 2) before summing to emphasize high-confidence predictions



  exp_probs = np.power(probs, power)


Power 1.5: 0.0009 (0.09%)
Power 2.0: 0.1078 (10.78%)
Power 2.5: 0.0009 (0.09%)
Power 3.0: 0.8508 (85.08%)

Best power: 3.0
Ensemble accuracy (Exponential Weighted, power=3.0): 0.8508 (85.08%)
Correct predictions: 1882972 / 2213091


## Voting Method 9: Hybrid Weighted (combine performance weight + confidence weight)


In [12]:
print("=" * 60)
print("VOTING METHOD 9: HYBRID WEIGHTED VOTING")
print("=" * 60)
print("Combine performance-based weights with per-node confidence weights")
print()

# Calculate individual model accuracies for base weighting
model_base_weights = {}
for model_name in best_models.keys():
    df = model_predictions[model_name]
    test_preds = df.loc[test_idx, 'prediction_1'].values
    correct = (test_preds == test_labels).sum()
    acc = correct / len(test_idx)
    model_base_weights[model_name] = acc

# Normalize base weights
total_weight = sum(model_base_weights.values())
num_models = len(model_base_weights)
for model_name in model_base_weights:
    model_base_weights[model_name] = (model_base_weights[model_name] / total_weight) * num_models

# Verify all models have probabilities
missing_probs = [name for name in best_models.keys() if name not in model_probabilities]
if missing_probs:
    print(f"Warning: Missing probabilities for: {missing_probs}")
    print("This method requires probability files (.pt) for all models.")
else:
    ensemble_predictions_hybrid = []
    
    for node_idx in test_idx:
        # Sum hybrid-weighted probabilities across all models
        summed_probs = None
        
        for model_name in best_models.keys():
            probs = model_probabilities[model_name][node_idx, :]  # Shape: [num_classes]
            
            # Convert to numpy if needed
            if isinstance(probs, torch.Tensor):
                probs_np = probs.numpy()
            else:
                probs_np = probs
            
            # Base weight from model performance
            base_weight = model_base_weights[model_name]
            
            # Confidence weight from max probability for this node
            confidence = np.max(probs_np)
            
            # Combined weight
            combined_weight = base_weight * confidence
            
            # Weight probabilities
            weighted_probs = probs * combined_weight
            
            if summed_probs is None:
                summed_probs = weighted_probs.clone() if isinstance(weighted_probs, torch.Tensor) else weighted_probs.copy()
            else:
                summed_probs += weighted_probs
        
        # Convert to numpy if needed
        if isinstance(summed_probs, torch.Tensor):
            summed_probs = summed_probs.numpy()
        
        # Select class with highest summed probability
        predicted_class = np.argmax(summed_probs)
        ensemble_predictions_hybrid.append(predicted_class)

    ensemble_predictions_hybrid = np.array(ensemble_predictions_hybrid)

    # Calculate accuracy
    correct = (ensemble_predictions_hybrid == test_labels).sum()
    accuracy = correct / len(test_idx)

    print(f"Ensemble accuracy (Hybrid Weighted): {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"Correct predictions: {correct} / {len(test_idx)}")


VOTING METHOD 9: HYBRID WEIGHTED VOTING
Combine performance-based weights with per-node confidence weights

Ensemble accuracy (Hybrid Weighted): 0.8510 (85.10%)
Correct predictions: 1883358 / 2213091


## Summary Comparison


In [13]:
print("=" * 60)
print("SUMMARY: VOTING METHODS COMPARISON")
print("=" * 60)
print()

# Calculate individual model accuracies for reference
print("Individual Model Accuracies (for reference):")
individual_accs = {}
for model_name in best_models.keys():
    df = model_predictions[model_name]
    test_preds = df.loc[test_idx, 'prediction_1'].values
    correct = (test_preds == test_labels).sum()
    acc = correct / len(test_idx)
    individual_accs[model_name] = acc
    print(f"  {model_name:25s}: {acc:.4f} ({acc*100:.2f}%)")

avg_individual = np.mean(list(individual_accs.values()))
best_individual = max(individual_accs.values())
print(f"\n  Average individual accuracy: {avg_individual:.4f} ({avg_individual*100:.2f}%)")
print(f"  Best individual accuracy: {best_individual:.4f} ({best_individual*100:.2f}%)")

print("\n" + "=" * 60)
print("Ensemble Results:")
print("=" * 60)

# Basic voting
basic_acc = (ensemble_predictions == test_labels).sum() / len(test_idx)
print(f"1. Basic Voting:                    {basic_acc:.4f} ({basic_acc*100:.2f}%)")
print(f"   Improvement over average:       {basic_acc - avg_individual:.4f} ({((basic_acc / avg_individual - 1) * 100):.2f}%)")
print(f"   Improvement over best:           {basic_acc - best_individual:.4f} ({((basic_acc / best_individual - 1) * 100):.2f}%)")

# Ranked choice voting
ranked_acc = (ensemble_predictions_ranked == test_labels).sum() / len(test_idx)
print(f"\n2. Ranked Choice Voting:           {ranked_acc:.4f} ({ranked_acc*100:.2f}%)")
print(f"   Improvement over average:       {ranked_acc - avg_individual:.4f} ({((ranked_acc / avg_individual - 1) * 100):.2f}%)")
print(f"   Improvement over best:           {ranked_acc - best_individual:.4f} ({((ranked_acc / best_individual - 1) * 100):.2f}%)")

# Probability sum voting (if available)
if 'ensemble_predictions_prob' in locals():
    prob_acc = (ensemble_predictions_prob == test_labels).sum() / len(test_idx)
    print(f"\n3. Probability Sum Voting:        {prob_acc:.4f} ({prob_acc*100:.2f}%)")
    print(f"   Improvement over average:       {prob_acc - avg_individual:.4f} ({((prob_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {prob_acc - best_individual:.4f} ({((prob_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n3. Probability Sum Voting:        Not available (missing probability files)")

# Proportional top-5 voting (if available)
if 'ensemble_predictions_prop5' in locals():
    prop5_acc = (ensemble_predictions_prop5 == test_labels).sum() / len(test_idx)
    print(f"\n4. Proportional Top-5 Voting:      {prop5_acc:.4f} ({prop5_acc*100:.2f}%)")
    print(f"   Improvement over average:       {prop5_acc - avg_individual:.4f} ({((prop5_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {prop5_acc - best_individual:.4f} ({((prop5_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n4. Proportional Top-5 Voting:      Not available (missing probability files)")

# Performance-weighted voting (if available)
if 'ensemble_predictions_weighted' in locals():
    weighted_acc = (ensemble_predictions_weighted == test_labels).sum() / len(test_idx)
    print(f"\n5. Performance-Weighted Voting:     {weighted_acc:.4f} ({weighted_acc*100:.2f}%)")
    print(f"   Improvement over average:       {weighted_acc - avg_individual:.4f} ({((weighted_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {weighted_acc - best_individual:.4f} ({((weighted_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n5. Performance-Weighted Voting:     Not available (missing probability files)")

# Confidence-weighted voting (if available)
if 'ensemble_predictions_conf' in locals():
    conf_acc = (ensemble_predictions_conf == test_labels).sum() / len(test_idx)
    print(f"\n6. Confidence-Weighted Voting:     {conf_acc:.4f} ({conf_acc*100:.2f}%)")
    print(f"   Improvement over average:       {conf_acc - avg_individual:.4f} ({((conf_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {conf_acc - best_individual:.4f} ({((conf_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n6. Confidence-Weighted Voting:     Not available (missing probability files)")

# Geometric mean voting (if available)
if 'ensemble_predictions_geom' in locals():
    geom_acc = (ensemble_predictions_geom == test_labels).sum() / len(test_idx)
    print(f"\n7. Geometric Mean Voting:          {geom_acc:.4f} ({geom_acc*100:.2f}%)")
    print(f"   Improvement over average:       {geom_acc - avg_individual:.4f} ({((geom_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {geom_acc - best_individual:.4f} ({((geom_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n7. Geometric Mean Voting:          Not available (missing probability files)")

# Exponential weighted voting (if available)
if 'ensemble_predictions_exp' in locals():
    exp_acc = (ensemble_predictions_exp == test_labels).sum() / len(test_idx)
    print(f"\n8. Exponential Weighted Voting:     {exp_acc:.4f} ({exp_acc*100:.2f}%)")
    print(f"   Improvement over average:       {exp_acc - avg_individual:.4f} ({((exp_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {exp_acc - best_individual:.4f} ({((exp_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n8. Exponential Weighted Voting:     Not available (missing probability files)")

# Hybrid weighted voting (if available)
if 'ensemble_predictions_hybrid' in locals():
    hybrid_acc = (ensemble_predictions_hybrid == test_labels).sum() / len(test_idx)
    print(f"\n9. Hybrid Weighted Voting:          {hybrid_acc:.4f} ({hybrid_acc*100:.2f}%)")
    print(f"   Improvement over average:       {hybrid_acc - avg_individual:.4f} ({((hybrid_acc / avg_individual - 1) * 100):.2f}%)")
    print(f"   Improvement over best:           {hybrid_acc - best_individual:.4f} ({((hybrid_acc / best_individual - 1) * 100):.2f}%)")
else:
    print(f"\n9. Hybrid Weighted Voting:          Not available (missing probability files)")

# Find best method
all_methods = {
    'Basic Voting': 'ensemble_predictions',
    'Ranked Choice Voting': 'ensemble_predictions_ranked',
    'Probability Sum Voting': 'ensemble_predictions_prob',
    'Proportional Top-5 Voting': 'ensemble_predictions_prop5',
    'Performance-Weighted Voting': 'ensemble_predictions_weighted',
    'Confidence-Weighted Voting': 'ensemble_predictions_conf',
    'Geometric Mean Voting': 'ensemble_predictions_geom',
    'Exponential Weighted Voting': 'ensemble_predictions_exp',
    'Hybrid Weighted Voting': 'ensemble_predictions_hybrid'
}

best_method_name = None
best_method_acc = 0
for method_name, var_name in all_methods.items():
    if var_name in locals():
        acc = (locals()[var_name] == test_labels).sum() / len(test_idx)
        if acc > best_method_acc:
            best_method_acc = acc
            best_method_name = method_name

if best_method_name:
    print("\n" + "=" * 60)
    print(f"BEST METHOD: {best_method_name}")
    print(f"Accuracy: {best_method_acc:.4f} ({best_method_acc*100:.2f}%)")
    print(f"Improvement over best individual: {best_method_acc - best_individual:.4f} ({((best_method_acc / best_individual - 1) * 100):.2f}%)")
    print("=" * 60)

print("\n" + "=" * 60)
print(f"Test set size: {len(test_idx)} nodes")


SUMMARY: VOTING METHODS COMPARISON

Individual Model Accuracies (for reference):
  plain_run1               : 0.8254 (82.54%)
  linear_run3              : 0.8297 (82.97%)
  mlp_run2                 : 0.8422 (84.22%)
  gamlp_cs_09883da0        : 0.8519 (85.19%)
  gamlp_09883da0           : 0.8508 (85.08%)

  Average individual accuracy: 0.8400 (84.00%)
  Best individual accuracy: 0.8519 (85.19%)

Ensemble Results:
1. Basic Voting:                    0.8457 (84.57%)
   Improvement over average:       0.0057 (0.68%)
   Improvement over best:           -0.0062 (-0.73%)

2. Ranked Choice Voting:           0.8485 (84.85%)
   Improvement over average:       0.0085 (1.01%)
   Improvement over best:           -0.0034 (-0.40%)

3. Probability Sum Voting:        0.8518 (85.18%)
   Improvement over average:       0.0117 (1.40%)
   Improvement over best:           -0.0002 (-0.02%)

4. Proportional Top-5 Voting:      0.8524 (85.24%)
   Improvement over average:       0.0124 (1.48%)
   Improvement ov