# In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from scipy.stats import pearsonr
import pandas as pd
import torch

def _as_flat_float_tensor(values):
    """Return *values* as a 1-D floating-point tensor.

    Floating-point tensors are passed through untouched (same dtype, device
    and autograd history). Non-tensor inputs and integer/bool tensors are
    converted to float32.
    """
    if not isinstance(values, torch.Tensor):
        values = torch.tensor(values, dtype=torch.float32)
    elif not torch.is_floating_point(values):
        # torch.mean / torch.var raise on integer tensors.
        values = values.to(torch.float32)
    # reshape (unlike view) also works for non-contiguous tensors.
    return values.reshape(-1)


def CCCLoss(x, y):
    """
    Calculates the Concordance Correlation Coefficient (CCC) Loss.

    CCC measures the agreement between two variables:

        CCC = 2 * cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))**2)

    using population (biased) variance and covariance.
    Loss = 1 - CCC, because loss functions are minimized, while CCC is maximized.

    Args:
        x: Tensor or array-like of values (any shape; flattened internally).
        y: Tensor or array-like with the same number of elements as ``x``.

    Returns:
        A 0-dim tensor holding ``1 - CCC``.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of
            elements (otherwise a single-element input would be silently
            broadcast and yield a meaningless score).
    """
    # Each input is converted independently so that an existing tensor is
    # never re-wrapped (which would detach it from the autograd graph).
    x = _as_flat_float_tensor(x)
    y = _as_flat_float_tensor(y)

    if x.numel() != y.numel():
        raise ValueError(
            f"CCCLoss expects inputs with the same number of elements, "
            f"got {x.numel()} and {y.numel()}."
        )

    # Calculate means
    mean_x = torch.mean(x)
    mean_y = torch.mean(y)

    # Calculate variances using population variance (unbiased=False)
    var_x = torch.var(x, unbiased=False)
    var_y = torch.var(y, unbiased=False)

    # Calculate covariance between x and y (population covariance)
    cov_xy = torch.mean((x - mean_x) * (y - mean_y))

    # Calculate CCC
    numerator = 2 * cov_xy
    denominator = var_x + var_y + (mean_x - mean_y) ** 2

    # Add a small epsilon for numerical stability (prevents division by zero
    # when both inputs are constant and equal)
    epsilon = 1e-8
    ccc = numerator / (denominator + epsilon)

    # Return 1 - CCC because loss functions should be minimized
    return 1.0 - ccc

# ------------------- 1. Load Your Data -------------------
# The three CSVs are combined row-by-row below, so they must contain the same
# number of rows. NOTE(review): rows are presumably in the same participant
# order across files -- confirm, since nothing here checks it.
# Columns are read as float64 so the downstream torch/sklearn metrics never
# receive integer data.
predictions_audio_np = pd.read_csv(
    "predictions-audio-attention-ccc-hubert-large-ll60k.csv"
)["Predicted Values"].to_numpy(dtype=np.float64)
true_values_np = pd.read_csv("y-enjoyment.csv")["Average"].to_numpy(dtype=np.float64)
predictions_text_np = pd.read_csv(
    "predictions-text-attention-ccc-t5.csv"
)["Predicted Values"].to_numpy(dtype=np.float64)

# Check if lengths match. An explicit raise is used instead of `assert`
# because assertions are stripped when Python runs with -O.
if not (len(true_values_np) == len(predictions_audio_np) == len(predictions_text_np)):
    raise ValueError(
        "Error: True values and all prediction arrays must have the same length. "
        f"Got true={len(true_values_np)}, audio={len(predictions_audio_np)}, "
        f"text={len(predictions_text_np)}."
    )

# Order matters: the fusion weights applied later are [audio, text].
all_predictions = [predictions_audio_np, predictions_text_np]

# ------------------- 2. Evaluation Function (with MAE) -------------------
def evaluate_predictions(true_vals, pred_vals, model_name="Model"):
    """Calculate and print R2, MSE, MAE, Pearson correlation, p-value and CCC.

    Args:
        true_vals: Array-like of ground-truth values.
        pred_vals: Array-like of predicted values, same length as ``true_vals``.
        model_name: Label used in the printed report.

    Returns:
        dict: The computed metrics under the keys ``"r2"``, ``"mse"``,
        ``"mae"``, ``"pearson_r"``, ``"p_value"`` and ``"ccc"``. The
        correlation and p-value are NaN when fewer than two points are given.
    """
    # Work on flat float64 arrays so lists, Series and integer data all behave
    # the same in scipy, sklearn and torch.
    true_arr = np.asarray(true_vals, dtype=np.float64).ravel()
    pred_arr = np.asarray(pred_vals, dtype=np.float64).ravel()

    # Correlation (undefined for fewer than two samples)
    if true_arr.size < 2:
        print(f"Warning: Need at least 2 data points for correlation for {model_name}.")
        corr, p_value = np.nan, np.nan
    else:
        corr, p_value = pearsonr(true_arr, pred_arr)

    # Metrics
    r2 = r2_score(true_arr, pred_arr)
    mse = mean_squared_error(true_arr, pred_arr)
    mae = mean_absolute_error(true_arr, pred_arr)
    # CCCLoss returns 1 - CCC, so invert it to report the coefficient itself.
    ccc = 1 - CCCLoss(torch.tensor(true_arr), torch.tensor(pred_arr)).item()

    # Output
    print(f"\n--- Evaluation Results for: {model_name} ---")
    print(f"R-squared (R2):               {r2:.4f}")
    print(f"Mean Squared Error (MSE):     {mse:.4f}")
    print(f"Mean Absolute Error (MAE):    {mae:.4f}")
    print(f"Pearson Correlation:          {corr:.4f}")
    print(f"P-value (for Correlation):    {p_value:.4f}")
    print(f"Concordance Correlation Coefficient (CCC): {ccc:.4f}")
    print("---------------------------------------------")
    if not np.isnan(p_value):
        if p_value < 0.05:
            print("  (Correlation is statistically significant at p < 0.05)")
        else:
            print("  (Correlation is not statistically significant at p < 0.05)")
    print("---------------------------------------------")

    return {
        "r2": r2,
        "mse": mse,
        "mae": mae,
        "pearson_r": corr,
        "p_value": p_value,
        "ccc": ccc,
    }

# ------------------- 3. Run Evaluation -------------------
print(f"\nEvaluating {len(true_values_np)} data points.")

# Audio-only
evaluate_predictions(true_values_np, predictions_audio_np, "Audio Only - Attention-based Pooling")

# Text-only
evaluate_predictions(true_values_np, predictions_text_np, "Text Only - Attention-based Pooling")

# Weighted-average fusion. The weights follow the order of `all_predictions`
# (audio first, then text), so this is NOT an equal-weight mean.
# NOTE(review): the 0.61 / 0.39 split looks hand-tuned -- confirm its origin.
fusion_weights = [0.61, 0.39]
fused_predictions_avg_np = np.average(all_predictions, axis=0, weights=fusion_weights)
evaluate_predictions(true_values_np, fused_predictions_avg_np, "Fused (Weighted Average)")

# Save fused predictions
# Participant IDs are hard-coded as 4..42 with ID 26 excluded (38 IDs).
person_ids = [i for i in range(4, 43) if i != 26]
if len(person_ids) != len(true_values_np):
    raise ValueError(
        f"Expected {len(person_ids)} rows to match the hard-coded Person IDs, "
        f"got {len(true_values_np)}."
    )

output_path = 'fused_predictions_avg-sfr-t5.csv'
fused_predictions_avg_df = pd.DataFrame({
    'Person ID': person_ids,
    'True Values': true_values_np,
    'Fused Predictions (Average)': np.round(fused_predictions_avg_np, 2),
})
fused_predictions_avg_df.to_csv(output_path, index=False)

# Report the path that was actually written.
print(f"\nFused predictions saved to '{output_path}'.")

# Output of the last recorded run:
# FileNotFoundError: [Errno 2] No such file or directory: 'predictions-audio-attention-ccc-hubert-large-ll60k.csv'