In [3]:
import json
import numpy as np

In [4]:
def calculate_score_stats(json_file_path):
    """
    Load evaluation results from a JSON file and compute the mean and
    population variance of the 'score' field.

    The file must contain a JSON list of objects. An entry contributes to the
    statistics only if it is a dictionary whose 'score' value is present, not
    None, convertible to float, and finite. Every other entry is skipped
    (with a printed warning when the entry or its score is malformed).

    Args:
        json_file_path (str): The path to the JSON file containing evaluation results.

    Returns:
        tuple: A tuple containing (mean_score, variance_score, num_samples_with_scores).
               The variance is the population variance (ddof=0).
               Returns (None, None, 0) if no usable scores are found or the
               file is missing, unreadable, or not a JSON list.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            results = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at '{json_file_path}'")
        return None, None, 0
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{json_file_path}'")
        return None, None, 0
    except Exception as e:
        print(f"An unexpected error occurred while reading the file: {e}")
        return None, None, 0

    if not isinstance(results, list):
        print("Error: JSON content is not a list of results.")
        return None, None, 0

    scores = []
    for item in results:
        if not isinstance(item, dict):
            print(f"Warning: Skipping non-dictionary item in results: {item}")
            continue

        raw_score = item.get('score')
        if raw_score is None:
            # Missing or null score: silently excluded from the statistics.
            continue

        try:
            score = float(raw_score)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: integers too large to be represented as a float.
            print(f"Warning: Could not convert score to float for item: {item}. Skipping.")
            continue

        # Python's json module accepts NaN/Infinity literals, and float() accepts
        # the strings "nan"/"inf"; a single such value would turn the mean and
        # variance into nan/inf, so it is treated like any other invalid score.
        if not np.isfinite(score):
            print(f"Warning: Non-finite score for item: {item}. Skipping.")
            continue

        scores.append(score)

    if not scores:
        print("No valid scores found in the JSON file.")
        return None, None, 0

    mean_score = np.mean(scores)
    variance_score = np.var(scores, ddof=0)  # ddof=0 for population variance, ddof=1 for sample variance
    num_samples_with_scores = len(scores)

    return mean_score, variance_score, num_samples_with_scores

In [15]:
# Evaluation file to summarize. Only one path is active at a time; swap the
# assignment for one of the alternatives below to analyze a different run.
#   "evaluation_outputs/scar1800_generated_test_summaries_with_scores_nomin.json"
#   "evaluation_outputs/scar1800_generated_test_summaries_with_scores.json"
#   "evaluation_outputs/abc_generated_test_summaries_with_scores.json"
#   "evaluation_outputs/rlhf_generated_test_summaries_with_scores.json"
json_file = "evaluation_outputs/uniform_generated_test_summaries_with_scores.json"
mean_val, var_val, num_samples = calculate_score_stats(json_file)

if mean_val is not None and var_val is not None:
    print(f"\nStatistics for scores from '{json_file}':")
    print(f"  Number of samples with scores: {num_samples}")
    print(f"  Mean score:                    {mean_val:.4f}")
    if num_samples > 1:
        # calculate_score_stats returns the population variance (ddof=0).
        # Bessel's correction, n / (n - 1), converts it to the sample variance
        # (ddof=1) without re-opening and re-parsing the file, and guarantees
        # the figure is computed from exactly the scores the function accepted.
        sample_var_val = var_val * num_samples / (num_samples - 1)
        print(f"  Sample Variance of scores:     {sample_var_val:.4f}")
        print(f"  Sample Standard Deviation:     {np.sqrt(sample_var_val):.4f}")
    else:
        # A single sample has no defined sample variance (division by n - 1 = 0).
        print("  Sample Variance of scores:     N/A (requires >1 sample)")
        print("  Sample Standard Deviation:     N/A (requires >1 sample)")


Statistics for scores from 'evaluation_outputs/uniform_generated_test_summaries_with_scores.json':
  Number of samples with scores: 6553
  Mean score:                    1.6803
  Sample Variance of scores:     0.9966
  Sample Standard Deviation:     0.9983


In [8]:
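# Recorded statistics per model, presumably copied from earlier runs of the
# score-statistics cell (TODO: confirm which input file each block came from).
# RLHF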
  # Number of samples with scores: 6553
  # Mean score:                    1.5802
  # Sample Variance of scores:     0.9844
  # Sample Standard Deviation:     0.9922

# ABC
  # Number of samples with scores: 6553
  # Mean score:                    2.8497
  # Sample Variance of scores:     1.3109
  # Sample Standard Deviation:     1.1449

# SCAR
  # Number of samples with scores: 6553
  # Mean score:                    4.3512
  # Sample Variance of scores:     1.0883
  # Sample Standard Deviation:     1.0432

In [16]:
# Five values whose spread is measured in the following cell.
# NOTE(review): their provenance is not recorded in this notebook — confirm and document.
numbers = [
    1.73093,
    1.72790,
    1.68616,
    1.68247,
    1.68202,
]

In [17]:
# Population standard deviation of `numbers` (ddof=0 is NumPy's default, spelled out here).
np.std(numbers, ddof=0)

0.022535428640254446