In [None]:
import json
import statistics
from collections import defaultdict

import pandas as pd
from google.colab import files

# --- Configuration ---
# A response counts as "long" when it has strictly more than this many words.
WORD_THRESHOLD = 50

# Display label of each model/strategy -> JSONL file holding its inference results.
# Insertion order is the row order of the final summary table.
MODEL_CONFIG = {
    "Baseline (Non-FT)": "base_mistral_inference_results.jsonl",
    # TODO: placeholder, fill in the results file for this strategy.
    "LoRA Finetuning": "",
    "Neologism Learning": "mistral_with_short_inference_results.jsonl",
    # TODO: placeholder, fill in the results file for this strategy.
    "Prompting": "",
}

print(f"Configuration: Response length threshold set to {WORD_THRESHOLD} words.")

In [None]:
# --- File Upload ---
# Tell the user which files are needed before opening the Colab upload widget.
print("Please upload the four required JSONL files.")
print("The files expected are:")
for name, filename in MODEL_CONFIG.items():
    print(f"- {filename} (for {name})")

# Blocks until the user has picked files; the result is keyed by uploaded filename.
uploaded = files.upload()

# Compare what was uploaded against every filename listed in MODEL_CONFIG.
uploaded_files = list(uploaded)
missing_files = []
for expected in MODEL_CONFIG.values():
    if expected not in uploaded_files:
        missing_files.append(expected)

if not missing_files:
    print("\n[SUCCESS] All four files uploaded successfully.")
else:
    print("\n[ERROR] The following files are missing and are required for the analysis:")
    for filename in missing_files:
        print(f"- {filename}")

In [None]:
# --- Function to Compute Statistics ---

def compute_stats(file_path, threshold):
    """
    Load a JSONL file and summarise the word counts of its responses.

    Every non-blank line is parsed as a JSON object. The text stored under its
    "response" key is split on whitespace to obtain a word count; a missing,
    null or otherwise empty "response" counts as zero words.

    Args:
        file_path: Path of the JSONL file to read.
        threshold: A response is counted as long when its word count is
            strictly greater than this value.

    Returns:
        dict: On success, the keys "Total Responses",
        "Responses > {threshold} Words", "% > {threshold} Words",
        "Mean Length (Words)" and "Median Length (Words)". The median is the
        middle value for an odd number of responses and the mean of the two
        middle values for an even number (so it may be fractional).
        If the file cannot be read or parsed, a dict holding only an "Error"
        message is returned; if the file contains no records, the result is
        {"Total Responses": 0, "Error": ...}.
    """
    data = []
    try:
        # JSON Lines files are UTF-8; do not depend on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if line.strip():
                    data.append(json.loads(line))
    except Exception as e:
        # Deliberately best-effort: the error is returned as data instead of being raised.
        return {"Error": f"Failed to read file: {e}"}

    if not data:
        return {"Total Responses": 0, "Error": "File is empty or invalid JSONL."}

    # Simple whitespace word count. `or ""` maps a null response to an empty
    # string so that `.split()` does not fail on None.
    response_lengths = [
        len((record.get("response") or "").split())
        for record in data
    ]

    # `data` is non-empty here, so the divisions below are safe.
    total_responses = len(response_lengths)
    over_threshold_count = sum(1 for length in response_lengths if length > threshold)

    return {
        "Total Responses": total_responses,
        f"Responses > {threshold} Words": over_threshold_count,
        f"% > {threshold} Words": (over_threshold_count / total_responses) * 100,
        "Mean Length (Words)": sum(response_lengths) / total_responses,
        # statistics.median averages the two middle values for even-sized
        # samples, unlike indexing the sorted list at n // 2.
        "Median Length (Words)": statistics.median(response_lengths),
    }

# --- Main Analysis Loop ---
# One summary row per successfully analysed model, in MODEL_CONFIG order.
results_data = []

# Column order used for every row of the summary table.
ordered_keys = (
    'Model',
    'Total Responses',
    f'Responses > {WORD_THRESHOLD} Words',
    f'% > {WORD_THRESHOLD} Words',
    'Mean Length (Words)',
    'Median Length (Words)',
)

print("\n--- Running Analysis ---")
for model_name, filename in MODEL_CONFIG.items():
    print(f"Processing {model_name}...")
    stats = compute_stats(filename, WORD_THRESHOLD)
    stats['Model'] = model_name

    # A model whose file could not be analysed is reported and left out of the table.
    if 'Error' in stats:
        print(f"[ERROR] Skipping {model_name} due to error: {stats['Error']}")
        continue

    results_data.append({key: stats[key] for key in ordered_keys})

In [None]:
# --- Display Results ---
# Build the summary table from the rows collected by the analysis loop, print
# it as markdown and save it to CSV. Nothing is written when no model produced
# usable statistics.
if results_data:
    df = pd.DataFrame(results_data)

    # Render the percentage and mean columns as fixed-precision strings.
    pct_col = f'% > {WORD_THRESHOLD} Words'
    df[pct_col] = df[pct_col].map('{:.2f}%'.format)
    df['Mean Length (Words)'] = df['Mean Length (Words)'].map('{:.1f}'.format)
    # The median column is intentionally left numeric. Casting it to int would
    # silently truncate a fractional median (e.g. 2.5 -> 2) and raise on NaN;
    # whole-number medians are displayed exactly as before.

    # Set the 'Model' as the index for a cleaner look
    df = df.set_index('Model')

    print("\n--- Summary of Response Length Statistics ---")
    print(f"Threshold for Long Response: {WORD_THRESHOLD} words")
    # floatfmt applies to any float-valued column, so fractional medians are
    # shown with one decimal place.
    print("\n" + df.to_markdown(floatfmt=".1f"))

    # Optional: Save results to CSV
    df.to_csv("response_length_summary.csv")
    print("\nResults saved to 'response_length_summary.csv'")

else:
    print("\nAnalysis failed. Please check file uploads and data format.")