Compare different thresholds and strategies

Explanation:

base_path: The directory that contains all the result files.


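filename: Result files must follow the pattern `{strategy}_syn_{category}_top5_result_{threshold}.csv` (treatment files, category `trt`, omit the `_top5` part), for example: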

data/outputs/2025/large_corpus/1128/lm_syn_bodysite_top5_result_0.8.csv

data/outputs/2025/large_corpus/1128/st_syn_disease_top5_result_0.9.csv

data/outputs/2025/large_corpus/1128/st_syn_trt_result_0.7.csv

In [1]:
%cd /home/lcc/projects/MetaHarmonizer

/home/lcc/projects/MetaHarmonizer


In [2]:
from evaluation.calc_stats import CalcStats

# Module-level CalcStats instance for interactive use in the notebook.
# The analysis functions visible below do not read this global; each builds its own CalcStats().
calc = CalcStats()

In [None]:
import pandas as pd
import os
from evaluation.calc_stats import CalcStats

def analyze_results(strategy='st', base_path="data/outputs/2025/large_corpus/1128", calc=None):
    """
    Analyze and summarize results for a given strategy.

    For every category (body site, disease, treatment) and every threshold
    (0.7, 0.8, 0.9) the matching result CSV is looked up in ``base_path``,
    loaded with pandas and handed to ``calc.calc_accuracy``. Missing files are
    reported and skipped; a file that fails to load or score is reported and
    skipped as well, so one bad file does not abort the whole run.

    Parameters:
    - strategy: 'lm' or 'st' (used as the filename prefix)
    - base_path: Directory where result files are located
    - calc: Optional object exposing ``calc_accuracy(DataFrame) -> DataFrame``
      with 'Accuracy Level' and 'Accuracy' columns. A new ``CalcStats()`` is
      created when omitted.

    Returns:
    - (combined_results, comparison_tables): ``combined_results`` is one long
      DataFrame with the columns Strategy, Category, Threshold, Accuracy Level,
      Accuracy and File; ``comparison_tables`` maps each category display name
      to a Threshold x Top-K table. ``(None, None)`` when no file was processed.
    """
    if calc is None:
        calc = CalcStats()

    # Define thresholds to analyze (kept as strings: they are part of the filename)
    thresholds = ['0.7', '0.8', '0.9']

    # Define categories: filename key -> display name
    categories = {
        'bodysite': 'Body Site',
        'disease': 'Disease',
        'trt': 'Treatment'
    }

    # Accuracy levels in display order, mapped to the short column headers
    level_labels = {
        'Top 1 Match': 'Top 1',
        'Top 3 Matches': 'Top 3',
        'Top 5 Matches': 'Top 5'
    }

    all_results = []

    # Iterate over each category and threshold
    for cat_key, cat_name in categories.items():
        for threshold in thresholds:
            # Construct filename; treatment files carry no "_top5" infix
            if cat_key == 'trt':
                filename = f"{strategy}_syn_{cat_key}_result_{threshold}.csv"
            else:
                filename = f"{strategy}_syn_{cat_key}_top5_result_{threshold}.csv"

            file_path = os.path.join(base_path, filename)

            # Check if file exists
            if not os.path.exists(file_path):
                print(f"Warning: File not found - {filename}")
                continue

            print(f"Processing: {filename}")

            # Deliberately broad: this is a best-effort batch over many files,
            # and every failure is reported together with the offending file.
            try:
                data = pd.read_csv(file_path)

                # Calculate accuracy
                accuracy_df = calc.calc_accuracy(data)

                # Add metadata
                accuracy_df['Category'] = cat_name
                accuracy_df['Threshold'] = threshold
                accuracy_df['Strategy'] = strategy.upper()
                accuracy_df['File'] = filename

                all_results.append(accuracy_df)

                # The first row is reported as Top1, as in the original output
                print(f"  ✓ {cat_name} @ {threshold}: Top1={accuracy_df.iloc[0]['Accuracy']:.2f}%")

            except Exception as e:
                print(f"  ✗ Error processing {filename}: {e}")

    if not all_results:
        print("No results found!")
        return None, None

    combined_results = pd.concat(all_results, ignore_index=True)

    # Reorder columns
    combined_results = combined_results[['Strategy', 'Category', 'Threshold',
                                        'Accuracy Level', 'Accuracy', 'File']]

    print(f"\n{'='*80}")
    print(f"SUMMARY - {strategy.upper()} Strategy Results")
    print(f"{'='*80}")
    print(combined_results[['Category', 'Threshold', 'Accuracy Level', 'Accuracy']].to_string(index=False))

    # Create comparison tables: Thresholds (0.7, 0.8, 0.9) vertically, Top1/3/5 horizontally
    comparison_tables = {}

    for cat_name in categories.values():
        cat_data = combined_results[combined_results['Category'] == cat_name]

        if cat_data.empty:
            continue

        # Create pivot table: rows=Threshold, columns=Accuracy Level
        pivot = cat_data.pivot_table(
            index='Threshold',
            columns='Accuracy Level',
            values='Accuracy',
            aggfunc='first'
        )

        # Keep only the known levels that are actually present, in Top 1 / 3 / 5 order
        present_levels = [level for level in level_labels if level in pivot.columns]
        pivot = pivot[present_levels]

        # Rename columns for simpler display. The names are derived from the
        # levels that exist, so a result set with fewer than three levels no
        # longer fails with a length mismatch.
        pivot.columns = [level_labels[level] for level in present_levels]

        # Ensure threshold order (thresholds without data become NaN rows)
        pivot = pivot.reindex(thresholds)

        comparison_tables[cat_name] = pivot

        print(f"\n{'='*80}")
        print(f"{cat_name} - Accuracy Comparison (Threshold × Top-K)")
        print(f"{'='*80}")
        print(pivot.to_string())

    return combined_results, comparison_tables


def compare_strategies(strategies=None,
                       base_path="data/outputs/2025/large_corpus/1128",
                       save_results=False):
    """
    Compare results of multiple strategies

    Runs analyze_results() once per strategy, concatenates the per-strategy
    results and prints, for every category and accuracy level, a
    Threshold x Strategy table.

    Parameters:
    - strategies: List of strategies, e.g., ['lm', 'st'] (the default when omitted)
    - base_path: the directory where result files are located
    - save_results: Whether to save results to CSV. When True, the combined
      results and every per-strategy comparison table are written into base_path.

    Returns:
    - (final_combined, all_comparison_tables): the concatenated results and a
      dict mapping each strategy to its comparison tables. ``(None, None)``
      when no strategy produced results, so callers can always unpack two values.
    """
    # A None default avoids sharing one mutable list between calls; list()
    # lets any iterable be walked here and joined again for the output filename.
    strategies = ['lm', 'st'] if strategies is None else list(strategies)

    all_strategy_results = []
    all_comparison_tables = {}

    for strategy in strategies:
        print(f"\n{'#'*80}")
        print(f"# Analyzing Strategy: {strategy.upper()}")
        print(f"{'#'*80}\n")

        results, tables = analyze_results(strategy=strategy, base_path=base_path)

        if results is not None:
            all_strategy_results.append(results)
            all_comparison_tables[strategy] = tables

    if not all_strategy_results:
        print("No results to compare!")
        # Same arity as the normal return (and as analyze_results)
        return None, None

    # Combine results from all strategies
    final_combined = pd.concat(all_strategy_results, ignore_index=True)

    # Cross-strategy comparison tables
    print(f"\n\n{'='*80}")
    print("CROSS-STRATEGY COMPARISON")
    print(f"{'='*80}")

    categories = final_combined['Category'].unique()

    for category in categories:
        for acc_level in ['Top 1 Match', 'Top 3 Matches', 'Top 5 Matches']:
            cat_level_data = final_combined[
                (final_combined['Category'] == category) &
                (final_combined['Accuracy Level'] == acc_level)
            ]

            if cat_level_data.empty:
                continue

            # Create strategy comparison pivot table
            strategy_pivot = cat_level_data.pivot_table(
                index='Threshold',
                columns='Strategy',
                values='Accuracy',
                aggfunc='first'
            )

            # Ensure threshold order
            strategy_pivot = strategy_pivot.reindex(['0.7', '0.8', '0.9'])

            print(f"\n{category} - {acc_level}")
            print("-" * 60)
            print(strategy_pivot.to_string())

    # Save results
    if save_results:
        output_path = os.path.join(base_path, f"accuracy_comparison_{'_'.join(strategies)}.csv")
        final_combined.to_csv(output_path, index=False)
        print(f"\n\nResults saved to: {output_path}")

        # Save comparison tables for each category
        for strategy, tables in all_comparison_tables.items():
            for cat_name, table in tables.items():
                filename = f"{strategy}_{cat_name.replace(' ', '_').lower()}_comparison.csv"
                table_path = os.path.join(base_path, filename)
                table.to_csv(table_path)
                print(f"Saved: {filename}")

    return final_combined, all_comparison_tables
    

In [None]:
# Test single strategy analysis
# NOTE(review): the variables are named *_lm but the call analyzes strategy='st' — confirm which one is intended.
results_lm, tables_lm = analyze_results(strategy='st')

In [None]:
# Test two strategies comparison
# save_results=True makes compare_strategies write the combined CSV and the
# per-strategy comparison tables into base_path (left at its default here).
combined, all_tables = compare_strategies(
        strategies=['lm', 'st'], 
        save_results=True
    )

Compare 2 strategies for threshold=0.9

In [5]:
import pandas as pd
import os
from evaluation.calc_stats import CalcStats

def _accuracy_at_level(accuracy_df, level):
    """Return the first 'Accuracy' value whose 'Accuracy Level' equals ``level``."""
    matches = accuracy_df.loc[accuracy_df['Accuracy Level'] == level, 'Accuracy']
    if matches.empty:
        # Descriptive replacement for the bare "index 0 is out of bounds" error
        raise ValueError(f"accuracy level '{level}' not present in calc_accuracy output")
    return matches.values[0]


def analyze_results_formatted(strategies=None,
                              base_path="data/outputs/2025/large_corpus/1128",
                              threshold='0.9',
                              save_results=False,
                              calc=None):
    """
    Analyze and summarize results for given strategies in a formatted table.

    For each strategy and each category (bodysite, disease, treatment) the
    result CSV for the single given threshold is read from ``base_path`` and
    scored with ``calc.calc_accuracy``. One row per (attribute, strategy) is
    produced, holding the Top 1 / Top 3 / Top 5 accuracies rounded to two
    decimals. Missing or unreadable files are reported and skipped.

    NOTE(review): an earlier docstring stated "Only uses SapBERT model"; nothing
    in this function selects a model — confirm against how the CSVs are produced.

    Parameters:
    - strategies: List of strategies, e.g., ['lm', 'st'] (the default when omitted)
    - base_path: Directory where result files are located
    - threshold: Threshold as it appears in the filename, e.g. '0.9'
    - save_results: Whether to write the summary to
      ``accuracy_summary_threshold_{threshold}.csv`` in base_path
    - calc: Optional object exposing ``calc_accuracy(DataFrame) -> DataFrame``
      with 'Accuracy Level' and 'Accuracy' columns. A new ``CalcStats()`` is
      created when omitted.

    Returns:
    - DataFrame with the columns Attribute, Strategy, Top 1 (%), Top 3 (%),
      Top 5 (%), sorted by Attribute then Strategy; None when nothing was processed.
    """
    # A None default avoids sharing one mutable list between calls
    if strategies is None:
        strategies = ['lm', 'st']
    if calc is None:
        calc = CalcStats()

    # Define categories: filename key -> attribute name shown in the table
    categories = {
        'bodysite': 'bodysite',
        'disease': 'disease',
        'trt': 'treatment'
    }

    all_results = []

    # Iterate over each strategy
    for strategy in strategies:
        print(f"\nProcessing strategy: {strategy.upper()}")

        # Use strategy as the file prefix (lm or st); invariant across categories
        strat_code = strategy.lower()

        for cat_key, cat_name in categories.items():
            # Construct filename; treatment files carry no "_top5" infix
            if cat_key == 'trt':
                filename = f"{strat_code}_syn_{cat_key}_result_{threshold}.csv"
            else:
                filename = f"{strat_code}_syn_{cat_key}_top5_result_{threshold}.csv"

            file_path = os.path.join(base_path, filename)

            # Check if file exists
            if not os.path.exists(file_path):
                print(f"  Warning: File not found - {filename}")
                continue

            print(f"  Processing: {filename}")

            # Deliberately broad: best-effort batch, every failure is reported
            # together with the offending file and that file is skipped.
            try:
                data = pd.read_csv(file_path)

                # Calculate accuracy
                accuracy_df = calc.calc_accuracy(data)

                # Extract Top 1, Top 3, Top 5 accuracies
                top1 = _accuracy_at_level(accuracy_df, 'Top 1 Match')
                top3 = _accuracy_at_level(accuracy_df, 'Top 3 Matches')
                top5 = _accuracy_at_level(accuracy_df, 'Top 5 Matches')

                # Add to results
                all_results.append({
                    'Attribute': cat_name,
                    'Strategy': strategy.upper(),
                    'Top 1 (%)': round(top1, 2),
                    'Top 3 (%)': round(top3, 2),
                    'Top 5 (%)': round(top5, 2)
                })

                print(f"    ✓ {cat_name}: Top1={top1:.2f}%, Top3={top3:.2f}%, Top5={top5:.2f}%")

            except Exception as e:
                print(f"    ✗ Error processing {filename}: {e}")

    if not all_results:
        print("No results found!")
        return None

    # Create DataFrame
    final_df = pd.DataFrame(all_results)

    # Sort by Attribute, Strategy
    final_df = final_df.sort_values(['Attribute', 'Strategy'])
    final_df = final_df.reset_index(drop=True)

    # Print results
    print(f"\n{'='*80}")
    print(f"RESULTS SUMMARY (Threshold = {threshold})")
    print(f"{'='*80}\n")
    print(final_df.to_string(index=False))

    # Save results
    if save_results:
        output_path = os.path.join(base_path, f"accuracy_summary_threshold_{threshold}.csv")
        final_df.to_csv(output_path, index=False)
        print(f"\n\nResults saved to: {output_path}")

    return final_df


# Usage example: summarize the LM and ST strategies at threshold 0.9 only,
# without writing the summary to disk.
if __name__ == "__main__":
    results = analyze_results_formatted(
        strategies=["lm", "st"],
        base_path="data/outputs/2025/large_corpus/1128",
        threshold="0.9",
        save_results=False,
    )


Processing strategy: LM
  Processing: lm_syn_bodysite_top5_result_0.9.csv
    ✓ bodysite: Top1=57.63%, Top3=79.73%, Top5=84.51%
  Processing: lm_syn_disease_top5_result_0.9.csv
    ✓ disease: Top1=80.64%, Top3=89.39%, Top5=91.64%
  Processing: lm_syn_trt_result_0.9.csv
    ✓ treatment: Top1=79.76%, Top3=85.64%, Top5=87.89%

Processing strategy: ST
  Processing: st_syn_bodysite_top5_result_0.9.csv
    ✓ bodysite: Top1=64.46%, Top3=80.64%, Top5=86.10%
  Processing: st_syn_disease_top5_result_0.9.csv
    ✓ disease: Top1=82.32%, Top3=90.35%, Top5=92.60%
  Processing: st_syn_trt_result_0.9.csv
    ✓ treatment: Top1=81.31%, Top3=85.29%, Top5=87.02%

RESULTS SUMMARY (Threshold = 0.9)

Attribute Strategy  Top 1 (%)  Top 3 (%)  Top 5 (%)
 bodysite       LM      57.63      79.73      84.51
 bodysite       ST      64.46      80.64      86.10
  disease       LM      80.64      89.39      91.64
  disease       ST      82.32      90.35      92.60
treatment       LM      79.76      85.64      87.89
treatment       ST      81.31      85.29      87.02


The graph below is generated from results that were entered by hand. This needs improvement: the graph should be generated automatically from the evaluation results.

In [None]:
import matplotlib.pyplot as plt

# X axis: the Top-K cut-offs every series is plotted against
x = [1, 3, 5]

# Accuracy (%) at Top-1 / Top-3 / Top-5 per category, entered by hand.
# "Previous" series:
prev = {
    "Body Site": [58.997722, 75.398633, 82.460137],
    "Disease": [75.562701, 84.694534, 87.781350],
    "Treatment": [61.764706, 67.301038, 69.723183],
}

# "Current" series (these values equal the ST rows printed for threshold 0.9 above):
curr = {
    "Body Site": [64.46, 80.64, 86.10],
    "Disease": [82.32, 90.35, 92.60],
    "Treatment": [81.31, 85.29, 87.02],
}

# One colour per category so its previous/current lines can be paired visually
colors = {
    "Body Site": "#1f77b4",
    "Disease": "#2ca02c",
    "Treatment": "#d62728",
}

plt.figure(figsize=(8, 6))

for category, previous_values in prev.items():
    line_color = colors[category]

    # Previous result: dashed, default line width
    plt.plot(x, previous_values, marker='o', linestyle='--',
             color=line_color, label=f"{category} — Previous")

    # Current result: solid and thicker, same colour as its counterpart
    plt.plot(x, curr[category], marker='o', linestyle='-', linewidth=2,
             color=line_color, label=f"{category} — Current")

# Axes, title and legend
plt.xlabel("Top-K")
plt.ylabel("Accuracy (%)")
plt.title("Previous vs Current Accuracy (Top-1 / Top-3 / Top-5)")
plt.xticks(x)
plt.ylim(55, 100)
plt.grid(True, linestyle="--", alpha=0.3)
plt.legend()
plt.tight_layout()
plt.show()
