# CP1 Evaluation Performance

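Let's see how the CP1 assessment pipelines (CP1A, CP1A large context, and CP1B) perform against the ASCOR ground truth.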

In [None]:
# Import necessary modules
import sys
import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Locate the project root, two directory levels above the current working directory
notebook_dir = Path.cwd()
project_root = notebook_dir.parent.parent

# Put the project root at the front of sys.path (once) so `scripts.*` is importable
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)
    print(f"Added {project_root} to sys.path")

from scripts.climate_policy_pipelines.cp1.pipeline import run_cp1a_assessment
from scripts.climate_policy_pipelines.cp1.pipeline import run_cp1a_assessment_large_context
from scripts.climate_policy_pipelines.cp1.pipeline import run_cp1b_assessment

In [None]:
# Load the ASCOR ground-truth assessments from the Excel workbook.
# `pd.read_excel` is the pandas Excel reader; the bare name `read_xlsx` is not
# defined or imported anywhere in this notebook.
# The path is built with pathlib so it does not rely on Windows-style backslashes.
# NOTE(review): the path is resolved against the current working directory —
# confirm the notebook is launched from the directory that contains `Notebooks/`.
ascor_ground_truth_path = Path("Notebooks") / "Evaluation" / "ASCOR_assessments_results.xlsx"
ascor_ground_truth = pd.read_excel(ascor_ground_truth_path)


# Run Assessment


In [None]:
# Evaluate only the first ten distinct countries found in the ASCOR data.
countries = ascor_ground_truth["Country"].unique()[:10].tolist()

# Keyword arguments passed unchanged to every pipeline call.
assessment_options = {"detailed": False, "print_results": False}

results_data = []

for country in countries:
    # Ground-truth rows that belong to this country.
    country_mask = ascor_ground_truth["Country"] == country
    ascor_result = ascor_ground_truth[country_mask]

    # Run the three pipelines in a fixed order: CP1A, CP1A large context, CP1B.
    cp1a_rag = run_cp1a_assessment(country, **assessment_options)
    cp1a_large_context_rag = run_cp1a_assessment_large_context(country, **assessment_options)
    cp1b_rag = run_cp1b_assessment(country, **assessment_options)

    # One record per country; the first matching ground-truth row is used.
    # TODO: 'Assessment_Column' is a placeholder — replace with the actual ASCOR column name.
    # NOTE(review): each pipeline's return value is stored as-is and is presumably
    # the assessment result itself — confirm against the pipeline module.
    record = {
        'Country': country,
        'ASCOR_True': ascor_result.iloc[0]['Assessment_Column'],
        'CP1A_Assessment': cp1a_rag,
        'CP1A_Large_Context_Assessment': cp1a_large_context_rag,
        'CP1B_Assessment': cp1b_rag,
    }
    results_data.append(record)


In [None]:
# One row per country: ground truth next to each pipeline's result.
comparison_df = pd.DataFrame(results_data)

# Display label -> column holding that method's result.
prediction_columns = {
    'CP1A': 'CP1A_Assessment',
    'CP1A Large Context': 'CP1A_Large_Context_Assessment',
    'CP1B': 'CP1B_Assessment',
}

# Accuracy = share of rows whose result equals the ASCOR ground truth.
ground_truth = comparison_df['ASCOR_True']
accuracy_by_method = {
    label: (ground_truth == comparison_df[column]).mean()
    for label, column in prediction_columns.items()
}
cp1a_accuracy, cp1a_large_accuracy, cp1b_accuracy = accuracy_by_method.values()

for label, accuracy in accuracy_by_method.items():
    print(f"{label} Accuracy: {accuracy:.2%}")

# Bar chart comparing the three methods.
methods = list(accuracy_by_method)
accuracies = list(accuracy_by_method.values())

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(methods, accuracies, color=['skyblue', 'lightgreen', 'coral'])
ax.set_title('Assessment Method Accuracy Comparison')
ax.set_ylabel('Accuracy')
ax.set_ylim(0, 1)

# Write each accuracy as a percentage just above its bar.
for bar, acc in zip(bars, accuracies):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.01
    ax.text(label_x, label_y, f'{acc:.1%}', ha='center', va='bottom')

fig.tight_layout()
plt.show()

# Print the full per-country comparison table.
print("\nDetailed Results:")
print(comparison_df)