# Benchmark Pickle Viewer

This notebook loads the benchmarks pickle file (`data/processed/benchmarks.pkl`) and displays its contents.

In [None]:
import pickle
import pandas as pd
import sys
from pathlib import Path

# Add the current working directory to the Python path (assumed to be the project root)
sys.path.append('.')
from src.excel_processor import Benchmark

## Load the pickle file

In [None]:
# Load the benchmarks from the pickle file.
# The notebook may be started from the directory that contains "data/" or
# from a sub-directory of it, so the working directory is checked first and
# then each of its parent directories.
BENCHMARKS_RELATIVE_PATH = Path("data") / "processed" / "benchmarks.pkl"


def find_benchmarks_pickle(start_dir):
    """Return the closest existing benchmarks pickle at or above *start_dir*.

    Args:
        start_dir: Directory (a ``Path``) where the search begins.

    Returns:
        ``<dir>/data/processed/benchmarks.pkl`` for the first directory in
        ``start_dir, *start_dir.parents`` that contains the file, or ``None``
        when none of them does.
    """
    for root in (start_dir, *start_dir.parents):
        candidate = root / BENCHMARKS_RELATIVE_PATH
        if candidate.is_file():
            return candidate
    return None


current_dir = Path.cwd()
pickle_path = current_dir / BENCHMARKS_RELATIVE_PATH

print(f"Looking for pickle file at: {pickle_path}")

located_path = find_benchmarks_pickle(current_dir)
if located_path is None:
    print(f"Pickle file not found at {pickle_path}")
    print("It was not found under any parent directory either.")
    # Keep `benchmarks` bound so the following cells do not stop with a
    # NameError; they simply see an empty mapping.
    benchmarks = {}
else:
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only open pickle files that come from a trusted source.
    with open(located_path, 'rb') as f:
        benchmarks = pickle.load(f)
    if located_path == pickle_path:
        print(f"Total benchmarks: {len(benchmarks)}")
    else:
        print(f"Pickle file not found at {pickle_path}")
        print(f"Using pickle file found at: {located_path}")
        print(f"Loaded from alternative path. Total benchmarks: {len(benchmarks)}")

## View sample benchmarks

In [None]:
# Print a short summary of the first five benchmarks. Definitions longer
# than 100 characters are cut at 100 characters and marked with "...".
for position, (benchmark_key, entry) in enumerate(benchmarks.items(), start=1):
    if position > 5:
        break
    print(f"\n{position}. Benchmark ID: {benchmark_key}")
    print(f"   Grade Level: {entry.grade_level}")
    print(f"   Subject: {entry.subject}")
    definition_text = entry.definition
    if len(definition_text) > 100:
        print(f"   Definition: {definition_text[:100]}...")
    else:
        print(f"   Definition: {definition_text}")

## Convert to DataFrame for easier viewing

In [None]:
# Convert the benchmarks to a DataFrame with one row per benchmark.
# The column names are passed explicitly so that an empty `benchmarks`
# mapping still produces a frame that has these four columns; without them
# an empty frame has no columns and any df["..."] lookup raises KeyError.
benchmark_columns = ["ID", "Grade Level", "Subject", "Definition"]

data = [
    {
        "ID": key,
        "Grade Level": value.grade_level,
        "Subject": value.subject,
        "Definition": value.definition,
    }
    for key, value in benchmarks.items()
]

df = pd.DataFrame(data, columns=benchmark_columns)

# Display the first rows of the DataFrame
df.head()

## Filter by Grade Level

In [None]:
# Collect every distinct value that occurs in the "Grade Level" column
grade_column = df["Grade Level"]
grade_levels = grade_column.unique()
print("Available grade levels:", grade_levels, sep="\n")

In [None]:
# Keep only the rows that belong to one grade level (edit the value below)
grade_to_filter = "K"  # Change this to filter by different grades

# Boolean mask: True for rows whose "Grade Level" equals the chosen grade
grade_mask = df["Grade Level"].eq(grade_to_filter)
filtered_df = df.loc[grade_mask]
match_count = filtered_df.shape[0]
print(f"Found {match_count} benchmarks for grade {grade_to_filter}")
filtered_df.head()

## Search by Benchmark ID

In [None]:
# Search for benchmarks whose ID contains the search term
search_term = "MA.K"  # Change this to search for different patterns
search_is_regex = False  # Set to True to treat search_term as a regular expression

# str.contains interprets its argument as a regular expression by default,
# so the "." in "MA.K" would match any character. regex=False makes the
# term a literal substring unless search_is_regex is switched on.
# na=False counts missing IDs as non-matches so the mask stays boolean.
id_mask = df["ID"].str.contains(search_term, regex=search_is_regex, na=False)
search_results = df[id_mask]
print(f"Found {len(search_results)} benchmarks matching '{search_term}'")
search_results.head()

## View a specific benchmark in detail

In [None]:
# View a specific benchmark (change the ID as needed)
# next(iter(...), None) takes the first key without building a list of all
# keys and yields None instead of raising when `benchmarks` is empty.
benchmark_id = next(iter(benchmarks), None)  # First benchmark by default

if not benchmarks:
    print("No benchmarks loaded; nothing to display")
elif benchmark_id in benchmarks:
    benchmark = benchmarks[benchmark_id]
    print(f"Benchmark ID: {benchmark_id}")
    print(f"Grade Level: {benchmark.grade_level}")
    print(f"Subject: {benchmark.subject}")
    print("\nDefinition:")
    print(benchmark.definition)
else:
    print(f"Benchmark {benchmark_id} not found")