# Gold standard judge dataset
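This step is the equivalent of SME (subject-matter expert) labelling: a strong LLM judge labels the judge split, and its verdicts are saved as the gold standard.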

In [None]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel (mode 2 reloads modules before
# each cell execution).
%load_ext autoreload
%autoreload 2

In [None]:
from dspy_judge.llm_caller.utils import load_secrets
from dspy_judge.data_loader.dataset_loader import CustomerSupportDatasetLoader
from dspy_judge.processor.parallel_processor import ParallelProcessor
from dspy_judge.prompts.dspy_signatures import SupportTranscriptJudge
from dspy_judge.processor.utils import convert_dataset_to_dspy_examples, extract_llm_response_fields_dspy
from dspy_judge.processor.parallel_processor import ParallelProcessor
from dspy_judge.metrics import match_judge_metric
from dspy_judge.plotting import plot_judge_results
import numpy as np
from sklearn.metrics import cohen_kappa_score
import dspy

In [None]:
# Load credentials through the project helper; the Anthropic API key is read
# from this mapping when the judge's LM config is built further down.
# NOTE(review): the name `secrets` matches the stdlib module of the same name;
# harmless here because that module is not imported in the visible cells.
secrets = load_secrets()

## Load split dataset (final output of notebook 1)

In [None]:
# Location of the train/test split written to disk by the previous notebook.
split_dataset_dir = "datasets/preprocessed_dev_judge_dataset_split/"

data_loader = CustomerSupportDatasetLoader()
split_dataset = data_loader.load_local_dataset(split_dataset_dir)

In [None]:
# The "train" split serves as the dev set; the "test" split is the set that
# the gold standard judge labels below.
dev_dataset, judge_dataset = split_dataset["train"], split_dataset["test"]

## Set up gold standard judge

In [None]:
# Build the judge as a DSPy ChainOfThought module over the
# SupportTranscriptJudge signature.
# NOTE(review): the identical construction is repeated at the top of the next
# cell, so this cell is redundant when the notebook is run top to bottom.
gold_standard_judge_generator_module = dspy.ChainOfThought(SupportTranscriptJudge)

In [None]:
# LM settings handed to the processor: the model used as the gold standard
# judge, its API key, and a temperature of 0.
dspy_config = dict(
    model_name="anthropic/claude-opus-4-20250514",
    api_key=secrets["ANTHROPIC_API_KEY"],
    temperature=0,
)

# Judge module: chain-of-thought over the SupportTranscriptJudge signature.
gold_standard_judge_generator_module = dspy.ChainOfThought(SupportTranscriptJudge)

gold_standard_dspy_judge_processor = ParallelProcessor()

# Only the conversation id and the transcript to be judged are passed along.
judge_inputs = judge_dataset.select_columns(["conversation_id", "output_transcript"])

# NOTE(review): despite the "_optimized" suffix, this holds the output of the
# un-optimized gold standard judge; the name is kept because later cells use it.
dspy_judge_results_optimized = gold_standard_dspy_judge_processor.process_dataset_with_dspy(
    judge_inputs,
    input_field="output_transcript",
    dspy_module=gold_standard_judge_generator_module,
    dspy_config=dspy_config,
)

In [None]:
# Sanity check: show the raw judge response stored for the first record.
first_record = dspy_judge_results_optimized.to_pandas().iloc[0]
first_record["dspy_response"]

In [None]:
# Apply the project's field extractor to every record of the judge output.
gold_standard_dspy_judge_results = dspy_judge_results_optimized.map(extract_llm_response_fields_dspy)

In [None]:
# Display the mapped dataset so the result of the extraction step can be inspected.
gold_standard_dspy_judge_results

In [None]:
# Convert to a DataFrame first, then hand it to the project's plotting helper.
results_frame = gold_standard_dspy_judge_results.to_pandas()
plot_judge_results(results_frame)

## Save gold standard result

In [None]:
# Persist the gold standard judge output for use outside this notebook.
gold_standard_output_dir = "datasets/gold_standard_judge_result"
data_loader.save_dataset_locally(gold_standard_dspy_judge_results, gold_standard_output_dir)