In [None]:
import json
import uuid
from datetime import datetime, timezone

# Fail fast when no `spark` name is defined: merely evaluating the bare name
# raises NameError, which is re-raised as a friendlier RuntimeError.
try:
    spark
except NameError as missing_session:
    guidance = 'This notebook must be run on a Spark cluster (Databricks) with an active `spark` session.'
    raise RuntimeError(guidance) from missing_session

# Values shared by every row seeded below: one UTC timestamp, one episode
# identifier (at version 1), and one evaluation identifier.
episode_version = 1
now = datetime.now(timezone.utc)
episode_id, evaluation_id = str(uuid.uuid4()), str(uuid.uuid4())

print('episode_id:', episode_id)
print('evaluation_id:', evaluation_id)

In [None]:
# Single seed row for `datamodel_db.episode`. Structured payloads are stored
# as JSON strings; expected and actual outputs are the same answer.
seed_inputs = {'query': 'What is the capital of France?'}
seed_answer = {'answer': 'Paris'}

episode_row = dict(
    # Identity
    episode_id=episode_id,
    episode_version=episode_version,
    # Timing: created, started and ended all share one timestamp, hence a
    # zero duration.
    created_at=now,
    started_at=now,
    ended_at=now,
    duration_ms=0,
    # Run description
    status='successful',
    workflow_name='seed_workflow',
    model_name='seed_model',
    model_version=None,
    agent_version='dev',
    # Golden-set bookkeeping and provenance
    is_golden=True,
    golden_template_id='golden_seed_001',
    source='seed_notebook',
    source_ref=None,
    # JSON-encoded payloads
    inputs_json=json.dumps(seed_inputs),
    expected_outputs_json=json.dumps(seed_answer),
    actual_outputs_json=json.dumps(seed_answer),
    metadata_json=json.dumps({'note': 'minimal seed'}),
    # Cost is left unset; token counts are filled in.
    cost_usd=None,
    cost_input_usd=None,
    cost_output_usd=None,
    input_tokens=12,
    output_tokens=3,
    total_tokens=15,
)

def _seed_step(step_index, step_type, step_name, content, **measured):
    """Return one `episode_steps` row dict for the seeded episode.

    Every measurement column starts as ``None``; keyword arguments in
    ``measured`` overwrite those defaults without changing the key order.
    """
    row = {
        'episode_id': episode_id,
        'episode_version': episode_version,
        'step_index': step_index,
        'step_type': step_type,
        'step_name': step_name,
        'content': content,
        'created_at': now,
        'tokens_in': None,
        'tokens_out': None,
        'total_tokens': None,
        'latency_ms': None,
        'score': None,
        'failure_type': None,
        'invariant_violated': None,
        'metadata_json': None,
    }
    row.update(measured)
    return row


# Two steps: the user's question (no measurements) followed by the
# assistant's answer (token counts and a score).
steps_rows = [
    _seed_step(0, 'user', 'question', 'What is the capital of France?'),
    _seed_step(
        1, 'assistant', 'answer', 'Paris.',
        tokens_in=12, tokens_out=3, total_tokens=15, score=1.0,
    ),
]

# Single seed row for `datamodel_db.episode_evaluation`, tied to the seeded
# episode through (episode_id, episode_version).
seed_metrics = {'seed': True}

evaluation_row = dict(
    # Keys
    evaluation_id=evaluation_id,
    episode_id=episode_id,
    episode_version=episode_version,
    # Evaluator provenance
    evaluated_at=now,
    evaluator_name='ai_workflow_evaluator',
    evaluator_version='dev',
    mlflow_run_id=None,
    # Outcome and scores
    match_outcome='match',
    overall_score=1.0,
    drift_score=0.0,
    coherence_score=1.0,
    idempotency_score=1.0,
    # Artifacts and extra metrics (JSON-encoded)
    artifact_uri=None,
    metrics_json=json.dumps(seed_metrics),
)

def _append_rows(table_name, rows):
    """Append ``rows`` (a list of dicts) to the existing table ``table_name``.

    The DataFrame is built with the target table's own schema instead of
    letting PySpark infer one from the dicts. Several seed columns are
    ``None`` in every row, and schema inference cannot determine a type for
    an all-null column, so the inferred path raises before anything is
    written.

    Requires the table to exist already; ``spark.table`` raises otherwise.
    Columns of the table that are absent from a row dict are written as null.
    """
    target_schema = spark.table(table_name).schema
    frame = spark.createDataFrame(rows, schema=target_schema)
    frame.write.mode('append').saveAsTable(table_name)


# Write order: the episode first, then its steps, then its evaluation.
_append_rows('datamodel_db.episode', [episode_row])
_append_rows('datamodel_db.episode_steps', steps_rows)
_append_rows('datamodel_db.episode_evaluation', [evaluation_row])

# The check mark was previously stored as mojibake (UTF-8 bytes read as cp1252).
print('✓ Seeded one episode, two steps, one evaluation')


In [None]:
# Read-back checks: the five most recently created episodes, then the steps
# and evaluations belonging to the episode seeded by this notebook.
latest_episodes_sql = 'SELECT * FROM datamodel_db.episode ORDER BY created_at DESC LIMIT 5'
seeded_steps_sql = f"SELECT * FROM datamodel_db.episode_steps WHERE episode_id = '{episode_id}' ORDER BY step_index"
seeded_evaluations_sql = f"SELECT * FROM datamodel_db.episode_evaluation WHERE episode_id = '{episode_id}' ORDER BY evaluated_at DESC"

for check_sql in (latest_episodes_sql, seeded_steps_sql, seeded_evaluations_sql):
    # truncate=False prints full column values (the JSON strings, the UUIDs).
    spark.sql(check_sql).show(truncate=False)
