In [1]:
from langsmith import Client
from langsmith.utils import LangSmithNotFoundError

# Golden question -> expected-answer pairs for the dataset.
# Every expected answer is stored as a string, including the numeric ones.
# All entries below carried the author's NEW marker.
question_answers = {
    # --- Basic Retrieval --- (no entries in this set)

    # --- Comparisons ---
    "Which regions had both a population increase and a gender ratio improvement from start to end?": "Prague, Central Bohemia, South Moravia",
    "What is the total number of regions where the absolute difference between start and end population exceeded 2000?": "8",
    "Which region had the highest standard deviation of population changes across the period?": "Prague",

    # --- Aggregations ---
    "What is the median population size of regions that experienced a population increase?": "1227503",
    "What is the weighted average population change rate across all regions?": "-0.03",

    # --- Temporal / Deltas ---
    "What is the average population change rate per month for regions that lost population?": "-0.12",
    "How many regions had a population change pattern opposite to the national trend?": "3",

    # --- Demographic Ratios ---
    "What is the standard deviation of the gender ratio across all regions at mid-period?": "0.034",
    "Which region had the smallest standard deviation of gender ratio across the period?": "Vysočina",
    "How many regions had a gender ratio more than 2 standard deviations from the mean?": "0",

    # --- Complex Joins / Logic ---
    "How many regions had both male and female population increases with growth rate difference > 0.5%?": "2",
    "How many regions had a higher percentage of women at period end despite population decrease?": "2",
    "What is the total number of regions where the absolute difference between male and female population changes exceeded 1000?": "7",

    # --- Statistical Analysis ---
    "What is the coefficient of variation of population changes across all regions?": "0.87",
    "What is the correlation coefficient between initial population size and population change rate?": "0.62",
    "What is the average population momentum for regions with more than 1 million residents?": "1.002",
    "What is the average demographic transition index for regions with more than 5% of national population?": "1.002",
    "What is the average population density change rate for regions with both population increase and gender ratio improvement?": "0.11",
}

# Target dataset, declared once so the lookup and the fallback creation
# below always use exactly the same name.
DATASET_NAME = "czsu agent problematic2"
DATASET_DESCRIPTION = "Dataset of Czech Statistical Office agent questions and answers."

ls_client = Client()

# Get or create the dataset: reuse it when the lookup succeeds, create it only
# when the client reports that it does not exist.
try:
    dataset = ls_client.read_dataset(dataset_name=DATASET_NAME)
    print(f"Dataset '{dataset.name}' found with ID: {dataset.id}")
except LangSmithNotFoundError:
    dataset = ls_client.create_dataset(
        dataset_name=DATASET_NAME,
        description=DATASET_DESCRIPTION,
    )
    print(f"Dataset '{dataset.name}' created with ID: {dataset.id}")

# Collect the questions already stored in the dataset so re-running this cell
# does not upload duplicates. `or {}` makes an example without inputs get
# skipped instead of raising AttributeError on `.get`.
existing_examples = ls_client.list_examples(dataset_id=dataset.id)
stored_questions = (
    (example.inputs or {}).get("question") for example in existing_examples
)
existing_questions = {stored for stored in stored_questions if stored}

# Keep only the question/answer pairs whose question is not stored yet.
new_examples = [
    {"inputs": {"question": question}, "outputs": {"answers": answer}}
    for question, answer in question_answers.items()
    if question not in existing_questions
]

# Upload in a single call; skip the request entirely when nothing is new.
if new_examples:
    ls_client.create_examples(dataset_id=dataset.id, examples=new_examples)
    print(f"Added {len(new_examples)} new examples to dataset '{dataset.name}'.")
else:
    print("No new examples to add; all questions already exist in the dataset.")

Dataset 'czsu agent problematic2' created with ID: 3a539a49-7ecb-4f5c-bd4f-2b8886d3ec6e
Added 18 new examples to dataset 'czsu agent problematic2'.


In [4]:
from langsmith import Client
from langsmith.utils import LangSmithNotFoundError

# Golden question -> expected-answer pairs for the dataset.
# Every expected answer is stored as a string, including the numeric ones.
# All entries below carried the author's NEW marker.
question_answers = {
    # --- Basic Retrieval --- (no entries in this set)

    # --- Comparisons ---
    "Which regions had both a population increase and a gender ratio improvement from start to end?": "Prague, Central Bohemia, South Moravia",
    "What is the total number of regions where the absolute difference between start and end population exceeded 2000?": "8",
    "Which region had the highest standard deviation of population changes across the period?": "Prague",

    # --- Aggregations ---
    "What is the median population size of regions that experienced a population increase?": "1227503",
    "What is the weighted average population change rate across all regions?": "-0.03",

    # --- Temporal / Deltas ---
    "What is the average population change rate per month for regions that lost population?": "-0.12",
    "How many regions had a population change pattern opposite to the national trend?": "3",

    # --- Demographic Ratios ---
    "What is the standard deviation of the gender ratio across all regions at mid-period?": "0.034",
    "Which region had the smallest standard deviation of gender ratio across the period?": "Vysočina",
    "How many regions had a gender ratio more than 2 standard deviations from the mean?": "0",

    # --- Complex Joins / Logic ---
    "How many regions had both male and female population increases with growth rate difference > 0.5%?": "2",
    # Expected answer adjusted from 2 to 11.
    "How many regions had a higher percentage of women at period end despite population decrease?": "11",
    "What is the total number of regions where the absolute difference between male and female population changes exceeded 1000?": "7",

    # --- Statistical Analysis ---
    "What is the coefficient of variation of population changes across all regions?": "0.87",
    "What is the correlation coefficient between initial population size and population change rate?": "0.62",
    "What is the average population momentum for regions with more than 1 million residents?": "1.002",
    "What is the average demographic transition index for regions with more than 5% of national population?": "1.002",
    "What is the average population density change rate for regions with both population increase and gender ratio improvement?": "0.11",
}

# Target dataset, declared once so the lookup and the fallback creation
# below always use exactly the same name.
DATASET_NAME = "czsu agent problematic2b"
DATASET_DESCRIPTION = "Dataset of Czech Statistical Office agent questions and answers."

ls_client = Client()

# Get or create the dataset: reuse it when the lookup succeeds, create it only
# when the client reports that it does not exist.
try:
    dataset = ls_client.read_dataset(dataset_name=DATASET_NAME)
    print(f"Dataset '{dataset.name}' found with ID: {dataset.id}")
except LangSmithNotFoundError:
    dataset = ls_client.create_dataset(
        dataset_name=DATASET_NAME,
        description=DATASET_DESCRIPTION,
    )
    print(f"Dataset '{dataset.name}' created with ID: {dataset.id}")

# Collect the questions already stored in the dataset so re-running this cell
# does not upload duplicates. `or {}` makes an example without inputs get
# skipped instead of raising AttributeError on `.get`.
existing_examples = ls_client.list_examples(dataset_id=dataset.id)
stored_questions = (
    (example.inputs or {}).get("question") for example in existing_examples
)
existing_questions = {stored for stored in stored_questions if stored}

# Keep only the question/answer pairs whose question is not stored yet.
new_examples = [
    {"inputs": {"question": question}, "outputs": {"answers": answer}}
    for question, answer in question_answers.items()
    if question not in existing_questions
]

# Upload in a single call; skip the request entirely when nothing is new.
if new_examples:
    ls_client.create_examples(dataset_id=dataset.id, examples=new_examples)
    print(f"Added {len(new_examples)} new examples to dataset '{dataset.name}'.")
else:
    print("No new examples to add; all questions already exist in the dataset.")

Dataset 'czsu agent problematic2b' created with ID: 038e0f73-9559-4744-bdb6-a36c24a11f96
Added 18 new examples to dataset 'czsu agent problematic2b'.


<!-- MEDIUM COMPLEX -->

In [2]:
from langsmith import Client
from langsmith.utils import LangSmithNotFoundError

# Golden question -> expected-answer pairs for the dataset.
# Every expected answer is stored as a string, including the numeric ones.
# All entries below carried the author's NEW marker.
question_answers = {
    # --- Basic Retrieval ---
    "What was the total population of Prague at the beginning of the period?": "1384732",
    "What was the mid-period population count for South Moravia?": "1225885",
    "What was the end-of-period male population in Central Bohemia?": "719004",

    # --- Simple Comparisons ---
    "Which regions had more than 1 million residents at mid-period?": "Prague, Central Bohemia, South Moravia, Moravian-Silesia",
    "List regions where female population at the end was higher than at the start": "Prague, Central Bohemia, South Moravia",
    "Which regions had a higher mid-period population than their start population?": "Prague, Central Bohemia, South Moravia",

    # --- Basic Aggregations ---
    "What was the average mid-period population across all regions?": "726034",
    "What was the total population of all regions at the end of the period?": "10897237",
    "How many regions had a mid-period population above 500,000?": "13",

    # --- Simple Temporal Analysis ---
    "What was the population change in Prague from start to end of the period?": "7393",
    "Which region had the largest population decrease from start to end?": "Moravian-Silesia",
    "What was the total population change across all regions from start to end?": "-3318",

    # --- Gender-based Analysis ---
    "What was the gender ratio (women/men) in Prague at mid-period?": "1.06",
    "Which region had the highest percentage of women at mid-period?": "Karlovarský kraj",
    "How many regions had more women than men at the end of the period?": "14",

    # --- Regional Comparisons ---
    "What was the difference in population between Prague and Central Bohemia at mid-period?": "71515",
    "Which region had the smallest population at the end of the period?": "Karlovarský kraj",
    "What was the ratio of Prague's population to the national total at mid-period?": "0.13",

    # --- Simple Demographic Patterns ---
    "How many regions experienced a population increase from start to end?": "3",
    "What was the average population change rate across all regions?": "-0.03",
    "Which region had the most stable population (smallest absolute change)?": "South Moravia",

    # --- Basic Statistical Measures ---
    "What was the median population size across all regions at mid-period?": "613109",
    "What was the standard deviation of mid-period populations across all regions?": "389234",
    "What was the range of population sizes at the end of the period?": "1098907",

    # --- Simple Ratio Analysis ---
    "What percentage of the total population lived in Prague at mid-period?": "12.74",
    "What was the average gender ratio across all regions at mid-period?": "1.04",
    "What was the ratio of the largest to smallest region population at mid-period?": "4.73",

    # --- Basic Trend Analysis ---
    "How many regions had a higher end population than mid-period population?": "3",
    "What was the average population change from mid-period to end?": "872",
    "Which region had the most consistent population growth throughout the period?": "Prague",
}

# Target dataset, declared once so the lookup and the fallback creation
# below always use exactly the same name.
DATASET_NAME = "czsu agent problematic2c"
DATASET_DESCRIPTION = "Dataset of Czech Statistical Office agent questions and answers."

ls_client = Client()

# Get or create the dataset: reuse it when the lookup succeeds, create it only
# when the client reports that it does not exist.
try:
    dataset = ls_client.read_dataset(dataset_name=DATASET_NAME)
    print(f"Dataset '{dataset.name}' found with ID: {dataset.id}")
except LangSmithNotFoundError:
    dataset = ls_client.create_dataset(
        dataset_name=DATASET_NAME,
        description=DATASET_DESCRIPTION,
    )
    print(f"Dataset '{dataset.name}' created with ID: {dataset.id}")

# Collect the questions already stored in the dataset so re-running this cell
# does not upload duplicates. `or {}` makes an example without inputs get
# skipped instead of raising AttributeError on `.get`.
existing_examples = ls_client.list_examples(dataset_id=dataset.id)
stored_questions = (
    (example.inputs or {}).get("question") for example in existing_examples
)
existing_questions = {stored for stored in stored_questions if stored}

# Keep only the question/answer pairs whose question is not stored yet.
new_examples = [
    {"inputs": {"question": question}, "outputs": {"answers": answer}}
    for question, answer in question_answers.items()
    if question not in existing_questions
]

# Upload in a single call; skip the request entirely when nothing is new.
if new_examples:
    ls_client.create_examples(dataset_id=dataset.id, examples=new_examples)
    print(f"Added {len(new_examples)} new examples to dataset '{dataset.name}'.")
else:
    print("No new examples to add; all questions already exist in the dataset.")

Dataset 'czsu agent problematic2c' created with ID: e700e593-5cbf-4524-898c-87fba572105f
Added 30 new examples to dataset 'czsu agent problematic2c'.
