In [None]:
#https://colab.research.google.com/github/Arize-ai/phoenix/blob/datasets/tutorials/experiments/txt2sql.ipynb#scrollTo=1HPMpWWkdFAs

In [1]:
# Launch the local Phoenix app. The call's return value is the last expression
# of the cell, so the session object is displayed as the cell output.
import phoenix as px
px.launch_app()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


<phoenix.session.session.ThreadSession at 0x1ab011576d0>

In [2]:
# Start tracing for OpenAI client calls.
# The OpenAI client created later in this notebook is repurposed to talk to Ollama.
from phoenix.trace.openai import OpenAIInstrumentor
OpenAIInstrumentor().instrument()

In [3]:
import nest_asyncio
nest_asyncio.apply()

In [4]:
# Load the "train" split of the "suzyanil/nba-data" dataset.
import duckdb
from datasets import load_dataset
data = load_dataset("suzyanil/nba-data")["train"]

In [5]:
# Convert the dataset to a pandas DataFrame and parse the 'Date' column into
# datetimes; df.info() then lists each column with its non-null count and dtype.
import pandas as pd
df = data.to_pandas()
df['Date'] = pd.to_datetime(df['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9840 entries, 0 to 9839
Data columns (total 41 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Unnamed: 0                9840 non-null   int64         
 1   Team                      9840 non-null   object        
 2   Game                      9840 non-null   int64         
 3   Date                      9840 non-null   datetime64[ns]
 4   Home                      9840 non-null   object        
 5   Opponent                  9840 non-null   object        
 6   WINorLOSS                 9840 non-null   object        
 7   TeamPoints                9840 non-null   int64         
 8   OpponentPoints            9840 non-null   int64         
 9   FieldGoals                9840 non-null   int64         
 10  FieldGoalsAttempted       9840 non-null   int64         
 11  FieldGoals.               9840 non-null   float64       
 12  X3PointShots        

  df['Date'] = pd.to_datetime(df['Date'])


In [6]:
# Open an in-memory DuckDB database and register the DataFrame under the
# table name "nba" so it can be queried with SQL.
conn = duckdb.connect(database=":memory:", read_only=False)
conn.register("nba", df)

# Sanity check: query a few rows and display the first one as a dict.
conn.query("SELECT * FROM nba limit 5").to_df().to_dict(orient="records")[0]

{'Unnamed: 0': 1,
 'Team': 'ATL',
 'Game': 1,
 'Date': Timestamp('2014-10-29 00:00:00'),
 'Home': 'Away',
 'Opponent': 'TOR',
 'WINorLOSS': 'L',
 'TeamPoints': 102,
 'OpponentPoints': 109,
 'FieldGoals': 40,
 'FieldGoalsAttempted': 80,
 'FieldGoals.': 0.5,
 'X3PointShots': 13,
 'X3PointShotsAttempted': 22,
 'X3PointShots.': 0.591,
 'FreeThrows': 9,
 'FreeThrowsAttempted': 17,
 'FreeThrows.': 0.529,
 'OffRebounds': 10,
 'TotalRebounds': 42,
 'Assists': 26,
 'Steals': 6,
 'Blocks': 8,
 'Turnovers': 17,
 'TotalFouls': 24,
 'Opp.FieldGoals': 37,
 'Opp.FieldGoalsAttempted': 90,
 'Opp.FieldGoals.': 0.411,
 'Opp.3PointShots': 8,
 'Opp.3PointShotsAttempted': 26,
 'Opp.3PointShots.': 0.308,
 'Opp.FreeThrows': 27,
 'Opp.FreeThrowsAttempted': 33,
 'Opp.FreeThrows.': 0.818,
 'Opp.OffRebounds': 16,
 'Opp.TotalRebounds': 48,
 'Opp.Assists': 26,
 'Opp.Steals': 13,
 'Opp.Blocks': 9,
 'Opp.Turnovers': 9,
 'Opp.TotalFouls': 22}

# Implement text-to-SQL

In [7]:
# NOTE(review): `os` is not used in any of the visible cells.
import os
import openai
# Async OpenAI client repurposed to talk to Ollama: base_url points at a local
# endpoint on port 11434. The api_key value looks like a placeholder —
# presumably the local server does not validate it; confirm.
client = openai.AsyncClient(base_url="http://localhost:11434/v1/", api_key="ollama")

In [8]:
# Describe the nba table as a list of per-column dicts (column_name,
# column_type, ...). This is used to build the system prompts below.
columns = conn.query("DESCRIBE nba").to_df().to_dict(orient="records")
columns  # display the column info

[{'column_name': 'Unnamed: 0',
  'column_type': 'BIGINT',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'Team',
  'column_type': 'VARCHAR',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'Game',
  'column_type': 'BIGINT',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'Date',
  'column_type': 'TIMESTAMP_NS',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'Home',
  'column_type': 'VARCHAR',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'Opponent',
  'column_type': 'VARCHAR',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'WINorLOSS',
  'column_type': 'VARCHAR',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 {'column_name': 'TeamPoints',
  'column_type': 'BIGINT',
  'null': 'YES',
  'key': None,
  'default': None,
  'extra': None},
 

In [9]:
# Model name sent with the chat-completion requests in this notebook.
TASK_MODEL = "phi3"
# Passed as experiment_metadata to run_experiment so each run records the model.
CONFIG = {"model": TASK_MODEL}

# SIMPLE PROMPT

In [10]:
# Baseline system prompt: names the table, lists every column as
# "name: type" joined by commas on a single line, and asks for the bare
# query text with no formatting.
system_prompt = (
    "You are a SQL expert, and you are given a single table named nba with the following columns:\n"
    f"""{",".join(column["column_name"] + ": " + column["column_type"] for column in columns)}\n"""
    "Write a SQL query corresponding to the user's request. Return just the query text, "
    "with no formatting (backticks, markdown, etc.)."
)

In [11]:
# LLM function
async def generate_query(input):
    """Ask the chat model to turn a natural-language request into SQL.

    Args:
        input: The user's request; sent unchanged as the user message.

    Returns:
        The message content of the first choice in the completion, exactly
        as the model produced it (no post-processing is applied).
    """
    # system_prompt is a module-level name looked up when the function runs,
    # so the prompt in effect at call time is the one that gets sent.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": input},
    ]
    completion = await client.chat.completions.create(
        model=TASK_MODEL,
        temperature=0,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [12]:
query = await generate_query("Who won the most games?")
print(query)

 SELECT Team AS Winner FROM nba WHERE WinorLOSS = 'WIN' GROUP BY Team ORDER BY COUNT(*) DESC LIMIT 1;
Please note that this assumes a simplified representation of game outcomes and does not account for ties or other complexities in real-world data. The actual query might need adjustments based on the specific SQL dialect being used (e.g., MySQL, PostgreSQL) and additional details about how games are recorded if there's more than one per day.


In [13]:
# SQL function
def execute_query(query):
    """Run a SQL string on the notebook's DuckDB connection.

    Args:
        query: SQL text to execute against ``conn``.

    Returns:
        The result rows as a list of dicts, one dict per row keyed by
        column name.
    """
    result_frame = conn.query(query).fetchdf()
    return result_frame.to_dict(orient="records")


execute_query("SELECT Team AS Winner FROM nba WHERE WINorLOSS = 'W' GROUP BY Team ORDER BY COUNT(*) DESC LIMIT 1;")

[{'Winner': 'GSW'}]

In [14]:
conn.query("SELECT Team AS Winner, count(*) FROM nba WHERE WINorLOSS = 'W' GROUP BY Team ORDER BY COUNT(*) DESC").to_df().head()

Unnamed: 0,Winner,count_star()
0,GSW,265
1,SAS,230
2,HOU,217
3,TOR,215
4,CLE,211


In [15]:
# Test questions: natural-language prompts that are uploaded as the
# "question" column of the evaluation dataset below.
questions = [
    "Which team won the most games?",
    "Which team won the most games in 2015?",
    "Who led the league in 3 point shots?",
    "Which team had the biggest difference in records across two consecutive years?",
    "What is the average number of free throws per year?",
]

In [16]:
# Create a dataset in Phoenix.
import pandas as pd
# Store the questions above as a versioned dataset in Phoenix.

ds = px.Client().upload_dataset(
    dataset_name="nba-questions", # name of the dataset
    dataframe=pd.DataFrame({"question": questions}), # dataframe to load
    input_keys=["question"], # column that holds the questions
)

# If you have already uploaded the dataset, you can fetch it using the following line
# ds = px.Client().get_dataset(name="nba-questions")

📤 Uploading dataset...
💾 Examples uploaded: http://localhost:6006/datasets/RGF0YXNldDox/examples
🗄️ Dataset version ID: RGF0YXNldFZlcnNpb246MQ==


In [17]:
# Generate SQL from a question, execute it, and capture any execution error
async def text2sql(question):
    """Generate a SQL query for ``question`` and try to run it.

    Args:
        question: Natural-language question passed to ``generate_query``.

    Returns:
        A dict with three keys: ``"query"`` (the generated text),
        ``"results"`` (rows from ``execute_query``, or None if execution
        failed) and ``"error"`` (the ``duckdb.Error`` message as a string,
        or None if execution succeeded).
    """
    query = await generate_query(question)
    try:
        results, error = execute_query(query), None
    except duckdb.Error as exc:
        # Only DuckDB errors are captured; anything else propagates.
        results, error = None, str(exc)
    return {"query": query, "results": results, "error": error}

In [18]:
# Evaluator: did the SQL execute without an error being recorded?
def no_error(output):
    """Score 1.0 when ``output`` has no recorded error, else 0.0.

    Args:
        output: Mapping produced by the task; its ``"error"`` entry is read
            with ``.get``, so a missing key counts as no error.

    Returns:
        1.0 if the ``"error"`` entry is None (or absent), otherwise 0.0.
    """
    if output.get("error") is None:
        return 1.0
    return 0.0


# Evaluator: did the query return at least one row?
def has_results(output):
    """Score 1.0 when ``output`` carries a non-empty result set, else 0.0.

    Args:
        output: Mapping produced by the task; its ``"results"`` entry is
            read with ``.get``.

    Returns:
        0.0 if the ``"results"`` entry is None, absent, or has length 0;
        1.0 otherwise.
    """
    rows = output.get("results")
    if rows is None or len(rows) == 0:
        return 0.0
    return 1.0

In [19]:
# Run an experiment over every question in the dataset, scoring whether the SQL
# executed without error and whether it returned any rows.
from phoenix.experiments import run_experiment


# Define the task to run text2sql on the input question.
# text2sql is a coroutine function and this is a plain `def`, so the coroutine
# is returned un-awaited — presumably run_experiment awaits it; confirm.
def task(input):
    return text2sql(input["question"])


# CONFIG is attached as experiment metadata.
experiment = run_experiment(
    ds, task=task, evaluators=[no_error, has_results], experiment_metadata=CONFIG)

🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDox/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDox


running tasks |          | 0/5 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/10 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDox

Experiment Summary (07/22/24 03:17 PM -0400)
--------------------------------------------
     evaluator  n  n_scores  avg_score
0  has_results  5         5        0.2
1     no_error  5         5        0.4

Tasks Summary (07/22/24 03:17 PM -0400)
---------------------------------------
   n_examples  n_runs  n_errors
0           5       5         0


# Advanced

In [20]:
# A more advanced representation of the schema for the prompt:
# take a single row from the table as a dict of example values ...
samples = conn.query("SELECT * FROM nba LIMIT 1").to_df().to_dict(orient="records")[0]

# ... and render one "name | type | example value" line per column.
sample_rows = "\n".join(
    f"{column['column_name']} | {column['column_type']} | {samples[column['column_name']]}"
    for column in columns
)

print(sample_rows)

Unnamed: 0 | BIGINT | 1
Team | VARCHAR | ATL
Game | BIGINT | 1
Date | TIMESTAMP_NS | 2014-10-29 00:00:00
Home | VARCHAR | Away
Opponent | VARCHAR | TOR
WINorLOSS | VARCHAR | L
TeamPoints | BIGINT | 102
OpponentPoints | BIGINT | 109
FieldGoals | BIGINT | 40
FieldGoalsAttempted | BIGINT | 80
FieldGoals. | DOUBLE | 0.5
X3PointShots | BIGINT | 13
X3PointShotsAttempted | BIGINT | 22
X3PointShots. | DOUBLE | 0.591
FreeThrows | BIGINT | 9
FreeThrowsAttempted | BIGINT | 17
FreeThrows. | DOUBLE | 0.529
OffRebounds | BIGINT | 10
TotalRebounds | BIGINT | 42
Assists | BIGINT | 26
Steals | BIGINT | 6
Blocks | BIGINT | 8
Turnovers | BIGINT | 17
TotalFouls | BIGINT | 24
Opp.FieldGoals | BIGINT | 37
Opp.FieldGoalsAttempted | BIGINT | 90
Opp.FieldGoals. | DOUBLE | 0.411
Opp.3PointShots | BIGINT | 8
Opp.3PointShotsAttempted | BIGINT | 26
Opp.3PointShots. | DOUBLE | 0.308
Opp.FreeThrows | BIGINT | 27
Opp.FreeThrowsAttempted | BIGINT | 33
Opp.FreeThrows. | DOUBLE | 0.818
Opp.OffRebounds | BIGINT | 16
Opp.

In [21]:
# Rebind system_prompt to a richer version: the columns are laid out as a
# "Column | Type | Example" table built from sample_rows, and the instruction
# now asks specifically for a DuckDB SQL query.
system_prompt = (
    "You are a SQL expert, and you are given a single table named nba with the following columns:\n\n"
    "Column | Type | Example\n"
    "-------|------|--------\n"
    f"{sample_rows}\n"
    "\n"
    "Write a DuckDB SQL query corresponding to the user's request. "
    "Return just the query text, with no formatting (backticks, markdown, etc.)."
)

print(system_prompt)

You are a SQL expert, and you are given a single table named nba with the following columns:

Column | Type | Example
-------|------|--------
Unnamed: 0 | BIGINT | 1
Team | VARCHAR | ATL
Game | BIGINT | 1
Date | TIMESTAMP_NS | 2014-10-29 00:00:00
Home | VARCHAR | Away
Opponent | VARCHAR | TOR
WINorLOSS | VARCHAR | L
TeamPoints | BIGINT | 102
OpponentPoints | BIGINT | 109
FieldGoals | BIGINT | 40
FieldGoalsAttempted | BIGINT | 80
FieldGoals. | DOUBLE | 0.5
X3PointShots | BIGINT | 13
X3PointShotsAttempted | BIGINT | 22
X3PointShots. | DOUBLE | 0.591
FreeThrows | BIGINT | 9
FreeThrowsAttempted | BIGINT | 17
FreeThrows. | DOUBLE | 0.529
OffRebounds | BIGINT | 10
TotalRebounds | BIGINT | 42
Assists | BIGINT | 26
Steals | BIGINT | 6
Blocks | BIGINT | 8
Turnovers | BIGINT | 17
TotalFouls | BIGINT | 24
Opp.FieldGoals | BIGINT | 37
Opp.FieldGoalsAttempted | BIGINT | 90
Opp.FieldGoals. | DOUBLE | 0.411
Opp.3PointShots | BIGINT | 8
Opp.3PointShotsAttempted | BIGINT | 26
Opp.3PointShots. | DOUBLE 

In [22]:
# function to generate query
async def generate_query(input):
    """Send ``input`` to the chat model together with the current system prompt.

    This re-defines ``generate_query`` with the same request shape as the
    earlier definition in this notebook. ``system_prompt`` is resolved when
    the function is called, so it uses whichever prompt is bound at that time.

    Args:
        input: The user's request; sent unchanged as the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": input}
    reply = await client.chat.completions.create(
        model=TASK_MODEL,
        temperature=0,
        messages=[system_message, user_message],
    )
    return reply.choices[0].message.content


print(await generate_query("Which team won the most games in 2015?"))

 SELECT Team FROM nba WHERE YEAR(Date) = '2015' GROUP BY Team ORDER BY COUNT(*) DESC LIMIT 1;
Please note that this query assumes there is a column named `Year` extracted from the `Date` column, which was not present in your table description. If such functionality does not exist natively within DuckDB or if you prefer to avoid creating an additional computed column for year extraction, here's how it could be done:

```sql
SELECT Team FROM (
    SELECT 
        Team, COUNT(*) AS GamesWonIn2015
    FROM nba
    WHERE EXTRACT(YEAR FROM Date) = '2015' AND WINorLOSS = 'W'
    GROUP BY Team
) AS SubQuery
ORDER BY GamesWonIn2015 DESC LIMIT 1;
```


In [23]:
# Run the experiment again on the same dataset with the same evaluators;
# `experiment` is rebound to this new run.
experiment = run_experiment(
    ds, task=task, evaluators=[has_results, no_error], experiment_metadata=CONFIG
)

🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDox/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDoy


running tasks |          | 0/5 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/10 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDoy

Experiment Summary (07/22/24 03:18 PM -0400)
--------------------------------------------
     evaluator  n  n_scores  avg_score
0  has_results  5         5        0.6
1     no_error  5         5        0.6

Tasks Summary (07/22/24 03:18 PM -0400)
---------------------------------------
   n_examples  n_runs  n_errors
0           5       5         0


In [24]:
from phoenix.evals.models import OpenAIModel
from phoenix.experiments import evaluate_experiment
from phoenix.experiments.evaluators.llm_evaluators import LLMCriteriaEvaluator

# LLM-as-judge evaluator named "is_sql". The judge model is configured with the
# same local base URL and 'phi3' model name used elsewhere in this notebook,
# but as separate hard-coded literals rather than via TASK_MODEL.
llm_evaluator = LLMCriteriaEvaluator(
    name="is_sql",
    criteria="is_sql",
    description="the output is a valid SQL query and that it executes without errors",
    model=OpenAIModel(model='phi3', base_url='http://localhost:11434/v1', api_key='ollama'),
)

# Apply the extra evaluator to the most recent `experiment` run.
evaluate_experiment(experiment, evaluators=[llm_evaluator])

🧠 Evaluation started.


running experiment evaluations |          | 0/5 (0.0%) | ⏳ 00:00<? | ?it/s

[91mTraceback (most recent call last):
  File "c:\Users\Jigar\anaconda3\envs\langchain\Lib\site-packages\phoenix\experiments\functions.py", line 560, in async_evaluate_run
    result = await evaluator.async_evaluate(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Jigar\anaconda3\envs\langchain\Lib\site-packages\phoenix\experiments\evaluators\llm_evaluators.py", line 61, in async_evaluate
    return self._parse_eval_output(unparsed_response)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Jigar\anaconda3\envs\langchain\Lib\site-packages\phoenix\experiments\evaluators\llm_evaluators.py", line 79, in _parse_eval_output
    raise RuntimeError(f"Could not parse LLM evaluation: {unparsed_response}")
RuntimeError: Could not parse LLM evaluation:  CRITERIA: the text is 'is_sql'
TEXT: {'query': ' SELECT AVG(FreeThrows) AS AvgFreeThrowsPerYear\nFROM nba;', 'results': [{'AvgFreeThrowsPerYear': 17.320630081300813}], 'error': None}
EXPLANATION: The provided te

RanExperiment(id='RXhwZXJpbWVudDoy', dataset_id='RGF0YXNldDox', dataset_version_id='RGF0YXNldFZlcnNpb246MQ==', repetitions=1)

In [25]:
client

<openai.AsyncOpenAI at 0x1ab037ecd90>

In [26]:
# Generate Data

from pydantic import BaseModel
import json

# One generated example: a SQL query and the question it answers.
class Question(BaseModel):
    sql: str
    question: str


# Container model; its JSON schema is sent as the tool's parameters below.
class Questions(BaseModel):
    questions: list[Question]

# One "name | type | example value" line per column (same expression that builds sample_rows).
schema = "\n".join(f"{column['column_name']} | {column['column_type']} | {samples[column['column_name']]}" for column in columns)


synthetic_data_prompt = f"""
You are a SQL expert, and you are given a single table named nba with the following columns:

Column | Type | Example
-------|------|--------
{schema}

Generate SQL queries that would be interesting to ask about this table. Return the SQL query as a string, as well as the question that the query answers."""

# The request declares a "generate_questions" tool and forces it via tool_choice.
response = await client.chat.completions.create(
    model=TASK_MODEL,
    temperature=0,
    messages=[
        {
            "role": "user",
            "content": synthetic_data_prompt,
        }
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "generate_questions",
                "description": "Generate SQL queries that would be interesting to ask about this table.",
                "parameters": Questions.model_json_schema(),
            },
        }
    ],
    tool_choice={"type": "function", "function": {"name": "generate_questions"}},
)


# NOTE(review): the reply is read from message.content (free text), not from
# tool-call arguments, even though a tool call is forced above — confirm this
# is intentional for the backend in use.
generated_questions = response.choices[0].message.content

print(generated_questions)



 Here are some possible questions and their corresponding SQL queries:

1. What is the total number of points scored by each team in all games? 
   Query: SELECT Team, SUM(TeamPoints) AS TotalPoints FROM nba GROUP BY Team;
   Question: How many points did each NBA team score across all games played during a specific season or period?
   
2. Which teams have the highest field goal percentage (FG%) in this table and what are their FG% values? 
   Query: SELECT Team, ROUND(SUM(FieldGoals) / SUM(FieldGoalsAttempted), 3) AS FieldGoalPercentage FROM nba GROUP BY Team ORDER BY FieldGoalPercentage DESC LIMIT 5;
   Question: Which NBA teams have the highest field goal percentage in this dataset and what are their respective FG% values?
   
3. How many three-point shots did each team attempt, make, and how does it compare to other players or seasons (if applicable)? 
   Query: SELECT Team, SUM(X3PointShotsAttempted) AS Attempts, SUM(X3PointShots) AS Made FROM nba GROUP BY Team;
   Question: How 

In [27]:
import re


def _parse_generated_questions(text):
    """Parse the model's numbered free-text list into question records.

    Each numbered item is expected to look like::

        1. <task sentence>
           Query: <sql>
           Question: <question>

    Args:
        text: Raw model output. None or an empty string yields an empty list.

    Returns:
        A list of dicts with keys ``'task'`` (first line of the item),
        ``'sql'`` (rest of the first ``Query:`` line) and ``'question'``
        (rest of the first ``Question:`` line), all stripped.
    """
    records = []
    # Split only on list markers at the start of a line ("1. ", "12. ").
    # Splitting on any digit followed by a period would also cut items apart
    # at decimals inside SQL or at multi-digit list numbers.
    for item in re.split(r'(?m)^[ \t]*\d+\.[ \t]+', text or ""):
        # Keep only items that open with a question word; this also drops
        # the introductory text before the first numbered item.
        if not item.strip().startswith(('What', 'Which', 'How')):
            continue
        query_match = re.search(r'Query:(.*)', item)
        question_match = re.search(r'Question:(.*)', item)
        if query_match is None or question_match is None:
            # An incomplete item is reported and skipped instead of raising
            # IndexError and aborting the whole cell.
            print(f"Skipping item without Query/Question lines: {item.strip()[:60]!r}")
            continue
        records.append({
            'task': item.split('\n')[0].strip(),
            'sql': query_match.group(1).strip(),
            'question': question_match.group(1).strip(),
        })
    return records


final_question = _parse_generated_questions(generated_questions)

       


In [28]:
final_question

[{'task': 'What is the total number of points scored by each team in all games?',
  'sql': 'SELECT Team, SUM(TeamPoints) AS TotalPoints FROM nba GROUP BY Team;',
  'question': 'How many points did each NBA team score across all games played during a specific season or period?'},
 {'task': 'Which teams have the highest field goal percentage (FG%) in this table and what are their FG% values?',
  'sql': 'SELECT Team, ROUND(SUM(FieldGoals) / SUM(FieldGoalsAttempted), 3) AS FieldGoalPercentage FROM nba GROUP BY Team ORDER BY FieldGoalPercentage DESC LIMIT 5;',
  'question': 'Which NBA teams have the highest field goal percentage in this dataset and what are their respective FG% values?'},
 {'task': 'How many three-point shots did each team attempt, make, and how does it compare to other players or seasons (if applicable)?',
  'sql': 'SELECT Team, SUM(X3PointShotsAttempted) AS Attempts, SUM(X3PointShots) AS Made FROM nba GROUP BY Team;',
  'question': 'How many three-point shots did each NBA

In [29]:
# Execute every generated SQL statement; keep the ones that run as dataset
# examples (question as input, query + rows as expected output) and report
# the ones DuckDB rejects.
generated_dataset = []
for q in final_question:
    try:
        result = execute_query(q["sql"])
    except duckdb.Error as e:
        print(f"Query failed: {q['sql']}", e)
        print("Skipping...")
    else:
        generated_dataset.append(
            dict(
                input=q["question"],
                expected=dict(results=result, error=None, query=q["sql"]),
                metadata=dict(category="Generated"),
            )
        )

# Display the first kept example.
generated_dataset[0]

Query failed: SELECT PlayerName, ROUND(SUM(FreeThrows) / SUM(Turnovers), 3) AS FreeThrowPercentage FROM nba WHERE Turnover = 'SpecificPlayer' GROUP BY PlayerName ORDER BY FreeThrowPercentage DESC LIMIT 5; Binder Error: Referenced column "Turnover" not found in FROM clause!
Candidate bindings: "nba.Turnovers", "nba.Opp.Turnovers"
Skipping...


{'input': 'How many points did each NBA team score across all games played during a specific season or period?',
 'expected': {'results': [{'Team': 'CLE', 'TotalPoints': 35150.0},
   {'Team': 'MEM', 'TotalPoints': 32572.0},
   {'Team': 'NOP', 'TotalPoints': 34287.0},
   {'Team': 'ORL', 'TotalPoints': 32983.0},
   {'Team': 'WAS', 'TotalPoints': 34309.0},
   {'Team': 'BRK', 'TotalPoints': 33541.0},
   {'Team': 'CHO', 'TotalPoints': 33675.0},
   {'Team': 'DAL', 'TotalPoints': 33435.0},
   {'Team': 'LAC', 'TotalPoints': 35168.0},
   {'Team': 'ATL', 'TotalPoints': 33776.0},
   {'Team': 'CHI', 'TotalPoints': 33475.0},
   {'Team': 'NYK', 'TotalPoints': 32722.0},
   {'Team': 'POR', 'TotalPoints': 34563.0},
   {'Team': 'BOS', 'TotalPoints': 34367.0},
   {'Team': 'HOU', 'TotalPoints': 35930.0},
   {'Team': 'LAL', 'TotalPoints': 33492.0},
   {'Team': 'MIN', 'TotalPoints': 34051.0},
   {'Team': 'TOR', 'TotalPoints': 34867.0},
   {'Team': 'DET', 'TotalPoints': 33256.0},
   {'Team': 'GSW', 'TotalPoi

In [30]:
generated_dataset

[{'input': 'How many points did each NBA team score across all games played during a specific season or period?',
  'expected': {'results': [{'Team': 'CLE', 'TotalPoints': 35150.0},
    {'Team': 'MEM', 'TotalPoints': 32572.0},
    {'Team': 'NOP', 'TotalPoints': 34287.0},
    {'Team': 'ORL', 'TotalPoints': 32983.0},
    {'Team': 'WAS', 'TotalPoints': 34309.0},
    {'Team': 'BRK', 'TotalPoints': 33541.0},
    {'Team': 'CHO', 'TotalPoints': 33675.0},
    {'Team': 'DAL', 'TotalPoints': 33435.0},
    {'Team': 'LAC', 'TotalPoints': 35168.0},
    {'Team': 'ATL', 'TotalPoints': 33776.0},
    {'Team': 'CHI', 'TotalPoints': 33475.0},
    {'Team': 'NYK', 'TotalPoints': 32722.0},
    {'Team': 'POR', 'TotalPoints': 34563.0},
    {'Team': 'BOS', 'TotalPoints': 34367.0},
    {'Team': 'HOU', 'TotalPoints': 35930.0},
    {'Team': 'LAL', 'TotalPoints': 33492.0},
    {'Team': 'MIN', 'TotalPoints': 34051.0},
    {'Team': 'TOR', 'TotalPoints': 34867.0},
    {'Team': 'DET', 'TotalPoints': 33256.0},
    {'Te

In [31]:
# Upload the generated examples as a second Phoenix dataset: each example's
# question becomes the input and its expected dict (results / error / query)
# becomes the output. The trailing ';' suppresses the cell's value display.
synthetic_dataset = px.Client().upload_dataset(
    dataset_name="nba-golden-synthetic",
    inputs=[{"question": example["input"]} for example in generated_dataset],
    outputs=[example["expected"] for example in generated_dataset],
);

📤 Uploading dataset...
💾 Examples uploaded: http://localhost:6006/datasets/RGF0YXNldDoy/examples
🗄️ Dataset version ID: RGF0YXNldFZlcnNpb246Mg==


In [32]:
# Run the same task and evaluators against the synthetic dataset.
run_experiment(
    synthetic_dataset, task=task, evaluators=[no_error, has_results], experiment_metadata=CONFIG
)

🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDoy/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoy/compare?experimentId=RXhwZXJpbWVudDoz


running tasks |          | 0/4 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/8 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoy/compare?experimentId=RXhwZXJpbWVudDoz

Experiment Summary (07/22/24 03:21 PM -0400)
--------------------------------------------
     evaluator  n  n_scores  avg_score
0  has_results  4         4        0.5
1     no_error  4         4        0.5

Tasks Summary (07/22/24 03:21 PM -0400)
---------------------------------------
   n_examples  n_runs  n_errors
0           4       4         0


RanExperiment(id='RXhwZXJpbWVudDoz', dataset_id='RGF0YXNldDoy', dataset_version_id='RGF0YXNldFZlcnNpb246Mg==', repetitions=1)