In [2]:
from dotenv import load_dotenv
import os

In [3]:
load_dotenv()

secret_key = os.getenv('SQL_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [4]:
from langchain.llms import OpenAI
from langchain.utilities import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain

db = SQLDatabase.from_uri("sqlite:///../afl.db", sample_rows_in_table_info=10)
llm = OpenAI(temperature=0, verbose=True)
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, top_k = 20)

In [5]:
TEMPLATE = """You are Anthony (Huddo) Hudson, an AFL commentator and stat expert. You also know SQL and have access to an SQLite3 database of AFL statistics.
Given an input question, first create a syntactically correct SQLite3 query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

player_stats and game.

Few shot examples:
Question: "How many goals did Zak Butters kick in 2023?"
SQLQuery: "SELECT SUM(goals) FROM player_stats WHERE player_name = 'Zak Butters' AND year = 2023"

Question: "How many goals did Max Gawn kick last year?"
SQLQuery = "SELECT goals FROM player_stats WHERE player_name = 'Max Gawn' AND year = strftime('%Y', date('now', '-1 year'))"

Whenever you are asked a question, please return additional information that might be implied by the question, such as what year or round a statistic is from.

If you get no result from the query, return "No result", and suggest a new query to run.

Question: {user_question}"""

In [41]:
user_question = "In how many games did Christian Petracca have more disposals than Clayton Oliver in 2022?"
db_chain.run(TEMPLATE.format(user_question=user_question))



[1m> Entering new SQLDatabaseChain chain...[0m
You are Anthony (Huddo) Hudson, an AFL commentator and stat expert. You also know SQL and have access to an SQLite3 database of AFL statistics.
Given an input question, first create a syntactically correct SQLite3 query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

player_stats and game.

Whenever you are asked a question, please return additional information that might be implied by the question, such as what year or round a statistic is from.

If you get no result from the query, return "No result", and suggest a new query to run.

Question: In how many games did Christian Petracca have more disposals than Clayton Oliver in 2022?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM player_stats ps1 JOIN player_stats ps2 ON ps1.game_id = ps2.game

'Christian Petracca had more disposals than Clayton Oliver in 5 games in 2022.'

In [42]:
user_question = "How many times has Harris Andrews had more One Percenters than anyone else in that game?"
db_chain.run(TEMPLATE.format(user_question=user_question))
# SELECT COUNT(*) FROM player_stats WHERE one_percenters = (SELECT MAX(one_percenters) FROM player_stats WHERE player_name = 'Harris Andrews');
# SELECT COUNT(*) FROM player_stats WHERE player_name = 'Harris Andrews' AND one_percenters = (SELECT MAX(one_percenters) FROM player_stats);
# BOTH WRONG. I THINK IT NEEDS TO BE GIVEN A MATCH_ID IN THE DATABASE.




[1m> Entering new SQLDatabaseChain chain...[0m
You are Anthony (Huddo) Hudson, an AFL commentator and stat expert. You also know SQL and have access to an SQLite3 database of AFL statistics.
Given an input question, first create a syntactically correct SQLite3 query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

player_stats and game.

Whenever you are asked a question, please return additional information that might be implied by the question, such as what year or round a statistic is from.

If you get no result from the query, return "No result", and suggest a new query to run.

Question: How many times has Harris Andrews had more One Percenters than anyone else in that game?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM player_stats ps1 JOIN player_stats ps2 ON ps1.game_id = ps2.game_

'Harris Andrews has had more One Percenters than anyone else in that game 7697 times.'

In [43]:
user_question = "What is the lowest number of disposals for which a player has received 3 brownlow votes?"
db_chain.run(TEMPLATE.format(user_question=user_question))
# Correct!



[1m> Entering new SQLDatabaseChain chain...[0m
You are Anthony (Huddo) Hudson, an AFL commentator and stat expert. You also know SQL and have access to an SQLite3 database of AFL statistics.
Given an input question, first create a syntactically correct SQLite3 query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

player_stats and game.

Whenever you are asked a question, please return additional information that might be implied by the question, such as what year or round a statistic is from.

If you get no result from the query, return "No result", and suggest a new query to run.

Question: What is the lowest number of disposals for which a player has received 3 brownlow votes?
SQLQuery:[32;1m[1;3mSELECT MIN(disp) FROM player_stats WHERE brownlow = 3;[0m
SQLResult: [33;1m[1;3m[(7

'The lowest number of disposals for which a player has received 3 brownlow votes is 7.'

In [12]:
db_chain.run("How many games have Christian Petracca and Clayton Oliver played together?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many games have Christian Petracca and Clayton Oliver played together?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM player_stats WHERE player_name IN ('Christian Petracca', 'Clayton Oliver') GROUP BY round, year HAVING COUNT(*) > 1;[0m
SQLResult: [33;1m[1;3m[(2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (2,), (

'Christian Petracca and Clayton Oliver have played together in 92 games.'

In [44]:
db_chain.run("What are the top 5 games by disposals but 0 brownlow votes? Who, when, and how many disposals?")



[1m> Entering new SQLDatabaseChain chain...[0m
What are the top 5 games by disposals but 0 brownlow votes? Who, when, and how many disposals?
SQLQuery:[32;1m[1;3mSELECT player_name, date, disp FROM player_stats JOIN game ON player_stats.game_id = game.game_id WHERE brownlow = 0 ORDER BY disp DESC LIMIT 5;[0m
SQLResult: [33;1m[1;3m[('Matt Crouch', '2019-08-25 13:10:00', 47), ('Tom Mitchell', '2018-05-20 15:20:00', 46), ('Lachie Neale', '2021-08-28 19:30:00', 46), ('Matt Priddis', '2008-06-27 19:40:00', 45), ('Matthew Boyd', '2011-04-25 20:45:00', 45)][0m
Answer:[32;1m[1;3mThe top 5 games by disposals but 0 brownlow votes are Matt Crouch (47 disposals on 2019-08-25 13:10:00), Tom Mitchell (46 disposals on 2018-05-20 15:20:00), Lachie Neale (46 disposals on 2021-08-28 19:30:00), Matt Priddis (45 disposals on 2008-06-27 19:40:00), and Matthew Boyd (45 disposals on 2011-04-25 20:45:00).[0m
[1m> Finished chain.[0m


'The top 5 games by disposals but 0 brownlow votes are Matt Crouch (47 disposals on 2019-08-25 13:10:00), Tom Mitchell (46 disposals on 2018-05-20 15:20:00), Lachie Neale (46 disposals on 2021-08-28 19:30:00), Matt Priddis (45 disposals on 2008-06-27 19:40:00), and Matthew Boyd (45 disposals on 2011-04-25 20:45:00).'

In [16]:
db_chain.run("How many times has a player kicked 9 goals and at least 1 behind in a game? Tell me about those games, player, year, round etc.")



[1m> Entering new SQLDatabaseChain chain...[0m
How many times has a player kicked 9 goals and at least 1 behind in a game? Tell me about those games, player, year, round etc.
SQLQuery:[32;1m[1;3mSELECT player_name, year, round, team, opponent FROM player_stats WHERE goals = 9 AND behinds >= 1 LIMIT 5;[0m
SQLResult: [33;1m[1;3m[('Gary Ablett', 1996, 1, 'GE', 'ME'), ('Stewart Loewe', 1996, 2, 'SK', 'GE'), ('Gary Ablett', 1996, 9, 'GE', 'CW'), ('Tony Lockett', 1996, 10, 'SY', 'FI'), ('Jason Dunstall', 1996, 11, 'HW', 'AD')][0m
Answer:[32;1m[1;3mThere have been 5 games in which a player kicked 9 goals and at least 1 behind. Those players were Gary Ablett (1996, Round 1, GE vs ME), Stewart Loewe (1996, Round 2, SK vs GE), Gary Ablett (1996, Round 9, GE vs CW), Tony Lockett (1996, Round 10, SY vs FI), and Jason Dunstall (1996, Round 11, HW vs AD).[0m
[1m> Finished chain.[0m


'There have been 5 games in which a player kicked 9 goals and at least 1 behind. Those players were Gary Ablett (1996, Round 1, GE vs ME), Stewart Loewe (1996, Round 2, SK vs GE), Gary Ablett (1996, Round 9, GE vs CW), Tony Lockett (1996, Round 10, SY vs FI), and Jason Dunstall (1996, Round 11, HW vs AD).'

In [20]:
user_question = "How many goals did Zak Butters kick last year?"

db_chain.run(TEMPLATE.format(user_question=user_question))




[1m> Entering new SQLDatabaseChain chain...[0m
You are Anthony (Huddo) Hudson, an AFL commentator and stat expert. You also know SQL and have access to an SQLite3 database of AFL statistics.
Given an input question, first create a syntactically correct SQLite3 query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

player_stats and game.

Few shot examples:
Question: "How many goals did Zak Butters kick in 2023?"
SQLQuery: "SELECT SUM(goals) FROM player_stats WHERE player_name = 'Zak Butters' AND year = 2023"

Question: "How many goals did Max Gawn kick last year?"
SQLQuery = "SELECT goals FROM player_stats WHERE player_name = 'Max Gawn' AND year = strftime('%Y', date('now', '-1 year'))"

Whenever you are asked a question, please return additional information that might be implied by the

'Zak Butters kicked 10 goals last year.'

In [6]:
user_question = "How good is Richmond?"
res = db_chain.run(TEMPLATE.format(user_question=user_question))



[1m> Entering new SQLDatabaseChain chain...[0m
You are Anthony (Huddo) Hudson, an AFL commentator and stat expert. You also know SQL and have access to an SQLite3 database of AFL statistics.
Given an input question, first create a syntactically correct SQLite3 query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

player_stats and game.

Few shot examples:
Question: "How many goals did Zak Butters kick in 2023?"
SQLQuery: "SELECT SUM(goals) FROM player_stats WHERE player_name = 'Zak Butters' AND year = 2023"

Question: "How many goals did Max Gawn kick last year?"
SQLQuery = "SELECT goals FROM player_stats WHERE player_name = 'Max Gawn' AND year = strftime('%Y', date('now', '-1 year'))"

Whenever you are asked a question, please return additional information that might be implied by the