In [None]:
from langchain.sql_database import SQLDatabase
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain_ollama import ChatOllama
from langchain_groq import ChatGroq
from langchain.agents import AgentExecutor
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Define your MySQL connection details
mysql_host = os.getenv("MYSQL_HOST")
mysql_port = os.getenv("MYSQL_PORT")
mysql_user = os.getenv("MYSQL_USER")
mysql_password = os.getenv("MYSQL_PASSWORD")
mysql_database = os.getenv("MYSQL_DATABASE")


In [3]:
print(f"{mysql_user}:{mysql_password}@{mysql_host}:{mysql_port}/{mysql_database}")

root:root@127.0.0.1:3306/sakila


In [4]:
# Create the connection URI
connection_string = f"mysql+pymysql://{mysql_user}:{mysql_password}@{mysql_host}:{mysql_port}/{mysql_database}"

In [5]:
# Initialize the SQLDatabase object
db = SQLDatabase.from_uri(connection_string)

  self._metadata.reflect(


In [6]:
db.get_table_info()

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


"\nCREATE TABLE actor (\n\tactor_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, \n\tfirst_name VARCHAR(45) NOT NULL, \n\tlast_name VARCHAR(45) NOT NULL, \n\tlast_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, \n\tPRIMARY KEY (actor_id)\n)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_0900_ai_ci\n\n/*\n3 rows from actor table:\nactor_id\tfirst_name\tlast_name\tlast_update\n1\tPENELOPE\tGUINESS\t2006-02-15 04:34:33\n2\tNICK\tWAHLBERG\t2006-02-15 04:34:33\n3\tED\tCHASE\t2006-02-15 04:34:33\n*/\n\n\nCREATE TABLE address (\n\taddress_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, \n\taddress VARCHAR(50) NOT NULL, \n\taddress2 VARCHAR(50), \n\tdistrict VARCHAR(20) NOT NULL, \n\tcity_id SMALLINT UNSIGNED NOT NULL, \n\tpostal_code VARCHAR(10), \n\tphone VARCHAR(20) NOT NULL, \n\tlast_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, \n\tPRIMARY KEY (address_id), \n\tCONSTRAINT fk_address_city FOREIGN KEY(city_id) REFERENCES 

In [7]:
# Initialize Groq LLM
llm = ChatGroq(api_key=os.getenv("GROQ_API_KEY"), model="meta-llama/llama-4-scout-17b-16e-instruct")  
# llm = ChatOllama(base_url="http://localhost:11434",model='llama3.2')

In [8]:
# Create a toolkit for the SQL database
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

In [9]:
# Create an agent to handle the SQL queries
agent_executor = create_sql_agent(
    
    llm=llm,
    toolkit=toolkit,
    verbose=True
)

In [10]:
# Example usage: Convert natural language to SQL and execute
query = "List all films that have never been rented, including their descriptions and categories."
result = agent_executor.invoke(query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mactor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store[0m[32;1m[1;3mThe tables in the database are: actor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store

I think the most relevant tables for this query are film, film_category, and category. I should query the schema of these tables to see what columns I can use.

Action: sql_db_schema
Action Input: film, film_category, category[0m[33;1m[1;3m
CREATE TABLE category (
	category_id TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, 
	name VARCHAR(25) NOT NULL, 
	last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 
	PRIMARY KEY (category_id)
)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_0900_ai_

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


[32;1m[1;3mTo find all films that have never been rented, including their descriptions and categories, I need to find films that do not have any entries in the `inventory` table, as the `inventory` table seems to track the stock of films available for rent. 

I should query the schema of the `inventory` table to confirm its columns.

Action: sql_db_schema
Action Input: inventory[0m[33;1m[1;3m
CREATE TABLE inventory (
	inventory_id MEDIUMINT UNSIGNED NOT NULL AUTO_INCREMENT, 
	film_id SMALLINT UNSIGNED NOT NULL, 
	store_id TINYINT UNSIGNED NOT NULL, 
	last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 
	PRIMARY KEY (inventory_id), 
	CONSTRAINT fk_inventory_film FOREIGN KEY(film_id) REFERENCES film (film_id) ON DELETE RESTRICT ON UPDATE CASCADE, 
	CONSTRAINT fk_inventory_store FOREIGN KEY(store_id) REFERENCES store (store_id) ON DELETE RESTRICT ON UPDATE CASCADE
)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_0900_ai_ci

/*
3 rows from inven

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


[32;1m[1;3mTo find all films that have never been rented, including their descriptions and categories, I need to find films that do not have any entries in the `inventory` table. However, a more accurate approach would be to find films that do not have any entries in the `rental` table, as a film can be in the `inventory` table but not rented.

I should construct a query that selects films without any corresponding records in the `rental` table. This can be achieved using a `LEFT JOIN` or `NOT IN`/`NOT EXISTS` clauses. 

To get the descriptions and categories, I will join the `film`, `film_category`, and `category` tables.

Action: sql_db_query_checker
Action Input: SELECT f.description, c.name AS category 
FROM film f 
JOIN film_category fc ON f.film_id = fc.film_id 
JOIN category c ON fc.category_id = c.category_id 
WHERE f.film_id NOT IN (SELECT film_id FROM rental)
LIMIT 10;[0m[36;1m[1;3m```sql
SELECT f.description, c.name AS category 
FROM film f 
JOIN film_category fc ON f.f

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


[32;1m[1;3mIt seems like the `rental` table does not have a `film_id` column. Instead, it has an `inventory_id` column that references the `inventory` table. 

To find all films that have never been rented, including their descriptions and categories, we need to join the `film`, `film_category`, `category`, and `inventory` tables. We can use a `LEFT JOIN` or `NOT EXISTS` clause to find films without any corresponding records in the `rental` table.

Here's a corrected query:

```sql
SELECT f.description, c.name AS category 
FROM film f 
JOIN film_category fc ON f.film_id = fc.film_id 
JOIN category c ON fc.category_id = c.category_id 
LEFT JOIN inventory i ON f.film_id = i.film_id 
LEFT JOIN rental r ON i.inventory_id = r.inventory_id
WHERE r.inventory_id IS NULL
LIMIT 10;
```

Let's try again.

Action: sql_db_query
Action Input: SELECT f.description, c.name AS category 
FROM film f 
JOIN film_category fc ON f.film_id = fc.film_id 
JOIN category c ON fc.category_id = c.category_id 
LE

In [13]:
# # Example usage: Convert natural language to SQL and execute
# query = "Which five customers have spent the most money on rentals in the last six months of available data, and what are their most frequently rented film categories? For each customer, list their total amount spent, the top category they rented, and how many times they rented from that category."
# result = agent_executor.invoke(query)
# print(result)

In [14]:
# Example usage: Convert natural language to SQL and execute
query = "Which actor has appeared in the highest-grossing films (based on total rental revenue), and what is the average rental duration for films they appeared in?"
result = agent_executor.invoke(query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mactor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store[0m[32;1m[1;3mThe tables in the database are: actor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store

I should look at the schema of the film, actor, and film_actor tables as they seem most relevant to the question.

Action: sql_db_schema
Action Input: film, actor, film_actor[0m[33;1m[1;3m
CREATE TABLE actor (
	actor_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 
	first_name VARCHAR(45) NOT NULL, 
	last_name VARCHAR(45) NOT NULL, 
	last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 
	PRIMARY KEY (actor_id)
)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_0900_ai_ci

/*
3 rows from a

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


[32;1m[1;3mTo answer the question, I need to find the actor who has appeared in the highest-grossing films (based on total rental revenue) and calculate the average rental duration for films they appeared in.

First, I'll create a query to calculate the total rental revenue for each film and then join this with the film_actor table to get the actors who appeared in these films.

Action: sql_db_query_checker
Action Input: 
SELECT 
  fa.actor_id, 
  SUM(f.rental_rate * f.rental_duration) as total_revenue
FROM 
  film_actor fa
JOIN 
  film f ON fa.film_id = f.film_id
GROUP BY 
  fa.actor_id
ORDER BY 
  total_revenue DESC
LIMIT 10;[0m[36;1m[1;3m```sql
SELECT 
 fa.actor_id, 
 SUM(f.rental_rate * f.rental_duration) as total_revenue
FROM 
 film_actor fa
JOIN 
 film f ON fa.film_id = f.film_id
GROUP BY 
 fa.actor_id
ORDER BY 
 total_revenue DESC
LIMIT 10;
```[0m[32;1m[1;3mThe query seems correct. I'll execute it to get the actor_id and total revenue.

Action: sql_db_query
Action Input:

In [15]:
# Example usage: Convert natural language to SQL and execute
query = "Which country has the highest average customer lifetime value (total payments divided by rental count per customer), and what are the top 3 cities in that country by total revenue?"
result = agent_executor.invoke(query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mactor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store[0m[32;1m[1;3mI see a list of tables that could be relevant to the question. The tables that seem most relevant are customer, payment, rental, city, and country. I should query the schema of these tables to see what columns they contain.

Action: sql_db_schema
Action Input: customer, payment, rental, city, country[0m[33;1m[1;3m
CREATE TABLE city (
	city_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 
	city VARCHAR(50) NOT NULL, 
	country_id SMALLINT UNSIGNED NOT NULL, 
	last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 
	PRIMARY KEY (city_id), 
	CONSTRAINT fk_city_country FOREIGN KEY(country_id) REFERENCES country (country_id) ON DELETE RESTRICT ON UPDATE CASCADE
)ENGINE=InnoDB

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


[32;1m[1;3mI have the schema for the relevant tables. Now I need to calculate the average customer lifetime value (total payments divided by rental count per customer) and then find the country with the highest average customer lifetime value. After that, I can find the top 3 cities in that country by total revenue.

First, I need to calculate the average customer lifetime value per country. I will use the payment and rental tables to calculate this.

Action: sql_db_query_checker
Action Input: 
```sql
SELECT 
  c.country,
  AVG(p.total_payments / r.rental_count) AS avg_lifetime_value
FROM 
  (SELECT 
     customer_id, 
     SUM(amount) AS total_payments,
     COUNT(DISTINCT rental_id) AS rental_count
   FROM 
     payment
   GROUP BY 
     customer_id) p
JOIN 
  customer cu ON p.customer_id = cu.customer_id
JOIN 
  address a ON cu.address_id = a.address_id
JOIN 
  city ci ON a.city_id = ci.city_id
JOIN 
  country c ON ci.country_id = c.country_id
GROUP BY 
  c.country
ORDER BY 
  avg

In [16]:
# Example usage: Convert natural language to SQL and execute
query = "Which 10 films have the highest rental-to-inventory ratio (total rentals divided by number of copies), and how does this vary by store location?"
result = agent_executor.invoke(query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mactor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store[0m[32;1m[1;3mThe tables in the database are: actor, address, category, city, country, customer, film, film_actor, film_category, film_text, inventory, language, payment, rental, staff, store. 

To calculate the rental-to-inventory ratio, I need to look at the film, inventory, and rental tables. I should query the schema of these tables to see what columns I can use.

Action: sql_db_schema
Action Input: film, inventory, rental[0m[33;1m[1;3m
CREATE TABLE film (
	film_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 
	title VARCHAR(128) NOT NULL, 
	description TEXT, 
	release_year YEAR, 
	language_id TINYINT UNSIGNED NOT NULL, 
	original_language_id TINYINT UNSIGNED, 
	rental_duration TINYINT UNSIGNED NOT NULL DEFAUL

  metadata_table_names = [tbl.name for tbl in self._metadata.sorted_tables]
  for tbl in self._metadata.sorted_tables


[32;1m[1;3mTo calculate the rental-to-inventory ratio, I need to join the film, inventory, and rental tables. I want to find the top 10 films with the highest rental-to-inventory ratio. I also want to see how this varies by store location.

First, I need to calculate the total rentals for each film and the number of copies of each film in the inventory. Then, I can calculate the rental-to-inventory ratio.

Action: sql_db_query_checker
Action Input: SELECT 
  f.title, 
  COUNT(r.rental_id) AS total_rentals, 
  COUNT(i.inventory_id) AS total_copies, 
  COUNT(r.rental_id) / COUNT(i.inventory_id) AS rental_to_inventory_ratio
FROM 
  film f 
  JOIN inventory i ON f.film_id = i.film_id 
  LEFT JOIN rental r ON i.inventory_id = r.inventory_id 
GROUP BY 
  f.title, 
  i.film_id 
ORDER BY 
  rental_to_inventory_ratio DESC 
LIMIT 10;[0m[36;1m[1;3m```sql
SELECT 
 f.title, 
 COUNT(r.rental_id) AS total_rentals, 
 COUNT(i.inventory_id) AS total_copies, 
 COUNT(r.rental_id) / COUNT(i.inventory_