## Steps:
1. In `utils.py`, instantiate `db` with:
```python
    db_name = "dvdrental"
    include_tables = ["actor", "film", "film_actor"]
```
2. Initialize the LLM model.
3. Create a first chain that takes the user's question and returns a SQL query, using the database table schema.
4. Create a second chain that takes the SQL query from the first chain as input, executes it, and returns the answer in natural language.

In [2]:
# langchain
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# local utils
from utils import db, get_schema, custom_str_parser, handle_error_query,camelcase_conditions

In [3]:
# Langsmith - to debug LLM responses

import os
from dotenv import load_dotenv
from langsmith import Client

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# The LangSmith API key is read from LANGCHAIN_SECRET; os.getenv returns None
# when the variable is not defined.
lcs = os.getenv("LANGCHAIN_SECRET")
if not lcs:
    # os.environ only accepts str values: assigning None below would fail with
    # an opaque "TypeError: str expected, not NoneType". Fail early with an
    # actionable message instead.
    raise RuntimeError(
        "LANGCHAIN_SECRET is not set; define it in the environment or in a "
        ".env file to enable LangSmith tracing."
    )

# Tracing configuration for this notebook's runs.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "langchain-ollama-sql"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = lcs

# Created after the environment variables above have been set.
client = Client()

## Load LLM and define chains

In [4]:
# Add the LLM downloaded from Ollama
# `llm` is the model the first chain (sql_chain) uses to generate the SQL query.
ollama_llm = "duckdb-nsql"
llm = ChatOllama(model=ollama_llm)

# Separate model used by the second chain (sql_run_chain) to write the
# natural-language answer.
llm_for_response = ChatOllama(model="mistral")

In [5]:
# ------------------------------------------------------------------------------------------------------------------------

# first chain: natural-language question -> SQL query text

# Human-turn template; {schema} is supplied by get_schema inside the chain and
# {question} comes from the dict passed to invoke().
sql_chain_template = """Based on the table schema below, write a SQL query that would answer the user's question.
{schema}

Question: {question}
SQL Query:"""

# Two system instructions followed by the templated human message.
sql_chain_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Given an input question, convert it to a SQL query. "),
        ("system", "Respond with only the SQL query, nothing else."),
        ("human", sql_chain_template),
    ]
)

# Pipeline: add the schema to the input, render the prompt, call the SQL
# model, reduce the reply to a plain string, then apply the two
# post-processing helpers imported from utils.
sql_chain = RunnablePassthrough.assign(schema=get_schema).pipe(
    sql_chain_prompt,
    llm,
    StrOutputParser(),
    RunnableLambda(custom_str_parser),
    RunnableLambda(camelcase_conditions),
)

# ------------------------------------------------------------------------------------------------------------------------

# second_chain: generate the SQL, obtain its result, and phrase the answer in
# natural language.

# Human-turn template; every placeholder is filled from keys added by the
# chain below ({query}, {schema}, {response}) or by the caller ({question}).
template = """Based on the table schema below, question, sql query, and sql response, write a natural language response:
{schema}

Question: {question}
SQL Query: {query}
SQL Response: {response}"""  # noqa: E501

prompt_response = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Given an input question and SQL response, convert it to a natural "
            "language answer. No pre-amble.",
        ),
        ("human", template),
    ]
)

# Step 1 adds `query` by running the first chain. Step 2 adds `schema` and
# `response`; handle_error_query receives the whole dict built so far
# (presumably it executes the query and reports failures - confirm in utils).
# The answer model then turns the rendered prompt into the final text.
sql_run_chain = RunnablePassthrough.assign(query=sql_chain).pipe(
    RunnablePassthrough.assign(
        schema=get_schema,
        response=lambda state: handle_error_query(state),
    ),
    prompt_response,
    llm_for_response,
    StrOutputParser(),
)

## Run Queries

#### Test if langchain can access the database

In [5]:
print(db.get_table_info())


CREATE TABLE actor (
	actor_id SERIAL NOT NULL, 
	first_name VARCHAR(45) NOT NULL, 
	last_name VARCHAR(45) NOT NULL, 
	last_update TIMESTAMP WITHOUT TIME ZONE DEFAULT now() NOT NULL, 
	CONSTRAINT actor_pkey PRIMARY KEY (actor_id)
)


CREATE TABLE film (
	film_id SERIAL NOT NULL, 
	title VARCHAR(255) NOT NULL, 
	description TEXT, 
	release_year INTEGER, 
	language_id SMALLINT NOT NULL, 
	rental_duration SMALLINT DEFAULT 3 NOT NULL, 
	rental_rate NUMERIC(4, 2) DEFAULT 4.99 NOT NULL, 
	length SMALLINT, 
	replacement_cost NUMERIC(5, 2) DEFAULT 19.99 NOT NULL, 
	rating mpaa_rating DEFAULT 'G'::mpaa_rating, 
	last_update TIMESTAMP WITHOUT TIME ZONE DEFAULT now() NOT NULL, 
	special_features TEXT[], 
	fulltext TSVECTOR NOT NULL, 
	CONSTRAINT film_pkey PRIMARY KEY (film_id), 
	CONSTRAINT film_language_id_fkey FOREIGN KEY(language_id) REFERENCES language (language_id) ON DELETE RESTRICT ON UPDATE CASCADE
)


CREATE TABLE film_actor (
	actor_id SMALLINT NOT NULL, 
	film_id SMALLINT NOT NULL, 
	

In [6]:
db.run("""SELECT * FROM actor LIMIT 5""")

"[(1, 'Penelope', 'Guiness', datetime.datetime(2013, 5, 26, 14, 47, 57, 620000)), (2, 'Nick', 'Wahlberg', datetime.datetime(2013, 5, 26, 14, 47, 57, 620000)), (3, 'Ed', 'Chase', datetime.datetime(2013, 5, 26, 14, 47, 57, 620000)), (4, 'Jennifer', 'Davis', datetime.datetime(2013, 5, 26, 14, 47, 57, 620000)), (5, 'Johnny', 'Lollobrigida', datetime.datetime(2013, 5, 26, 14, 47, 57, 620000))]"

In [7]:
# NOTE(review): in the sample rows shown above, 'Penelope' is a first_name
# (actor 1 is Penelope Guiness), so filtering on last_name yields a count of 0.
# Confirm whether the filter was meant to be on first_name.
db.run("""SELECT COUNT(*) FROM actor WHERE last_name = 'Penelope' AND first_name IS NOT NULL;""")

'[(0,)]'

#### We can run the first chain independently to test that (1) we are getting valid SQL queries and (2) the format of the response is correct, i.e. just the query

In [7]:
sql_chain.invoke({"question": "Give me count of actors"})

' SELECT COUNT(*) FROM actor;'

In [6]:
sql_chain.invoke({"question": "How many films has penelope starred in?"})

" SELECT COUNT(*) FROM film_actor WHERE actor_id = (SELECT actor_id FROM actor WHERE first_name = 'Penelope');"

In [6]:
sql_chain.invoke({"question": "How many films has Penelope Guiness starred in?"})

" SELECT COUNT(*) FROM film_actor WHERE actor_id = (SELECT actor_id FROM actor WHERE first_name = 'penelope' AND last_name = 'guisshot');"

In [7]:
sql_chain.invoke(
    {"question": "Can you give me some movie names where Penelope Guiness starred?"}
)

" SELECT film.title FROM actor, film, film_actor WHERE actor.first_name = 'Penelope' AND actor.last_name = 'Guiss' AND film_actor.actor_id = actor.actor_id AND film.film_id = film_actor.film_id;"

In [39]:
sql_chain.invoke({"question": "Give me breakdown of films by release year"})

' SELECT \n    film.title,\n    COUNT(*) OVER (PARTITION BY film.release_year) AS release_year_breakdown\nFROM \n    film;'

In [40]:
sql_chain.invoke({"question": "How many films were released in 2006?"})

' SELECT COUNT(*) FROM film WHERE release_year = 2006;'

In [41]:
sql_chain.invoke(
    {"question": "Going by the description, can you give me names of movies on robots?"}
)

" SELECT title FROM film WHERE description LIKE '%robot%';"

In [42]:
sql_chain.invoke(
    {"question": "Give me names of movies where description contains Robot"}
)

" SELECT title FROM film WHERE description LIKE '%Robot%';"

### Invoking the main (second) chain on the same queries as above.

In [43]:
print(sql_run_chain.invoke({"question": "Give me count of actors"}))

 The number of actors in the database is 200.


In [44]:
print(sql_run_chain.invoke({"question": "How many films has Penelope starred in?"}))

 Based on the given SQL query and response, there were no films found for the actor named Penelope. Therefore, the natural language answer is "Penelope has not starred in any films based on the provided data."


In [9]:
print(
    sql_run_chain.invoke(
        {"question": "How many films has Nick Wahlberg starred in?"}
    )
)

 Nick Wahlberg has starred in 25 films based on the data in the database.


In [10]:
print(
    sql_run_chain.invoke(
        {"question": "Can you give me some movie names where Nick Wahlberg starred?"}
    )
)

 The SQL query returned a list of movies where the actor Nick Wahlberg appeared. The names of these movies are "Adaptation Holes", "Apache Divine", "Baby Hall", "Bull Shawshank", "Chainsaw Uptown", "Chisum Behavior", "Destiny Saturday", "Dracula Crystal", "Fight Jawbreaker", "Flash Wars", "Gilbert Pelican", "Goodfellas Salute", "Happiness United", "Indian Love", "Jekyll Frogmen", "Jersey Sassy", "Liaisons Sweet", "Lucky Flying", "Maguire Apache", "Mallrats United", "Mask Peach", "Roof Champion", "Rushmore Mermaid", "Smile Earring", and "Wardrobe Phantom".


In [11]:
print(sql_run_chain.invoke({"question": "Give me breakdown of films by release year"}))

 It appears you have given a list of tuples, where each tuple contains a string for the title and two integers for the year and rating. Here's how you can access the elements of these tuples:

```python
movies = [('Adventures Raccoons', 2005, 9), ('Aeon Flux', 2005, 7), ('Afterlife Aspen', 2006, 8), ...]

# Accessing the title
title = movies[0][0]
print(title) # 'Adventures Raccoons'

# Accessing the year
year = movies[0][1]
print(year) # 2005

# Accessing the rating
rating = movies[0][2]
print(rating) # 9
```

You can also use a for loop to iterate through the list and access each title, year, and rating:

```python
for movie in movies:
    title, year, rating = movie
    print('Title:', title)
    print('Year:', year)
    print('Rating:', rating)
    print()
```

This will output each title, year, and rating on a separate line.


In [48]:
print(
    sql_run_chain.invoke(
        {
            "question": "Going by the description, can you give me names of movies on robots?"
        }
    )
)

 Based on the table schema and SQL query provided, here is a natural language response:

The SQL query "SELECT film.title FROM film WHERE film.description LIKE '%robot%'" will return the titles of movies that have the word "robot" mentioned in their description. Therefore, the names of movies on robots, according to the table data, are those movies whose descriptions contain the keyword "robot".


In [12]:
print(
    sql_run_chain.invoke(
        {"question": "Give me names of movies where description contains Robot"}
    )
)

 The SQL query returned a list of movie titles from the "film" table where the description contains the word "Robot". Here are the names of those movies: Agent Truman, Alley Evolution, Citizen Shrek, Control Anthem, Mine Titans, Rocketeer Mother, Rollercoaster Bringing, Unbreakable Karate, Wizard Coldblooded.
