# text2sql based on llama2
## Task: To get an sql query for a given database from a natural language query

(This file is for experimentation and model creation)

### Imports

In [1]:
from langchain_community.llms.ollama import Ollama

from langchain_community.vectorstores.pgvector import PGVector

from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from pswrd import PASSWORD_OF_DB

### Download llama2

In [2]:
model = Ollama(model="llama2")

## The first idea:
Is add a **prompt template** with the context of the database structure and a query for NL to `llama2`

#### Firstly add DB structure

In [3]:
DB_STRUCTURE = \
    ["""
    CREATE TABLE trip(
        id BIGSERIAL PRIMARY KEY,
        company BIGINT, 
        plane CHARACTER VARYING(60),
        town_from CHARACTER VARYING(60),
        town_to CHARACTER VARYING(60),
        time_out TIMESTAMP,
        time_in TIMESTAMP,
        CONSTRAINT FK_company FOREIGNT KEY (\"company\") REFERENCES public.company (id)
    ); 
    """,
     """
    CREATE TABLE company(
        id BIGSERIAL PRIMARY KEY,
        name CHARACTER VARYING(60) 
    );
    """,
     """
    CREATE TABLE pass_in_trip(
        id BIGSERIAL PRIMARY KEY,
        trip BIGINT,
        passenger BIGINT,
        place CHARACTER VARYING(60),
        CONSTRAINT FK_trip FOREIGNT KEY (\"trip\") REFERENCES public.trip (id),
        CONSTRAINT FK_passanger FOREIGNT KEY (\"passenger\") REFERENCES public.passenger (id)
    );
    """,
     """
    CREATE TABLE passenger(
        id BIGSERIAL PRIMARY KEY,
        name CHARACTER VARYING(60)
    );
    """
    ]

In [4]:
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver="psycopg2",
    host="localhost",
    port="5433",
    database="llama-test-vc",
    user="postgres",
    password=PASSWORD_OF_DB,
)
COLLECTION_NAME = "text2sql_vc"

In [5]:
embeddings = OllamaEmbeddings(model="llama2")

In [6]:
structure_retriver = PGVector.from_texts(
    embedding=embeddings,
    texts=DB_STRUCTURE,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    pre_delete_collection=True,
    use_jsonb=True
).as_retriever()

#### Now create a template

In [7]:
template = \
"""
Translate the following query to sql using the following database structure. 
{structure}
As an answer, provide an sql query for postgresql.
Query to translate: {query}
"""

prompt_with_db_structure = ChatPromptTemplate.from_template(template)

In [8]:
model_with_structure_context = (
    {"structure": structure_retriver, "query": RunnablePassthrough()}
    | prompt_with_db_structure
    | model
    | StrOutputParser()
)

In [9]:
res_query = model_with_structure_context.invoke("Select the names of all the people who are in the airline database")
res_query

" Sure! Here's an SQL query that translates the given query for a PostgreSQL database:\n```sql\nSELECT name\nFROM passenger\nJOIN trip ON passenger.id = trip.passenger\nJOIN company ON trip.company = company.id;\n```\nExplanation:\n\n* The `SELECT` clause selects the `name` column from the `passenger`, `trip`, and `company` tables.\n* The `JOIN` clause performs a left outer join between the `passenger` and `trip` tables on the `id` column, and then performs another left outer join between the result set and the `company` table on the `company` column. This allows us to retrieve the names of all the people who are in the airline database, as well as their corresponding company information.\n\nNote: In PostgreSQL, the `JOIN` clause can be written in several ways, including `JOIN`, `INNER JOIN`, `LEFT OUTER JOIN`, etc. The query above uses a left outer join to ensure that all rows from the `passenger` and `trip` tables are included in the result set, even if they have no matching row in t

In [10]:
print(res_query)

 Sure! Here's an SQL query that translates the given query for a PostgreSQL database:
```sql
SELECT name
FROM passenger
JOIN trip ON passenger.id = trip.passenger
JOIN company ON trip.company = company.id;
```
Explanation:

* The `SELECT` clause selects the `name` column from the `passenger`, `trip`, and `company` tables.
* The `JOIN` clause performs a left outer join between the `passenger` and `trip` tables on the `id` column, and then performs another left outer join between the result set and the `company` table on the `company` column. This allows us to retrieve the names of all the people who are in the airline database, as well as their corresponding company information.

Note: In PostgreSQL, the `JOIN` clause can be written in several ways, including `JOIN`, `INNER JOIN`, `LEFT OUTER JOIN`, etc. The query above uses a left outer join to ensure that all rows from the `passenger` and `trip` tables are included in the result set, even if they have no matching row in the `company` 

The answer looks like it's true, but it's not. The model is hallucinating