# This notebook demonstrates the below:
##### - How to work with Open AI LLM models 
##### - Some prompting techniques 
##### - Examples of Text to SQL conversion using gpt-4o-mini LLM model

## Install the required packages

In [19]:
!pip install openai
!pip install -q pandas



## Import the required modules

In [2]:
from openai import OpenAI
import os
import sqlite3
import pandas as pd

## Initialize the OpenAI LLM client using the API Key

In [None]:
client=OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

## Sample query and response from the LLM model

In [5]:
response=client.responses.create(model='gpt-4o-mini',instructions='You are a coding assistant that talks like a teacher',input='What is the capital of Ethiopia?')
print(response.output_text)

The capital of Ethiopia is Addis Ababa. It's a vibrant city known for its rich history and as a center for diplomacy and culture in Africa. If you have any more questions about Ethiopia or its capital, feel free to ask!


## Text to SQL conversion example with a simple prompt

#### Step 1. Create an sqlite database and a few tables and populate them with data

In [9]:
db_path = 'C:/Smita/AIML/sqlite/sqlite_db/client_data.db'

if not os.path.exists(db_path):
    with sqlite3.connect(db_path) as db_conn:
        db_cur=db_conn.cursor()
        db_cur.executescript('''CREATE TABLE IF NOT EXISTS client (client_id INTEGER PRIMARY KEY, client_name TEXT NOT NULL, country_code TEXT);
         CREATE TABLE IF NOT EXISTS country(country_id INTEGER PRIMARY KEY, country_code TEXT NOT NULL, country_name TEXT);''')
        db_cur.executemany('INSERT INTO client values (?,?,?)',[(1,'Smita','IN'),(2,'Matthias','DE'),(3,'Brendan','UK'),(4,'Frank','US'),(5,'Sergey','RU')])
        db_cur.executemany('INSERT INTO country values (?,?,?)',[(91,'IN','India'),(49,'DE','Germany'),(44,'UK','United Kingdom'),(1,'US','United States of America'),(7,'RU','Sergey')])
    
        print('Database and tables created. Data populated in tables.')

Database and tables created. Data populated in tables.


#### Step 2. View the data in the tables

In [10]:
with sqlite3.connect(db_path) as db_conn:
    for table in ['client','country']:
        data=pd.read_sql_query(f'SELECT * from {table}',db_conn)
        print('\n' + str(table).upper() + ' table: \n')
        print(data)


CLIENT table: 

   client_id client_name country_code
0          1       Smita           IN
1          2    Matthias           DE
2          3     Brendan           UK
3          4       Frank           US
4          5      Sergey           RU

COUNTRY table: 

   country_id country_code              country_name
0           1           US  United States of America
1           7           RU                    Sergey
2          44           UK            United Kingdom
3          49           DE                   Germany
4          91           IN                     India


#### Step 3. Get the schema definition into a variable

In [11]:
def get_schema_defn(db_path):
    db_conn = sqlite3.connect(db_path)
    db_cur=db_conn.cursor()
    db_cur.execute("SELECT name from sqlite_master where type = 'table';")
    table_list=db_cur.fetchall()
    schema_def=[]
    for (table_name,) in table_list:
        db_cur.execute(f'PRAGMA table_info ({table_name})')
        columns=db_cur.fetchall()
        table_def=''.join(f'Table {table_name}:\n')
        table_def+= ''.join(f' {col[1]} ({col[2]})\n' for col in columns)
        schema_def.append(table_def)
    db_conn.close()
    return schema_def

schema_def_txt=get_schema_defn(db_path)
print(''.join(schema_def_txt))

Table client:
 client_id (INTEGER)
 client_name (TEXT)
 country_code (TEXT)
Table country:
 country_id (INTEGER)
 country_code (TEXT)
 country_name (TEXT)



#### Step 4. Write a prompt for the model

In [14]:
def get_prompt(schema, query):
    return f''' You are an AI assistant that converts natural language questions to SQLLite queries. Given the below database schema : 
    <schema>
    {schema}
    </schema>

    Convert the below natural language question to SQL query.
    <query>
    {query}
    </query>

    Provide only the SQL query in your response, enclosed in <sql> tags.
    '''

#Check the prompt
prompt_txt=get_prompt(schema_def_txt,'What is the name of the client who lives in Russia?')
print(prompt_txt)

 You are an AI assistant that converts natural language questions to SQLLite queries. Given the below database schema : 
    <schema>
    ['Table client:\n client_id (INTEGER)\n client_name (TEXT)\n country_code (TEXT)\n', 'Table country:\n country_id (INTEGER)\n country_code (TEXT)\n country_name (TEXT)\n']
    </schema>

    Convert the below natural language question to SQL query.
    <query>
    What is the name of the client who lives in Russia?
    </query>

    Provide only the SQL query in your response, enclosed in <sql> tags.
    


#### Step 5. Call OpenAI API to get the SQL query

In [16]:
def get_sql_query(input_prompt):
    response = client.responses.create(model='gpt-4o-mini',input=input_prompt)
    return response
                                      
response=get_sql_query(prompt_txt) #Pass the above defined prompt to the API
print(response.output_text)

<sql>SELECT client_name FROM client WHERE country_code = (SELECT country_code FROM country WHERE country_name = 'Russia');</sql>


#### Step 6. SQL to text conversion implementation using a more effective prompting technique called few-shot learning

In [17]:
def get_prompt_fewshot(schema, query):

    examples = '''Example 1: 
    <query>Get the party name from Germany.</query>
    <output>SELECT a.name from party a join country b on a.country_code = b.country_code and a.country_name = 'Germany';</output>

    Example 2:
    user: How many parties are there?
    SQL: SELECT count(1) from clients;

    Example 3:
    user: List all the country names
    SQL: SELECT country_name from country

    Example 4:
    user: List all the party names along with their country names
    SQL: SELECT a.name,b.country_name from client a join country b on a.country_code = b.country_code;</output>'''
    
    return f''' You are an AI assistant that converts natural language questions to SQL queries. Given the below database schema: 
    <schema>
    {schema}
    </schema>

    And the below are some examples of natural language questions and their corresponding SQL:
    <example>
    {examples}
    </example>
    
    Now, convert the below natural language question to SQL query.
    <query>
    {query}
    </query>

    Provide only the SQL query in your response, enclosed in <sql> tags.
    '''

#Check the prompt
prompt_txt=get_prompt_fewshot(schema_def_txt,'What is the name of the client who lives in India?')
print(prompt_txt)

 You are an AI assistant that converts natural language questions to SQL queries. Given the below database schema: 
    <schema>
    ['Table client:\n client_id (INTEGER)\n client_name (TEXT)\n country_code (TEXT)\n', 'Table country:\n country_id (INTEGER)\n country_code (TEXT)\n country_name (TEXT)\n']
    </schema>

    And the below are some examples of natural language questions and their corresponding SQL:
    <example>
    Example 1: 
    <query>Get the party name from Germany.</query>
    <output>SELECT a.name from party a join country b on a.country_code = b.country_code and a.country_name = 'Germany';</output>

    Example 2:
    user: How many parties are there?
    SQL: SELECT count(1) from clients;

    Example 3:
    user: List all the country names
    SQL: SELECT country_name from country

    Example 4:
    user: List all the party names along with their country names
    SQL: SELECT a.name,b.country_name from client a join country b on a.country_code = b.country_code;<

#### Let's pass the new prompt and get the sql

In [18]:
response=get_sql_query(prompt_txt) #Pass the above defined prompt to the API
print(response.output_text)

<sql>SELECT client_name FROM client WHERE country_code = (SELECT country_code FROM country WHERE country_name = 'India');</sql>
