In [1]:
!pip install langchain_openai langchain_community langchain pymysql chromadb -q

In [2]:
import os
from dotenv import load_dotenv
from langchain_community.utilities.sql_database import SQLDatabase

# Load environment variables
load_dotenv()

# Get database credentials from environment variables
db_user = os.getenv("AIVEN_USER")
db_password = os.getenv("AIVEN_PASSWORD")
db_host = os.getenv("AIVEN_HOST")
db_port = os.getenv("AIVEN_PORT")
db_name = os.getenv("AIVEN_DATABASE")

###Building a basic NL2SQL model

In [3]:
from langchain_community.utilities.sql_database import SQLDatabase
# db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}/{db_name}",sample_rows_in_table_info=1,include_tables=['customers','orders'],custom_table_info={'customers':"customer"})
db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}", schema=db_name, view_support=True)
print(db.dialect)
print(db.get_usable_table_names())
print(db.table_info)

mysql
['ad_creatives', 'ad_insights', 'ad_set_insights', 'ad_sets', 'ads', 'campaign_actions', 'campaign_insights', 'campaigns']

CREATE TABLE defaultdb.ad_creatives (
	creative_key BIGINT NOT NULL AUTO_INCREMENT, 
	ad_key BIGINT, 
	name VARCHAR(255), 
	title VARCHAR(255), 
	body TEXT, 
	call_to_action_type VARCHAR(50), 
	PRIMARY KEY (creative_key), 
	CONSTRAINT ad_creatives_ibfk_1 FOREIGN KEY(ad_key) REFERENCES defaultdb.ads (ad_key)
)

/*
3 rows from ad_creatives table:
creative_key	ad_key	name	title	body	call_to_action_type
1	1	{{product.name}} 2024-09-14-983ed8e258f06d7f470610a885c37d94	None	None	None
2	2	{{product.name}} 2024-09-14-983ed8e258f06d7f470610a885c37d94	None	None	None
3	3	{{product.name}} 2024-09-14-983ed8e258f06d7f470610a885c37d94	None	None	None
*/


CREATE TABLE defaultdb.ad_insights (
	insight_key BIGINT NOT NULL AUTO_INCREMENT, 
	ad_key BIGINT, 
	impressions INTEGER, 
	clicks INTEGER, 
	spend DECIMAL(10, 2), 
	reach INTEGER, 
	frequency DECIMAL(10, 6), 
	ctr DECIMAL

In [4]:
import os
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [5]:
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o", temperature=0)
generate_query = create_sql_query_chain(llm, db)
query = generate_query.invoke({"question": "How many tables are in the database?"})
print(query)

Question: How many tables are in the database?
SQLQuery: 
```sql
SELECT COUNT(*) AS table_count
FROM information_schema.tables
WHERE table_schema = 'defaultdb';
```


In [6]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
execute_query = QuerySQLDataBaseTool(db=db)
execute_query.invoke(query)

'Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'Question: How many tables are in the database?\\nSQLQuery: \\n```sql\\nSELECT COUNT(*)\' at line 1")\n[SQL: Question: How many tables are in the database?\nSQLQuery: \n```sql\nSELECT COUNT(*) AS table_count\nFROM information_schema.tables\nWHERE table_schema = \'defaultdb\';\n```]\n(Background on this error at: https://sqlalche.me/e/20/f405)'

In [7]:
chain = generate_query | execute_query
chain.invoke({"question": "How many adsets are there"})

'Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'Question: How many adsets are there\\nSQLQuery: \\n```sql\\nSELECT COUNT(*) AS `adset_\' at line 1")\n[SQL: Question: How many adsets are there\nSQLQuery: \n```sql\nSELECT COUNT(*) AS `adset_count`\nFROM `defaultdb.ad_sets`;\n```]\n(Background on this error at: https://sqlalche.me/e/20/f405)'

In [8]:
chain.get_prompts()[0].pretty_print()

You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURDATE() function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the S

In [9]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

rephrase_answer = answer_prompt | llm | StrOutputParser()

chain = (
    RunnablePassthrough.assign(query=generate_query).assign(
        result=itemgetter("query") | execute_query
    )
    | rephrase_answer
)
print(query)

chain.invoke({"question": "How many adsets are there"})

Question: How many tables are in the database?
SQLQuery: 
```sql
SELECT COUNT(*) AS table_count
FROM information_schema.tables
WHERE table_schema = 'defaultdb';
```


'It appears that there was an error in the SQL query execution due to an incorrect syntax. The error message indicates that the query was not properly formatted, likely because the question and the SQL query were combined in a way that the SQL interpreter could not understand.\n\nTo correct this, you should only execute the SQL query part without including the question or any additional text. Here is the corrected SQL query:\n\n```sql\nSELECT COUNT(*) AS `adset_count` FROM `defaultdb`.`ad_sets`;\n```\n\nPlease execute this corrected query to get the number of adsets.'

###Adding few-shot examples

In [32]:
examples = [
    {
        "input": "Show me the daily performance trends for the last 30 days",
        "accountId": "act_624496083171435",
        "query": """
WITH daily_performance AS (
    SELECT 
        DATE(FROM_UNIXTIME(ci.date_start)) AS date,
        SUM(ci.impressions) AS daily_impressions,
        SUM(ci.unique_clicks) AS daily_clicks,
        SUM(ci.spend) AS daily_spend,
        AVG(ci.unique_ctr) AS daily_ctr,
        AVG(ci.cpc) AS daily_cpc
    FROM campaign_insights ci
    JOIN campaigns c ON ci.campaign_key = c.campaign_key
    WHERE c.account_id = 'act_624496083171435'
        AND ci.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))
    GROUP BY DATE(FROM_UNIXTIME(ci.date_start))
)
SELECT 
    date,
    daily_impressions,
    daily_clicks,
    daily_spend,
    daily_ctr,
    daily_cpc,
    SUM(daily_spend) OVER (ORDER BY date) AS running_total_spend
FROM daily_performance
            ORDER BY date;
        """,
    },
    {
        "input": "Analyze the performance of different optimization goals across all ad sets in the past month",
        "accountId": "act_624496083171435",
        "query": """
SELECT 
    ads.optimization_goal,
    COUNT(DISTINCT ads.adset_key) AS adset_count,
    SUM(asi.impressions) AS total_impressions,
    SUM(asi.unique_clicks) AS total_clicks,
    SUM(asi.spend) AS total_spend,
    AVG(asi.unique_ctr) AS avg_ctr,
    AVG(asi.cpm) AS avg_cpm,
    AVG(asi.cpc) AS avg_cpc,
    SUM(asi.reach) AS total_reach,
    AVG(asi.frequency) AS avg_frequency
FROM ad_sets ads
JOIN ad_set_insights asi ON ads.adset_key = asi.adset_key
WHERE ads.campaign_key IN (
    SELECT campaign_key 
    FROM campaigns 
    WHERE account_id = 'act_624496083171435'
)
AND asi.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))
GROUP BY ads.optimization_goal
ORDER BY avg_ctr DESC, avg_cpc ASC;
        """,
    },
    {
        "input": "What are the top 10 best performing campaign in past 30 days",
        "accountId": "act_624496083171435",
        "query": """
SELECT 
    c.name AS campaign_name,
    c.objective,
    SUM(ci.impressions) AS total_impressions,
    SUM(ci.unique_clicks) AS total_clicks,
    SUM(ci.spend) AS total_spend,
    AVG(ci.unique_ctr) AS avg_ctr,
    AVG(ci.cpm) AS avg_cpm,
    AVG(ci.cpc) AS avg_cpc,
    COUNT(DISTINCT DATE(FROM_UNIXTIME(ci.date_start))) AS days_run
FROM campaigns c
JOIN campaign_insights ci ON c.campaign_key = ci.campaign_key
WHERE c.account_id = 'act_624496083171435'
    AND ci.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))
GROUP BY  c.name, c.objective
ORDER BY avg_ctr DESC , avg_cpm ASC 
LIMIT 10;
""",
    },
    {
        "input": "Provide a comparison of campaign performance grouped by campaign objectives for the past month.",
        "accountId": "act_624496083171435",
        "query": """
SELECT 
    c.objective,
    COUNT(DISTINCT c.name) AS campaign_count,
    SUM(ci.impressions) AS total_impressions,
    SUM(ci.clicks) AS total_clicks,
    SUM(ci.spend) AS total_spend,
    AVG(ci.ctr) AS average_ctr,
    AVG(ci.cpc) AS average_cpc,
    AVG(ci.cpm) AS average_cpm
FROM campaigns c
JOIN campaign_insights ci ON c.campaign_key = ci.campaign_key
WHERE c.account_id = 'act_624496083171435'
    AND ci.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))
GROUP BY c.objective
            ORDER BY total_spend DESC;
        """,
    },
]

In [12]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder,FewShotChatMessagePromptTemplate,PromptTemplate

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}\nSQLQuery:"),
        ("ai", "{query}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    # input_variables=["input","top_k"],
    input_variables=["input"],
)
print(few_shot_prompt.format(input1="How many Adsets are there?"))

Human: Show me the daily performance trends for the last 30 days
SQLQuery:
AI: 
WITH daily_performance AS (
    SELECT 
        DATE(FROM_UNIXTIME(ci.date_start)) AS date,
        SUM(ci.impressions) AS daily_impressions,
        SUM(ci.unique_clicks) AS daily_clicks,
        SUM(ci.spend) AS daily_spend,
        AVG(ci.unique_ctr) AS daily_ctr,
        AVG(ci.cpc) AS daily_cpc
    FROM campaign_insights ci
    JOIN campaigns c ON ci.campaign_key = c.campaign_key
    WHERE c.account_id = 'act_624496083171435'
        AND ci.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))
    GROUP BY DATE(FROM_UNIXTIME(ci.date_start))
)
SELECT 
    date,
    daily_impressions,
    daily_clicks,
    daily_spend,
    daily_ctr,
    daily_cpc,
    SUM(daily_spend) OVER (ORDER BY date) AS running_total_spend
FROM daily_performance
            ORDER BY date;
        
Human: Analyze the performance of different optimization goals across all ad sets in the past m

###Dynamic few-shot example selection

In [13]:
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma()
vectorstore.delete_collection()
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    vectorstore,
    k=2,
    input_keys=["input"],
)
example_selector.select_examples({"input": "how many adsets are there?"})
# example_selector.select_examples({"input": "How many employees?"})

[{'accountId': 'act_624496083171435',
  'input': 'Analyze the performance of different optimization goals across all ad sets in the past month',
  'query': "\nSELECT \n    ads.optimization_goal,\n    COUNT(DISTINCT ads.adset_key) AS adset_count,\n    SUM(asi.impressions) AS total_impressions,\n    SUM(asi.unique_clicks) AS total_clicks,\n    SUM(asi.spend) AS total_spend,\n    AVG(asi.unique_ctr) AS avg_ctr,\n    AVG(asi.cpm) AS avg_cpm,\n    AVG(asi.cpc) AS avg_cpc,\n    SUM(asi.reach) AS total_reach,\n    AVG(asi.frequency) AS avg_frequency\nFROM ad_sets ads\nJOIN ad_set_insights asi ON ads.adset_key = asi.adset_key\nWHERE ads.campaign_key IN (\n    SELECT campaign_key \n    FROM campaigns \n    WHERE account_id = 'act_624496083171435'\n)\nAND asi.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))\nGROUP BY ads.optimization_goal\nORDER BY avg_ctr DESC, avg_cpc ASC;\n        "},
 {'accountId': 'act_624496083171435',
  'input': 'What are the 

In [14]:
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    input_variables=["input","top_k"],
)
print(few_shot_prompt.format(input="How many adsets are there?"))

Human: Analyze the performance of different optimization goals across all ad sets in the past month
SQLQuery:
AI: 
SELECT 
    ads.optimization_goal,
    COUNT(DISTINCT ads.adset_key) AS adset_count,
    SUM(asi.impressions) AS total_impressions,
    SUM(asi.unique_clicks) AS total_clicks,
    SUM(asi.spend) AS total_spend,
    AVG(asi.unique_ctr) AS avg_ctr,
    AVG(asi.cpm) AS avg_cpm,
    AVG(asi.cpc) AS avg_cpc,
    SUM(asi.reach) AS total_reach,
    AVG(asi.frequency) AS avg_frequency
FROM ad_sets ads
JOIN ad_set_insights asi ON ads.adset_key = asi.adset_key
WHERE ads.campaign_key IN (
    SELECT campaign_key 
    FROM campaigns 
    WHERE account_id = 'act_624496083171435'
)
AND asi.date_start >= UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30'), INTERVAL 30 DAY))
GROUP BY ads.optimization_goal
ORDER BY avg_ctr DESC, avg_cpc ASC;
        
Human: Provide a comparison of campaign performance grouped by campaign objectives for the past month.
SQLQuery:
AI: 
SELECT 
    c

Customizing prompts

In [35]:
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            """system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.
         Important notes:
1. Always consider the currency when dealing with monetary values.
2. When comparing or aggregating monetary values, ensure they are in the same currency or use appropriate conversion rates.
3. Include the currency in your SELECT statement when querying monetary values.
4.Do not use ad_set_key in grouping since we will have multiple ad_set_key for a single adset instead use name
5.Always change the timestamp to IST Timezone by using this UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30')))
Below are a number of examples of questions, account IDs, and their corresponding SQL queries\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries. Those examples are just for referecne and hsould be considered while answering follow up questions"""
        ),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)
print(
    final_prompt.format(
        input="How many adsets are there?", table_info="some table info"
    )
)

Human: system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.
         Important notes:
1. Always consider the currency when dealing with monetary values.
2. When comparing or aggregating monetary values, ensure they are in the same currency or use appropriate conversion rates.
3. Include the currency in your SELECT statement when querying monetary values.
4.Do not use ad_set_key in grouping since we will have multiple ad_set_key for a single adset instead use name
5.Always change the timestamp to IST Timezone by using this UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30')))
Below are a number of examples of questions, account IDs, and their corresponding SQL queries

Here is the relevant table info: some table info

Below are a number of examples of questions and their corresponding SQL queries. Those examples are just for referecne and hsould be considered while answering follow up questio

In [16]:
generate_query = create_sql_query_chain(llm, db,final_prompt)
chain = (
RunnablePassthrough.assign(query=generate_query).assign(
    result=itemgetter("query") | execute_query
)
| rephrase_answer
)
chain.invoke({"question": "How many ads are there"})

'There are 1,335 ads.'

###Dynamic relevant table selection

In [22]:
from operator import itemgetter
from langchain.chains.openai_tools import create_extraction_chain_pydantic
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
import pandas as pd


def load_schema_from_csv(file_path):
    schema = {}
    with open(file_path, "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile)
        headers = reader.fieldnames
        print(headers)

        # Check for alternative column names
        table_col = next((col for col in headers if "table" in col.lower()), None)
        column_col = next((col for col in headers if "column" in col.lower()), None)
        data_type_col = next(
            (col for col in headers if "data type" in col.lower()), None
        )
        description_col = next(
            (col for col in headers if "description" in col.lower()), None
        )
        primary_key_col = next(
            (col for col in headers if "primary key" in col.lower()), None
        )
        foreign_key_col = next(
            (col for col in headers if "foreign key" in col.lower()), None
        )

        if not all(
            [
                table_col,
                column_col,
                data_type_col,
                description_col,
                primary_key_col,
                foreign_key_col,
            ]
        ):
            raise ValueError("CSV file is missing required columns")

        for row in reader:
            table_name = row[table_col]
            if table_name not in schema:
                schema[table_name] = []
            schema[table_name].append(
                {
                    "Column Name": row[column_col],
                    "Data Type": row[data_type_col],
                    "Description": row[description_col],
                    "Is Primary Key": row[primary_key_col],
                    "Foreign Key Reference": row[foreign_key_col],
                }
            )
    return schema


def format_schema_for_prompt(schema):
    formatted_schema = ""
    for table_name, columns in schema.items():
        formatted_schema += f"Table: {table_name}\n"
        for column in columns:
            formatted_schema += f"  - {column['Column Name']} ({column['Data Type']}): {column['Description']}"
            if column["Is Primary Key"] == "Yes":
                formatted_schema += " (Primary Key)"
            if column["Foreign Key Reference"]:
                formatted_schema += f" (Foreign Key: {column['Foreign Key Reference']})"
            formatted_schema += "\n"
        formatted_schema += "\n"
    return formatted_schema


class Table(BaseModel):
    """Table in SQL database."""

    name: str = Field(description="Name of table in SQL database.")

# table_names = "\n".join(db.get_usable_table_names())
file_path = r"C:\Sreerag\gen-ai\loading\sample.csv"
schema = load_schema_from_csv(file_path)
table_details = format_schema_for_prompt(schema)
print(table_details)

['Table Name,Column Name,Data Type,Description,Is Primary Key,Foreign Key Reference']
Table: campaigns,campaign_key,BIGINT,"Unique auto-incrementing identifier for each campaign used as the primary key for efficient querying and relationships",Yes,
  - campaigns,campaign_key,BIGINT,"Unique auto-incrementing identifier for each campaign used as the primary key for efficient querying and relationships",Yes, (campaigns,campaign_key,BIGINT,"Unique auto-incrementing identifier for each campaign used as the primary key for efficient querying and relationships",Yes,): campaigns,campaign_key,BIGINT,"Unique auto-incrementing identifier for each campaign used as the primary key for efficient querying and relationships",Yes, (Foreign Key: campaigns,campaign_key,BIGINT,"Unique auto-incrementing identifier for each campaign used as the primary key for efficient querying and relationships",Yes,)

Table: campaigns,id,BIGINT,"Facebook's campaign ID, which may repeat across different date ranges for th

In [36]:
table_details_prompt = f"""Return the names of ALL the SQL tables that MIGHT be relevant to the user question. \
The tables are:

{table_details}

Remember to include ALL POTENTIALLY RELEVANT tables, even if you're not sure that they're needed."""

table_chain = create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt)
tables = table_chain.invoke({"input": "give me details of ads and their insights"})
tables

[Table(name='ads'), Table(name='ad_insights')]

In [37]:
def get_tables(tables: List[Table]) -> List[str]:
    tables  = [table.name for table in tables]
    return tables

select_table = {"input": itemgetter("question")} | create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt) | get_tables
select_table.invoke({"question": "give me details of campaign and their insights"})

KeyboardInterrupt: 

In [25]:
chain = (
RunnablePassthrough.assign(table_names_to_use=select_table) |
RunnablePassthrough.assign(query=generate_query).assign(
    result=itemgetter("query") | execute_query
)
| rephrase_answer
)
chain.invoke({"question": "Adsets with most impressions"})


'The adsets with the most impressions are as follows:\n\n1. **MM_Static_olympic_Adset** with 45,971 impressions\n2. **MM_Static_olympic_Adset** with 44,016 impressions\n3. **MM_Static_Ways_Adset** with 33,001 impressions\n4. **UGC_Video_Adset_Insta** with 31,258 impressions\n5. **UGC_Video_Adset_Insta** with 30,884 impressions\n6. **UGC_Video_Adset_Insta** with 30,292 impressions\n7. **UGC_Video_Adset_Insta** with 28,368 impressions\n8. **MM_Static_Ways_Adset** with 27,328 impressions\n9. **MM_Static_olympic_Adset** with 26,606 impressions\n10. **MM_Static_Ways_Adset** with 26,208 impressions'

In [None]:
chain.invoke({"question": "Can you list their names?"})

"The names of the customers in France with a credit limit greater than $20,000 are: Atelier graphique, La Rochelle Gifts, Saveley & Henriot, Co., Daedalus Designs Imports, La Corne D'abondance, Co., Mini Caravy, Alpha Cognac, Lyon Souveniers, Auto Associés & Cie., Marseille Mini Autos, Reims Collectables, and Auto Canal+ Petit."

###Adding memory to the chatbot so that it answers follow-up questions related to the database.






In [30]:
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            """system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.
         Important notes:
1. Always consider the currency when dealing with monetary values.
2. When comparing or aggregating monetary values, ensure they are in the same currency or use appropriate conversion rates.
3. Include the currency in your SELECT statement when querying monetary values.
4.Do not use ad_set_key in grouping since we will have multiple ad_set_key for a single adset instead use name
5.Always change the timestamp to IST Timezone by using this UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30')))
Below are a number of examples of questions, account IDs, and their corresponding SQL queries\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries. Those examples are just for referecne and hsould be considered while answering follow up questions"""
        ),
        few_shot_prompt,
        MessagesPlaceholder(variable_name="messages"),
        ("human", "{input}"),
    ]
)
print(
    final_prompt.format(
        input="How many campaigns are there?", table_info="some table info", messages=[]
    )
)

Human: system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.
         Important notes:
1. Always consider the currency when dealing with monetary values. The currency is stored in the 'currency' column of the 'ads' and 'campaigns' tables.
2. When comparing or aggregating monetary values, ensure they are in the same currency or use appropriate conversion rates.
3. Include the currency in your SELECT statement when querying monetary values.
4.Do not use ad_set_key in grouping since we will have multiple ad_set_key for a single adset instead use name
5.Always change the timestamp to IST Timezone by using this UNIX_TIMESTAMP(DATE_SUB(CONVERT_TZ(NOW(), '+00:00', '+05:30')))
Below are a number of examples of questions, account IDs, and their corresponding SQL queries

Here is the relevant table info: some table info

Below are a number of examples of questions and their corresponding SQL queries. Those examp

In [41]:
from langchain.memory import ChatMessageHistory
history = ChatMessageHistory()

generate_query = create_sql_query_chain(llm, db,final_prompt)
execute_query = QuerySQLDataBaseTool(db=db)


def process_question(question):
    # Generate the query
    generated_query = generate_query.invoke(
        {
            "question": question,
            "table_info": db.table_info,
            "messages": history.messages,
        }
    )

    # Clean the query
    clean_query = (
        generated_query.strip().replace("```sql", "").replace("```", "").strip()
    )

    try:
        # Execute the query
        result = execute_query.invoke(clean_query)

        # Generate the answer
        answer = rephrase_answer.invoke(
            {
                "question": question,
                "query": clean_query,
                "result": result,
            }
        )

        # Add the interaction to the history
        history.add_user_message(question)
        history.add_ai_message(answer)

        return clean_query, answer
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        history.add_user_message(question)
        history.add_ai_message(error_message)
        return clean_query, error_message


# Example usage
question = "Best performing campaign this month"
# account_id = "act_624496083171435"

query, answer = process_question(question)
print("Generated SQL query:")
print(query)
print("\nInterpreted answer:")
print(answer)

Generated SQL query:
SELECT 
    c.name AS campaign_name,
    c.objective,
    SUM(ci.impressions) AS total_impressions,
    SUM(ci.unique_clicks) AS total_clicks,
    SUM(ci.spend) AS total_spend,
    AVG(ci.unique_ctr) AS avg_ctr,
    AVG(ci.cpm) AS avg_cpm,
    AVG(ci.cpc) AS avg_cpc
FROM campaigns c
JOIN campaign_insights ci ON c.campaign_key = ci.campaign_key
WHERE c.account_id = 'act_624496083171435'
    AND MONTH(FROM_UNIXTIME(ci.date_start)) = MONTH(CONVERT_TZ(NOW(), '+00:00', '+05:30'))
    AND YEAR(FROM_UNIXTIME(ci.date_start)) = YEAR(CONVERT_TZ(NOW(), '+00:00', '+05:30'))
GROUP BY c.name, c.objective
ORDER BY avg_ctr DESC, avg_cpm ASC
LIMIT 1;

Interpreted answer:
The best performing campaign this month is "MM_Static_Blinkit_01092024" with the following performance metrics:
- Objective: OUTCOME_LEADS
- Total Impressions: 175,293
- Total Clicks: 4,085
- Total Spend: $7,274.41
- Average CTR (Click-Through Rate): 2.49%
- Average CPM (Cost Per Thousand Impressions): $42.66
- Ave