Database Credentials


In [1]:
import os

from dotenv import load_dotenv

load_dotenv()

True

In [2]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [3]:
import os
from dotenv import load_dotenv
from langchain_community.utilities.sql_database import SQLDatabase

# Load environment variables
load_dotenv()

# Get database credentials from environment variables
db_user = os.getenv("AIVEN_USER")
db_password = os.getenv("AIVEN_PASSWORD")
db_host = os.getenv("AIVEN_HOST")
db_port = os.getenv("AIVEN_PORT")
db_name = os.getenv("AIVEN_DATABASE")

# Create database connection
db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}", schema=db_name, view_support=True)
print(db.dialect)
print(db.get_usable_table_names())
print(db.table_info)

mysql
['ad_actions', 'ad_creatives', 'ad_insights', 'ad_set_insights', 'ad_sets', 'ads', 'campaign_actions', 'campaign_insights', 'campaigns']

CREATE TABLE defaultdb.ad_actions (
	id INTEGER NOT NULL AUTO_INCREMENT, 
	ad_id BIGINT NOT NULL, 
	action_type VARCHAR(255) NOT NULL, 
	value VARCHAR(255) NOT NULL, 
	PRIMARY KEY (id), 
	CONSTRAINT ad_actions_ibfk_1 FOREIGN KEY(ad_id) REFERENCES defaultdb.ads (id)
)

/*
3 rows from ad_actions table:
id	ad_id	action_type	value
1	120215785378420311	landing_page_view	41
2	120215785378420311	post_engagement	3770
3	120215785378420311	page_engagement	3770
*/


CREATE TABLE defaultdb.ad_creatives (
	id BIGINT NOT NULL, 
	ad_id BIGINT NOT NULL, 
	name VARCHAR(255), 
	title VARCHAR(255), 
	call_to_action_type VARCHAR(50), 
	description TEXT, 
	PRIMARY KEY (id, ad_id), 
	CONSTRAINT ad_creatives_ibfk_1 FOREIGN KEY(ad_id) REFERENCES defaultdb.ads (id)
)

/*
3 rows from ad_creatives table:
id	ad_id	name	title	call_to_action_type	description
120213321698260

In [4]:
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI

llm=ChatOpenAI(model_name="gpt-4o", temperature=0)
generate_query_chain=create_sql_query_chain(llm, db)

query=generate_query_chain.invoke({"question":"which campaign has the most clicks?"})

print(query)


```sql
SELECT `campaigns`.`name`, `campaign_insights`.`clicks`
FROM `defaultdb`.`campaign_insights`
JOIN `defaultdb`.`campaigns` ON `campaign_insights`.`campaign_id` = `campaigns`.`id`
ORDER BY `campaign_insights`.`clicks` DESC
LIMIT 1;
```


In [5]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
execute_query = QuerySQLDataBaseTool(db=db)
execute_query.invoke(query)


'Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'```sql\\nSELECT `campaigns`.`name`, `campaign_insights`.`clicks`\\nFROM `defaultdb`.\' at line 1")\n[SQL: ```sql\nSELECT `campaigns`.`name`, `campaign_insights`.`clicks`\nFROM `defaultdb`.`campaign_insights`\nJOIN `defaultdb`.`campaigns` ON `campaign_insights`.`campaign_id` = `campaigns`.`id`\nORDER BY `campaign_insights`.`clicks` DESC\nLIMIT 1;\n```]\n(Background on this error at: https://sqlalche.me/e/20/f405)'

In [6]:
chain= generate_query_chain | execute_query
chain.invoke({"question":"Which campaign has the most clicks?"})

'Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'Question: Which campaign has the most clicks?\\nSQLQuery: \\n```sql\\nSELECT `campaign\' at line 1")\n[SQL: Question: Which campaign has the most clicks?\nSQLQuery: \n```sql\nSELECT `campaign_id`, `clicks`\nFROM `defaultdb.campaign_insights`\nORDER BY `clicks` DESC\nLIMIT 1;\n```]\n(Background on this error at: https://sqlalche.me/e/20/f405)'

In [7]:
chain.get_prompts()[0].pretty_print()

You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURDATE() function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the S

In [8]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, provide a comprehensive answer following the guidelines below:

You are an expert Meta ads analyst and SQL query specialist. Your task is to interpret user questions about Meta ad performance, analyze SQL query results (including multi-row and multi-column data), and provide comprehensive, data-driven answers.

Follow these steps:
1. Analyze the user's question to understand the core topic and intent.
2. Carefully examine the SQL result, noting the number of rows and columns.
3. If the result is tabular (multiple rows/columns):
   a) Summarize the overall structure of the data (e.g., "The result shows data for 5 ads across 3 metrics").
   b) Identify and highlight key trends or patterns in the data.
   c) Mention the top 3-5 rows or most significant data points, providing context.
   d) Compare and contrast different rows or columns as relevant.
4. Interpret the query results, focusing on key Meta advertising metrics (e.g., CTR, CPC, ROAS, Frequency, Reach).
5. Provide a clear, actionable answer structured as follows:
   a) Summary of findings
   b) Detailed analysis with specific metrics and comparisons
   c) Performance insights and their implications
   d) At least two actionable recommendations based on the data
   e) Suggestions for follow-up analyses or questions
6. Relate your analysis to common Meta advertising objectives (e.g., awareness, consideration, conversion).
7. Consider the impact on different parts of the advertising funnel.

Remember to:
- Use ALL the data available in the query results, not just the top row.
- For tabular data, provide a holistic interpretation that covers the entire dataset.
- Clearly state any assumptions or limitations in your analysis.
- Maintain a professional yet conversational tone.
- Prioritize accuracy, relevance, and actionable insights.
- Use Meta-specific terminology where appropriate (e.g., ad sets, campaigns, placements).
- Consider the broader context of the Meta ads ecosystem (e.g., algorithm learning, audience saturation).
- If the data spans a time period, note any temporal trends or changes.

Question: {question}
SQL Query: {query}
SQL Result: {result}

Answer: """
)

rephrase_answer = answer_prompt | llm | StrOutputParser()

chain = (
    RunnablePassthrough.assign(query=generate_query_chain).assign(
        result=itemgetter("query") | execute_query
     )
     | rephrase_answer
 )

chain.invoke({"question": "Ad with the most clicks?"})

'### Summary of Findings\nThe SQL query intended to identify the ad with the most clicks encountered a syntax error and did not execute successfully. Therefore, we do not have the data needed to determine which ad received the most clicks.\n\n### Detailed Analysis\n1. **SQL Query Error**: The error message indicates a syntax issue in the SQL query. Specifically, the inclusion of backticks and the `SQLQuery:` prefix within the query string is causing the problem.\n2. **Corrected Query**: To resolve this, the query should be simplified to:\n   ```sql\n   SELECT `ad_id`, `clicks`\n   FROM `defaultdb.ad_insights`\n   ORDER BY `clicks` DESC\n   LIMIT 1;\n   ```\n\n### Performance Insights and Implications\nSince the query did not execute, we cannot provide insights into the ad performance based on clicks. However, understanding which ad has the most clicks is crucial for evaluating engagement and the effectiveness of your ad creatives and targeting.\n\n### Actionable Recommendations\n1. **C

In [10]:
examples = [
    {
        "input": "What are the top 5 ads by CTR and their spend in the last 30 days?",
        "accountId": "act_624496083171435",
        "query": """
            SELECT 
                a.name AS ad_name,
                ai.clicks / ai.impressions * 100 AS ctr,
                ai.impressions,
                ai.clicks,
                ai.spend,
                a.currency,
                FROM_UNIXTIME(ai.date_start) AS date_start,
                FROM_UNIXTIME(ai.date_stop) AS date_stop
            FROM ads a
            JOIN ad_insights ai ON a.id = ai.ad_id
            WHERE a.account_id = 'act_624496083171435'
                AND ai.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 30 DAY))
            ORDER BY ctr DESC
            LIMIT 5;
        """,
    },
    {
        "input": "Compare the performance of different campaign objectives in terms of spend and conversions",
        "accountId": "act_624496083171435",
        "query": """
            SELECT 
                c.objective,
                SUM(ci.spend) AS total_spend,
                c.currency,
                SUM(ca.value) AS total_conversions,
                AVG(ci.ctr) AS avg_ctr,
                AVG(ci.cpm) AS avg_cpm
            FROM campaigns c
            JOIN campaign_insights ci ON c.id = ci.campaign_id
            LEFT JOIN campaign_actions ca ON ci.id = ca.campaign_insight_id
            WHERE c.account_id = 'act_624496083171435'
            GROUP BY c.objective, c.currency
            ORDER BY total_spend DESC;
        """,
    },
    {
        "input": "What is the daily performance trend of our top-spending campaign in the last week?",
        "accountId": "act_624496083171435",
        "query": """
            WITH top_campaign AS (
                SELECT campaign_id
                FROM campaign_insights
                WHERE date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 7 DAY))
                GROUP BY campaign_id
                ORDER BY SUM(spend) DESC
                LIMIT 1
            )
            SELECT 
                c.name AS campaign_name,
                FROM_UNIXTIME(ci.date_start) AS date_start,
                ci.impressions,
                ci.clicks,
                ci.spend,
                c.currency,
                ci.ctr,
                ci.cpm,
                ca.value AS conversions
            FROM campaigns c
            JOIN campaign_insights ci ON c.id = ci.campaign_id
            LEFT JOIN campaign_actions ca ON ci.id = ca.campaign_insight_id
            WHERE c.id = (SELECT campaign_id FROM top_campaign)
                AND c.account_id = 'act_624496083171435'
                AND ci.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 7 DAY))
            ORDER BY ci.date_start;
        """,
    },
    {
        "input": "Which ad creative type has the highest conversion rate and what's the total spend for each?",
        "accountId": "act_624496083171435",
        "query": """
            SELECT 
                ac.call_to_action_type,
                SUM(ai.clicks) AS total_clicks,
                SUM(ca.value) AS total_conversions,
                SUM(ca.value) / SUM(ai.clicks) * 100 AS conversion_rate,
                SUM(ai.spend) AS total_spend,
                a.currency
            FROM ad_creatives ac
            JOIN ads a ON ac.ad_id = a.id
            JOIN ad_insights ai ON a.id = ai.ad_id
            JOIN campaigns c ON a.campaign_id = c.id
            LEFT JOIN campaign_insights ci ON c.id = ci.campaign_id
            LEFT JOIN campaign_actions ca ON ci.id = ca.campaign_insight_id
            WHERE c.account_id = 'act_624496083171435'
            GROUP BY ac.call_to_action_type, a.currency
            ORDER BY conversion_rate DESC;
        """,
    },
]

In [11]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder,FewShotChatMessagePromptTemplate,PromptTemplate

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}\nAccount ID: {accountId}\nSQL Query:"),
        ("ai", "{query}")
    ]
)

few_shot_prompt=FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    input_variables=["input"]
)

print(few_shot_prompt.invoke({"input":"Which ads has the highest CTR?"}))





messages=[HumanMessage(content='What are the top 5 ads by CTR and their spend in the last 30 days?\nAccount ID: act_624496083171435\nSQL Query:'), AIMessage(content="\n            SELECT \n                a.name AS ad_name,\n                ai.clicks / ai.impressions * 100 AS ctr,\n                ai.impressions,\n                ai.clicks,\n                ai.spend,\n                a.currency,\n                FROM_UNIXTIME(ai.date_start) AS date_start,\n                FROM_UNIXTIME(ai.date_stop) AS date_stop\n            FROM ads a\n            JOIN ad_insights ai ON a.id = ai.ad_id\n            WHERE a.account_id = 'act_624496083171435'\n                AND ai.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 30 DAY))\n            ORDER BY ctr DESC\n            LIMIT 5;\n        "), HumanMessage(content='Compare the performance of different campaign objectives in terms of spend and conversions\nAccount ID: act_624496083171435\nSQL Query:'), AIMessage(content="\n           

In [12]:
from langchain_community.vectorstores import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma()
vectorstore.delete_collection()
example_selector = SemanticSimilarityExampleSelector.from_examples(
     examples,
     OpenAIEmbeddings(),
     vectorstore,
     k=2,
     input_keys=["input"],
 )
example_selector.select_examples({"input": "how many ads we have?"})
few_shot_prompt = FewShotChatMessagePromptTemplate(
     example_prompt=example_prompt,
     example_selector=example_selector,
     input_variables=["input","top_k"],
 )
print(few_shot_prompt.format(input="How many ads are there?"))

  warn_deprecated(


Human: What are the top 5 ads by CTR and their spend in the last 30 days?
Account ID: act_624496083171435
SQL Query:
AI: 
            SELECT 
                a.name AS ad_name,
                ai.clicks / ai.impressions * 100 AS ctr,
                ai.impressions,
                ai.clicks,
                ai.spend,
                a.currency,
                FROM_UNIXTIME(ai.date_start) AS date_start,
                FROM_UNIXTIME(ai.date_stop) AS date_stop
            FROM ads a
            JOIN ad_insights ai ON a.id = ai.ad_id
            WHERE a.account_id = 'act_624496083171435'
                AND ai.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 30 DAY))
            ORDER BY ctr DESC
            LIMIT 5;
        
Human: Which ad creative type has the highest conversion rate and what's the total spend for each?
Account ID: act_624496083171435
SQL Query:
AI: 
            SELECT 
                ac.call_to_action_type,
                SUM(ai.clicks) AS total_clicks,
  

In [16]:
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a MySQL expert. Given an input question and an account ID, create a syntactically correct MySQL query to run. Always include the account_id in your WHERE clause for filtering.

Here is the relevant table info: {table_info}

Important notes:
1. Always consider the currency when dealing with monetary values. The currency is stored in the 'currency' column of the 'ads' and 'campaigns' tables.
2. When comparing or aggregating monetary values, ensure they are in the same currency or use appropriate conversion rates.
3. Include the currency in your SELECT statement when querying monetary values.

Below are a number of examples of questions, account IDs, and their corresponding SQL queries.""",
        ),
        few_shot_prompt,
        ("human", "{input}\nAccount ID: {accountId}"),
    ]
)

generate_query = create_sql_query_chain(llm, db, final_prompt)

chain = (

    RunnablePassthrough.assign(query=generate_query).assign(

        result=itemgetter("query") | execute_query
    )

    | rephrase_answer
)


generated_query = generate_query.invoke(
    {
        "question": "How are my ads performing?",
        "accountId": "act_624496083171435",
    }
)

print("Generated SQL query:")
print(generated_query)

clean_query = generated_query.strip().replace('```sql', '').replace('```', '').strip()


try:

    result = execute_query.invoke(clean_query)

    print("\nQuery result:")
    print(result)


    # Process the result with the answer prompt

    answer = rephrase_answer.invoke(

        {

            "question": "How are my ads performing?",

            "query": clean_query,

            "result": result,

        }
    )

    print("\nInterpreted answer:")

    print(answer)


except Exception as e:

    print(f"\nAn error occurred: {str(e)}")

    import traceback

    traceback.print_exc()

Generated SQL query:
SELECT 
                a.name AS ad_name,
                ai.impressions,
                ai.clicks,
                ai.spend,
                a.currency,
                ai.ctr,
                ai.cpm,
                ai.cpp,
                ai.date_start,
                ai.date_stop
            FROM ads a
            JOIN ad_insights ai ON a.id = ai.ad_id
            WHERE a.account_id = 'act_624496083171435'
            ORDER BY ai.date_start DESC;

Query result:
[('Motion(C)_Video_Ad_Insta', 19914, 168, Decimal('591.69'), 'INR', Decimal('0.843628'), Decimal('29.712263'), Decimal('30.225276'), 1725042600, 1725042600), ('MM_Motion-3_Video_Ad', 20039, 118, Decimal('770.09'), 'INR', Decimal('0.588852'), Decimal('38.429562'), Decimal('40.946988'), 1725042600, 1725042600), ('MM_static_Ways_Ad', 23954, 157, Decimal('758.87'), 'INR', Decimal('0.655423'), Decimal('31.680304'), Decimal('33.442182'), 1725042600, 1725042600), ('MM_static_Olympic_Ad', 26957, 284, Decimal(