Database Credentials


In [1]:
import os

from dotenv import load_dotenv

load_dotenv()

True

In [2]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [3]:
import os
from dotenv import load_dotenv
from langchain_community.utilities.sql_database import SQLDatabase

# Load environment variables
load_dotenv()

# Get database credentials from environment variables
db_user = os.getenv("AIVEN_USER")
db_password = os.getenv("AIVEN_PASSWORD")
db_host = os.getenv("AIVEN_HOST")
db_port = os.getenv("AIVEN_PORT")
db_name = os.getenv("AIVEN_DATABASE")

# Create database connection
db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}", schema=db_name, view_support=True)
print(db.dialect)
print(db.get_usable_table_names())
print(db.table_info)

mysql
['ad_creatives', 'ad_insights', 'ad_set_insights', 'ad_sets', 'ads', 'campaign_actions', 'campaign_insights', 'campaigns']

CREATE TABLE defaultdb.ad_creatives (
	creative_key BIGINT NOT NULL AUTO_INCREMENT, 
	ad_key BIGINT, 
	name VARCHAR(255), 
	title VARCHAR(255), 
	body TEXT, 
	call_to_action_type VARCHAR(50), 
	PRIMARY KEY (creative_key), 
	CONSTRAINT ad_creatives_ibfk_1 FOREIGN KEY(ad_key) REFERENCES defaultdb.ads (ad_key)
)

/*
3 rows from ad_creatives table:
creative_key	ad_key	name	title	body	call_to_action_type
1	1	Subscribe for FREE today 2023-12-23-3dca6e1e13c15e06f7e1773d0acb90cc	Subscribe for FREE today	📈 Unveil Strategic Marketing Insights! 🌐

Dive deep into Nykaa's Omnichannel Mastery and Bollywood's	SUBSCRIBE
2	2	Marketing Monk 2023-09-20-157a22d6bba9885c95c2ae7f35892134	Marketing Monk	📈 Unveil Strategic Marketing Insights! 🌐

Dive deep into Nykaa's Omnichannel Mastery and Bollywood's	SUBSCRIBE
3	3	Subscribe today and join 10,000+ readers 2023-09-18-99efc5b686974

In [4]:
from langchain.chains import create_sql_query_chain
from langchain_anthropic import ChatAnthropic


llm = ChatAnthropic(
    model="claude-3-5-sonnet-20240620",  # Ensure the model name is correct
    temperature=0,
    max_tokens=1024,
    timeout=None,  # Optional: set a timeout if needed
    max_retries=2,  # Optional: set the number of retries
)
generate_query_chain=create_sql_query_chain(llm, db)

query=generate_query_chain.invoke({"question":"which campaign has the most clicks?"})

print(query)

To find the campaign with the most clicks, we need to query the campaign_insights table and join it with the campaigns table to get the campaign names. Here's the query:

```sql
SELECT c.`name`, ci.`clicks`
FROM defaultdb.campaign_insights ci
JOIN defaultdb.campaigns c ON ci.`campaign_key` = c.`campaign_key`
ORDER BY ci.`clicks` DESC
LIMIT 1;
```

Let's run this query and see the result.


In [5]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
execute_query = QuerySQLDataBaseTool(db=db)
execute_query.invoke(query)


'Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'To find the campaign with the most clicks, we need to query the campaign_insight\' at line 1")\n[SQL: To find the campaign with the most clicks, we need to query the campaign_insights table and join it with the campaigns table to get the campaign names. Here\'s the query:\n\n```sql\nSELECT c.`name`, ci.`clicks`\nFROM defaultdb.campaign_insights ci\nJOIN defaultdb.campaigns c ON ci.`campaign_key` = c.`campaign_key`\nORDER BY ci.`clicks` DESC\nLIMIT 1;\n```\n\nLet\'s run this query and see the result.]\n(Background on this error at: https://sqlalche.me/e/20/f405)'

In [6]:
chain= generate_query_chain | execute_query
chain.invoke({"question":"Which campaign has the most clicks?"})

'Error: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'To find the campaign with the most clicks, we need to query the campaign_insight\' at line 1")\n[SQL: To find the campaign with the most clicks, we need to query the campaign_insights table and join it with the campaigns table to get the campaign names. Here\'s the query:\n\n```sql\nSELECT c.`name`, ci.`clicks`\nFROM defaultdb.campaign_insights ci\nJOIN defaultdb.campaigns c ON ci.`campaign_key` = c.`campaign_key`\nORDER BY ci.`clicks` DESC\nLIMIT 1;\n```\n\nLet\'s run this query and see the result.]\n(Background on this error at: https://sqlalche.me/e/20/f405)'

In [7]:
chain.get_prompts()[0].pretty_print()

You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURDATE() function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the S

In [8]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, provide a comprehensive answer following the guidelines below:

You are an expert Meta ads analyst and SQL query specialist. Your task is to interpret user questions about Meta ad performance, analyze SQL query results (including multi-row and multi-column data), and provide comprehensive, data-driven answers.

Follow these steps:
1. Analyze the user's question to understand the core topic and intent.
2. Carefully examine the SQL result, noting the number of rows and columns.
3. If the result is tabular (multiple rows/columns):
   a) Summarize the overall structure of the data (e.g., "The result shows data for 5 ads across 3 metrics").
   b) Identify and highlight key trends or patterns in the data.
   c) Mention the top 3-5 rows or most significant data points, providing context.
   d) Compare and contrast different rows or columns as relevant.
4. Interpret the query results, focusing on key Meta advertising metrics (e.g., CTR, CPC, ROAS, Frequency, Reach).
5. Provide a clear, actionable answer structured as follows:
   a) Summary of findings
   b) Detailed analysis with specific metrics and comparisons
   c) Performance insights and their implications
   d) At least two actionable recommendations based on the data
   e) Suggestions for follow-up analyses or questions
6. Relate your analysis to common Meta advertising objectives (e.g., awareness, consideration, conversion).
7. Consider the impact on different parts of the advertising funnel.

Remember to:
- Use ALL the data available in the query results, not just the top row.
- For tabular data, provide a holistic interpretation that covers the entire dataset.
- Clearly state any assumptions or limitations in your analysis.
- Maintain a professional yet conversational tone.
- Prioritize accuracy, relevance, and actionable insights.
- Use Meta-specific terminology where appropriate (e.g., ad sets, campaigns, placements).
- Consider the broader context of the Meta ads ecosystem (e.g., algorithm learning, audience saturation).
- If the data spans a time period, note any temporal trends or changes.

Context 
Conduct an in-depth analysis of the user’s Meta ad campaigns, focusing on individual campaign performance, trends, and optimization opportunities. When information is limited or unclear, engage the user to gather more context. Follow these steps:
Data Collection and Campaign Identification:
a) Retrieve data for all active and recently concluded campaigns (past 90 days)
b) If data access is limited, ask the user:
“I can only access data for [X] campaigns. Are there specific campaigns you’d like me to focus on?”
c) Identify campaign objectives and types (e.g., conversions, traffic, awareness)
d) If campaign objectives are unclear, ask:
“Could you clarify the primary objectives for your main campaigns? This will help me provide more relevant insights.”
Individual Campaign Performance Analysis:
a) For each campaign, analyze key metrics:
Return on Ad Spend (ROAS)
Cost Per Result (based on campaign objective)
Click-Through Rate (CTR)
Conversion Rate
Total Spend
Impressions
Reach
Frequency
Relevance Score or Quality Ranking
b) Compare each campaign’s performance to:
Account averages
Previous period (e.g., last 30 days vs. 30 days before)
Industry benchmarks
c) Identify performance trends over time
d) If any metrics show unusual patterns, ask:
“I’ve noticed [Metric X] for [Campaign Y] is [unusually high/low]. Has there been any recent change in strategy for this campaign?”
Campaign Structure Evaluation:
a) Analyze the structure of each campaign (ad sets, ads)
b) Assess the alignment between campaign structure and objectives
c) Identify any structural issues that might be impacting performance
d) If the structure seems unusual, ask:
“Can you explain the reasoning behind the structure of [Campaign Z]? I want to ensure I’m interpreting it correctly.”
Audience Analysis by Campaign:
a) Evaluate audience performance within each campaign
b) Identify best and worst-performing audiences
c) Analyze audience overlap between campaigns
d) If audience data is limited, ask:
“I have limited information about your audience targeting. Can you tell me about the target audiences for your key campaigns?”
Creative Performance by Campaign:
a) Analyze performance of different ad formats within each campaign
b) Identify top-performing ad creatives for each campaign
c) Evaluate ad copy effectiveness and messaging themes
d) If creative data is limited, ask:
“Could you describe the main creative approaches you’re using in [Campaign X]? I’d like to provide more accurate insights on creative performance.”
Budget and Bidding Analysis by Campaign:
a) Assess budget utilization and pacing for each campaign
b) Analyze the effectiveness of bidding strategies used
c) Identify opportunities for budget reallocation between campaigns
d) If budget information is unclear, ask:
“Can you provide more details about your budget allocation strategy across campaigns? Are there any spending constraints I should be aware of?”
Placement Performance by Campaign:
a) Evaluate ad performance across different placements for each campaign
b) Identify most and least effective placements per campaign
c) If placement data is limited, ask:
“Are you using specific placement strategies for different campaigns, or primarily automatic placements?”
Campaign-Specific Conversion Funnel Analysis:
a) Analyze the conversion path for each campaign
b) Identify drop-off points in the funnel
c) Compare funnel performance between campaigns
d) If conversion data is incomplete, ask:
“I’m seeing limited conversion data for [Campaign Y]. Can you describe the expected customer journey for this campaign?”
Cross-Campaign Insights:
a) Identify patterns or insights that emerge across multiple campaigns
b) Analyze how different campaigns might be impacting each other
c) Suggest opportunities for synergy between campaigns
Campaign Optimization Recommendations:
a) Provide 3-5 specific optimization recommendations for each campaign
b) Prioritize recommendations based on potential impact
c) Reference AdsNerd’s knowledge base for best practices
d) After presenting recommendations, ask:
“Do these optimization suggestions align with your campaign goals? Are there any you’d like more details on?”
Competitive Analysis (if Ad Library data is available):
a) Compare campaign strategies to visible competitor approaches
b) Identify potential opportunities based on competitor activities
c) If competitor data is unavailable, ask:
“Are you aware of any specific campaign strategies your competitors are using that we should consider?”
Future Campaign Planning:
a) Based on the analysis, suggest ideas for future campaigns
b) Identify underutilized opportunities in the current campaign mix
c) Ask the user:
“Are there any new campaign ideas or objectives you’re considering that we should factor into this analysis?”
Data Visualization:
a) Create clear, comparative charts for campaign performance
b) Visualize trends and patterns across campaigns
c) Ensure visualizations can be easily explained in text format
Summary and Next Steps:
a) Provide a concise summary of overall campaign performance
b) Highlight key areas for improvement across campaigns
c) Outline recommended next steps for campaign optimization
d) Suggest specific AdsNerd modules for deeper dives into areas of concern
e) After presenting the summary, ask:
“Based on this campaign analysis, which areas would you like to focus on improving first?”
Throughout the analysis, maintain a conversational tone and avoid jargon. Be prepared to explain any technical terms or concepts if the user requests clarification. Always specify the source of data, especially when referencing industry benchmarks or competitor insights.
If any critical data is missing or there are limitations in the analysis, clearly communicate this to the user and explain how it might impact the insights provided. Use the questions provided to gather more context and provide the most accurate and helpful analysis possible.
Remember, the goal is to provide the user with a clear, actionable understanding of their individual campaign performance and how campaigns work together, setting the stage for targeted optimizations and improved overall ad account performance.

Question: {question}
SQL Query: {query}
SQL Result: {result}

Answer: """
)

rephrase_answer = answer_prompt | llm | StrOutputParser()

chain = (
    RunnablePassthrough.assign(query=generate_query_chain).assign(
        result=itemgetter("query") | execute_query
     )
     | rephrase_answer
 )

chain.invoke({"question": "Ad with the most clicks?"})

"I apologize for the confusion. It seems there was an error in executing the SQL query. Without the actual query results, I can't provide a specific analysis of the ad with the most clicks. However, I can guide you on how to approach this question and what insights you could gain from such data.\n\nTo properly answer your question about the ad with the most clicks, we would need to successfully run a query that joins the ad_insights table with the ads table, ordering the results by clicks in descending order. This would give us the name of the ad and its click count.\n\nSince we don't have this data, let me provide some general insights and recommendations:\n\n1. Summary of approach:\n   When looking for the ad with the most clicks, we're essentially identifying your top-performing ad in terms of user engagement. This is crucial for understanding what resonates with your audience.\n\n2. Importance of click data:\n   Clicks are a key engagement metric that indicates how compelling your 

In [9]:
examples = [
    {
        "input": "What are the top 5 ads by CTR and their spend in the last 30 days?",
        "accountId": "act_624496083171435",
        "query": """
            SELECT 
                a.name AS ad_name,
                ai.ctr,
                ai.impressions,
                ai.clicks,
                ai.spend,
                a.currency,
                FROM_UNIXTIME(ai.date_start) AS date_start,
                FROM_UNIXTIME(ai.date_stop) AS date_stop
            FROM ads a
            JOIN ad_insights ai ON a.id = ai.ad_id
            WHERE a.account_id = 'act_624496083171435'
                AND ai.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 30 DAY))
            ORDER BY ctr DESC
            LIMIT 5;
        """,
    },
    # {
    #     "input": "Compare the performance of different campaign objectives in terms of spend and conversions",
    #     "accountId": "act_624496083171435",
    #     "query": """
    #         SELECT 
    #             c.objective,
    #             SUM(ci.spend) AS total_spend,
    #             c.currency,
    #             SUM(ca.value) AS total_conversions,
    #             AVG(ci.ctr) AS avg_ctr,
    #             AVG(ci.cpm) AS avg_cpm
    #         FROM campaigns c
    #         JOIN campaign_insights ci ON c.id = ci.campaign_id
    #         LEFT JOIN campaign_actions ca ON ci.id = ca.campaign_insight_id
    #         WHERE c.account_id = 'act_624496083171435' AND ca.action_type='offsite_conversion.fb_pixel_custom'
    #         GROUP BY c.objective, c.currency
    #         ORDER BY total_spend DESC;
    #     """,
    # },
    {
        "input": "What is the daily performance trend of our top-spending campaign in the last week?",
        "accountId": "act_624496083171435",
        "query": """
            WITH top_campaign AS (
                SELECT campaign_id
                FROM campaign_insights
                WHERE date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 7 DAY))
                GROUP BY campaign_id
                ORDER BY SUM(spend) DESC
                LIMIT 1
            )
            SELECT 
                c.name AS campaign_name,
                FROM_UNIXTIME(ci.date_start) AS date_start,
                ci.impressions,
                ci.clicks,
                ci.spend,
                c.currency,
                ci.ctr,
                ci.cpm,
                ca.value AS conversions
            FROM campaigns c
            JOIN campaign_insights ci ON c.id = ci.campaign_id
            LEFT JOIN campaign_actions ca ON ci.id = ca.campaign_insight_id
            WHERE c.id = (SELECT campaign_id FROM top_campaign)
                AND c.account_id = 'act_624496083171435'
                AND ci.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 7 DAY))
            ORDER BY ci.date_start;
        """,
    }
    # {
    #     "input": "Which ad creative type has the highest conversion rate and what's the total spend for each?",
    #     "accountId": "act_624496083171435",
    #     "query": """
    #         SELECT 
    #             ac.call_to_action_type,
    #             SUM(ai.clicks) AS total_clicks,
    #             SUM(ca.value) AS total_conversions,
    #             SUM(ca.value) / NULLIF(SUM(ai.clicks), 0) * 100 AS conversion_rate,  // Avoid division by zero
    #             SUM(ai.spend) AS total_spend,
    #             a.currency
    #         FROM ad_creatives ac
    #         JOIN ads a ON ac.ad_id = a.id
    #         JOIN ad_insights ai ON a.id = ai.ad_id
    #         JOIN campaigns c ON a.campaign_id = c.id
    #         LEFT JOIN campaign_insights ci ON c.id = ci.campaign_id
    #         LEFT JOIN campaign_actions ca ON ci.id = ca.campaign_insight_id
    #         WHERE c.account_id = 'act_624496083171435'
    #         GROUP BY ac.call_to_action_type, a.currency
    #         ORDER BY conversion_rate DESC;
    #     """,
    # },
]

In [10]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder,FewShotChatMessagePromptTemplate,PromptTemplate

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}\nAccount ID: {accountId}\nSQL Query:"),
        ("ai", "{query}")
    ]
)

few_shot_prompt=FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    input_variables=["input"]
)

print(few_shot_prompt.invoke({"input":"Which ads has the highest CTR?"}))

messages=[HumanMessage(content='What are the top 5 ads by CTR and their spend in the last 30 days?\nAccount ID: act_624496083171435\nSQL Query:'), AIMessage(content="\n            SELECT \n                a.name AS ad_name,\n                ai.ctr,\n                ai.impressions,\n                ai.clicks,\n                ai.spend,\n                a.currency,\n                FROM_UNIXTIME(ai.date_start) AS date_start,\n                FROM_UNIXTIME(ai.date_stop) AS date_stop\n            FROM ads a\n            JOIN ad_insights ai ON a.id = ai.ad_id\n            WHERE a.account_id = 'act_624496083171435'\n                AND ai.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 30 DAY))\n            ORDER BY ctr DESC\n            LIMIT 5;\n        "), HumanMessage(content='What is the daily performance trend of our top-spending campaign in the last week?\nAccount ID: act_624496083171435\nSQL Query:'), AIMessage(content="\n            WITH top_campaign AS (\n                

In [11]:
from langchain_community.vectorstores import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma()
vectorstore.delete_collection()
example_selector = SemanticSimilarityExampleSelector.from_examples(
     examples,
     OpenAIEmbeddings(),
     vectorstore,
     k=2,
     input_keys=["input"],
 )
example_selector.select_examples({"input": "how many ads we have?"})
few_shot_prompt = FewShotChatMessagePromptTemplate(
     example_prompt=example_prompt,
     example_selector=example_selector,
     input_variables=["input","top_k"],
 )
print(few_shot_prompt.format(input="How many ads are there?"))

  warn_deprecated(


Human: What are the top 5 ads by CTR and their spend in the last 30 days?
Account ID: act_624496083171435
SQL Query:
AI: 
            SELECT 
                a.name AS ad_name,
                ai.ctr,
                ai.impressions,
                ai.clicks,
                ai.spend,
                a.currency,
                FROM_UNIXTIME(ai.date_start) AS date_start,
                FROM_UNIXTIME(ai.date_stop) AS date_stop
            FROM ads a
            JOIN ad_insights ai ON a.id = ai.ad_id
            WHERE a.account_id = 'act_624496083171435'
                AND ai.date_start >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 30 DAY))
            ORDER BY ctr DESC
            LIMIT 5;
        
Human: What is the daily performance trend of our top-spending campaign in the last week?
Account ID: act_624496083171435
SQL Query:
AI: 
            WITH top_campaign AS (
                SELECT campaign_id
                FROM campaign_insights
                WHERE date_start >= UNIX_TI

In [12]:
with open("additional_knowledge.txt", "r") as file:
    additional_knowledge = file.read()

final_prompt = ChatPromptTemplate.from_messages(
    [

        (

            "system",

            """You are a MySQL expert.Given an input question and an account ID, create a syntactically correct MySQL query to run. Always include the account_id in your WHERE clause for filtering.




Here is the relevant table info: {table_info}

Additional knowledge for query generation:
{additional_knowledge}


Important notes:



1. Always consider the currency when dealing with monetary values. The currency is stored in the 'currency' column of the 'ads' and 'campaigns' tables.
2. When comparing or aggregating monetary values, ensure they are in the same currency or use appropriate conversion rates.
3. Include the currency in your SELECT statement when querying monetary values.
Below are a number of examples of questions, account IDs, and their corresponding SQL queries.""",
        ),
        few_shot_prompt,

        ("human", "{input}\nAccount ID: {accountId}"),
    ]

)
generate_query = create_sql_query_chain(llm, db, final_prompt)
# Modify the process_question function
def process_question(question, account_id):
    generated_query = generate_query.invoke(
        {
            "question": question,
            "accountId": account_id,
            "table_info": db.table_info,
            "additional_knowledge": additional_knowledge,
        }
    )
    clean_query = (
        generated_query.strip().replace("```sql", "").replace("```", "").strip()
    )

    try:
        result = execute_query.invoke(clean_query)
        answer = rephrase_answer.invoke(
            {
                "question": question,
                "query": clean_query,
                "result": result,
            }
        )
        return clean_query, answer
    except Exception as e:
        return clean_query, f"An error occurred: {str(e)}"


# Example usage
if __name__ == "__main__":
    question = "Which campaign has the most impressions?"
    account_id = "act_624496083171435"

    query, answer = process_question(question, account_id)
    print("Generated SQL query:")
    print(query)
    print("\nInterpreted answer:")
    print(answer)

Generated SQL query:
To find the campaign with the most impressions for the given account, we can use the following SQL query:


SELECT 
    c.campaign_key,
    c.name AS campaign_name,
    SUM(ci.impressions) AS total_impressions
FROM defaultdb.campaigns c
JOIN defaultdb.campaign_insights ci ON c.campaign_key = ci.campaign_key
WHERE c.account_id = 'act_624496083171435'
GROUP BY c.campaign_key, c.name
ORDER BY total_impressions DESC
LIMIT 1;


This query does the following:

1. Joins the `campaigns` table with the `campaign_insights` table to get impression data.
2. Filters for the specific account ID.
3. Groups the results by campaign to sum up all impressions for each campaign.
4. Orders the results by total impressions in descending order.
5. Limits the result to 1 to get only the top campaign.

This will give you the campaign with the highest total impressions for the specified account.

Interpreted answer:
I apologize for the confusion. It appears there was an error in executing t