Database Credentials


In [1]:
import os

from dotenv import load_dotenv

load_dotenv()

True

In [2]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [3]:
import os
from dotenv import load_dotenv
from langchain_community.utilities.sql_database import SQLDatabase

# Load environment variables
load_dotenv()

# Get database credentials from environment variables
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

# Create database connection
db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}")
print(db.dialect)
print(db.get_usable_table_names())
print(db.table_info)

mysql
['ad_creatives', 'ad_insights', 'ad_tracking_specs', 'ads', 'campaign_actions', 'campaign_conversions', 'campaign_insights', 'campaigns']

CREATE TABLE ad_creatives (
	id BIGINT NOT NULL, 
	name VARCHAR(255), 
	title VARCHAR(255), 
	body TEXT, 
	thumbnail_url TEXT, 
	video_id BIGINT, 
	call_to_action_type VARCHAR(50), 
	page_id VARCHAR(50), 
	PRIMARY KEY (id)
)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4

/*
3 rows from ad_creatives table:
id	name	title	body	thumbnail_url	video_id	call_to_action_type	page_id
120213321698260311	Be the Monk in Marketing 2024-05-22-06a6e70c8d6b902539eaa0f241f2e1c4	Be the Monk in Marketing	🚀 Elevate your game! Dentsu & Publicis insiders swear by us. Unlock 🔓 top case studies for free.	https://external.fmaa1-2.fna.fbcdn.net/emg1/v/t13/6227554188890667404?url=https%3A%2F%2Fwww.facebook	None	SUBSCRIBE	125267114004054
120213858232380311	{{product.name}} 2024-06-10-e966e3395993c4a04c6f1e2f176eb43f	None	None	https://scontent.fmaa1-2.fna

In [4]:
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI

llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
generate_query_chain=create_sql_query_chain(llm, db)

query=generate_query_chain.invoke({"question":"which campaign has the most clicks?"})

print(query)


SELECT campaigns.id, campaigns.name, SUM(campaign_insights.clicks) AS total_clicks
FROM campaigns
JOIN campaign_insights ON campaigns.id = campaign_insights.campaign_id
GROUP BY campaigns.id
ORDER BY total_clicks DESC
LIMIT 1;


In [5]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
execute_query = QuerySQLDataBaseTool(db=db)
execute_query.invoke(query)


"[(120214454200140311, 'MM_Motion(B)_VideoAd_07072024', Decimal('2122'))]"

In [6]:
chain= generate_query_chain | execute_query
chain.invoke({"question":"Which campaign has the most clicks?"})

"[(120214454200140311, 'MM_Motion(B)_VideoAd_07072024', Decimal('2122'))]"

In [7]:
chain.get_prompts()[0].pretty_print()

You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURDATE() function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the S

In [28]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
     """Given the following user question, corresponding SQL query, and SQL result, provide a comprehensive answer following the guidelines below:

    You are an expert Meta ads analyst and SQL query specialist. Your task is to interpret user questions about Meta ad performance, generate appropriate SQL queries, and provide comprehensive, data-driven answers.

Follow these steps:
1. Analyze the user's question to understand the core topic and intent.
2. Interpret the query results and provide a clear, actionable answer.
3. Include specific metrics, comparative analysis, and performance insights when possible.
4. Offer at least two actionable recommendations based on the data.
5. Suggest relevant follow-up analyses or questions.

Remember to:
- Use only the data available in the query results.
- Clearly state any assumptions or limitations in your analysis.
- Maintain a professional yet conversational tone.
- Prioritize accuracy, relevance, and actionable insights.

 Question: {question}
 SQL Query: {query}
 SQL Result: {result}
 Answer: """
 )

rephrase_answer = answer_prompt | llm | StrOutputParser()

chain = (
    RunnablePassthrough.assign(query=generate_query_chain).assign(
        result=itemgetter("query") | execute_query
     )
     | rephrase_answer
 )

chain.invoke({"question": "Ad with the most clicks?"})








"1. **Core Topic and Intent**: The user is interested in identifying the ad that has received the most clicks.\n\n2. **Interpretation and Actionable Answer**: The SQL query attempted to retrieve the ad with the highest number of clicks by joining the `ads` and `ad_insights` tables and ordering the results by the `clicks` column in descending order. However, there seems to be a syntax error in the query that needs to be corrected to execute successfully.\n\n3. **Insights and Recommendations**:\n   - Without the actual query execution, we can't provide the specific ad with the most clicks. However, once the syntax error is fixed, running the corrected query will reveal the ad that has garnered the highest number of clicks.\n   - To further analyze the performance of this top-clicked ad, you can look into metrics like click-through rate (CTR), conversion rate, and cost per click (CPC) to gain a more comprehensive understanding of its effectiveness.\n\n4. **Actionable Recommendations**:\n 

In [9]:
examples = [
    {
        "input": "Which campaign has the highest number of clicks?",
        "query": """
            SELECT c.id, c.name, SUM(ci.clicks) as total_clicks
            FROM campaigns c
            JOIN campaign_insights ci ON c.id = ci.campaign_id
            GROUP BY c.id, c.name
            ORDER BY total_clicks DESC
            LIMIT 1
        """
    },
    {
        "input": "What is the average CTR for video ads in the last 30 days?",
        "query": """
            SELECT AVG(ai.ctr) as avg_ctr
            FROM ad_insights ai
            JOIN ads a ON ai.ad_id = a.id
            JOIN ad_creatives ac ON a.id = ac.id
            WHERE ac.video_id IS NOT NULL
              AND ai.date_start >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
        """
    },
    {
        "input": "List the top 5 campaigns by conversion rate for 'purchase' actions.",
        "query": """
            SELECT c.name, 
                   SUM(cc.value) as total_purchases,
                   SUM(ci.clicks) as total_clicks,
                   (SUM(cc.value) / SUM(ci.clicks)) * 100 as conversion_rate
            FROM campaigns c
            JOIN campaign_insights ci ON c.id = ci.campaign_id
            JOIN campaign_conversions cc ON ci.id = cc.campaign_insight_id
            WHERE cc.action_type = 'purchase'
            GROUP BY c.id, c.name
            ORDER BY conversion_rate DESC
            LIMIT 5
        """
    },
    {
        "input": "What is the total spend and ROI for each campaign objective in the last quarter?",
        "query": """
            SELECT c.objective,
                   SUM(ci.spend) as total_spend,
                   SUM(cc.value) as total_conversions,
                   (SUM(cc.value) / SUM(ci.spend)) as ROI
            FROM campaigns c
            JOIN campaign_insights ci ON c.id = ci.campaign_id
            LEFT JOIN campaign_conversions cc ON ci.id = cc.campaign_insight_id
            WHERE ci.date_start >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH)
            GROUP BY c.objective
            ORDER BY ROI DESC
        """
    },
    {
        "input": "Which ad creative has the highest CTR for each campaign?",
        "query": """
            WITH ranked_creatives AS (
                SELECT c.id as campaign_id,
                       c.name as campaign_name,
                       ac.id as creative_id,
                       ac.name as creative_name,
                       ai.ctr,
                       ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY ai.ctr DESC) as rank
                FROM campaigns c
                JOIN ads a ON c.id = a.campaign_id
                JOIN ad_creatives ac ON a.id = ac.id
                JOIN ad_insights ai ON a.id = ai.ad_id
            )
            SELECT campaign_id, campaign_name, creative_id, creative_name, ctr
            FROM ranked_creatives
            WHERE rank = 1
            ORDER BY ctr DESC
        """
    }
]

In [10]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder,FewShotChatMessagePromptTemplate,PromptTemplate

example_prompt=ChatPromptTemplate.from_messages(
    [
        ("human", "{input}\nSQL Query:"),
        ("ai", "{query}")
    ]
)

few_shot_prompt=FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    input_variables=["input"]
)

print(few_shot_prompt.invoke({"input":"Which ads has the highest CTR?"}))





messages=[HumanMessage(content='Which campaign has the highest number of clicks?\nSQL Query:'), AIMessage(content='\n            SELECT c.id, c.name, SUM(ci.clicks) as total_clicks\n            FROM campaigns c\n            JOIN campaign_insights ci ON c.id = ci.campaign_id\n            GROUP BY c.id, c.name\n            ORDER BY total_clicks DESC\n            LIMIT 1\n        '), HumanMessage(content='What is the average CTR for video ads in the last 30 days?\nSQL Query:'), AIMessage(content='\n            SELECT AVG(ai.ctr) as avg_ctr\n            FROM ad_insights ai\n            JOIN ads a ON ai.ad_id = a.id\n            JOIN ad_creatives ac ON a.id = ac.id\n            WHERE ac.video_id IS NOT NULL\n              AND ai.date_start >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)\n        '), HumanMessage(content="List the top 5 campaigns by conversion rate for 'purchase' actions.\nSQL Query:"), AIMessage(content="\n            SELECT c.name, \n                   SUM(cc.value) as total_purch

In [13]:
 from langchain_community.vectorstores import Chroma
 from langchain_core.example_selectors import SemanticSimilarityExampleSelector
 from langchain_openai import OpenAIEmbeddings

 vectorstore = Chroma()
 vectorstore.delete_collection()
 example_selector = SemanticSimilarityExampleSelector.from_examples(
     examples,
     OpenAIEmbeddings(),
     vectorstore,
     k=2,
     input_keys=["input"],
 )
 example_selector.select_examples({"input": "how many ads we have?"})
 few_shot_prompt = FewShotChatMessagePromptTemplate(
     example_prompt=example_prompt,
     example_selector=example_selector,
     input_variables=["input","top_k"],
 )
 print(few_shot_prompt.format(input="How many ads are there?"))



Human: Which ad creative has the highest CTR for each campaign?
SQL Query:
AI: 
            WITH ranked_creatives AS (
                SELECT c.id as campaign_id,
                       c.name as campaign_name,
                       ac.id as creative_id,
                       ac.name as creative_name,
                       ai.ctr,
                       ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY ai.ctr DESC) as rank
                FROM campaigns c
                JOIN ads a ON c.id = a.campaign_id
                JOIN ad_creatives ac ON a.id = ac.id
                JOIN ad_insights ai ON a.id = ai.ad_id
            )
            SELECT campaign_id, campaign_name, creative_id, creative_name, ctr
            FROM ranked_creatives
            WHERE rank = 1
            ORDER BY ctr DESC
        
Human: What is the average CTR for video ads in the last 30 days?
SQL Query:
AI: 
            SELECT AVG(ai.ctr) as avg_ctr
            FROM ad_insights ai
            JOIN ads a ON ai.ad_i

In [29]:
 final_prompt = ChatPromptTemplate.from_messages(
     [
         ("system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries."),
         few_shot_prompt,
         ("human", "{input}"),
     ]
 )
 generate_query = create_sql_query_chain(llm, db,final_prompt)
 chain = (
 RunnablePassthrough.assign(query=generate_query).assign(
     result=itemgetter("query") | execute_query
 )
 | rephrase_answer
 )
 chain.invoke({"question": "Best campaign based on the CTR"})



"1. **Core Topic and Intent**: The user is seeking to identify the best campaign based on Click-Through Rate (CTR) performance.\n\n2. **Interpretation and Actionable Answer**: The SQL query calculates the average CTR for each campaign and sorts them in descending order to find the campaign with the highest average CTR. The result shows that the campaign with ID 120214454200140311 and name 'MM_Motion(B)_VideoAd_07072024' has the highest average CTR of 1.2348770000.\n\n3. **Insights and Analysis**:\n   - The campaign 'MM_Motion(B)_VideoAd_07072024' stands out with a significantly higher CTR compared to other campaigns.\n   - A high CTR indicates that this campaign is effectively engaging users and driving them to click on the ad, which can lead to increased conversions or website visits.\n\n4. **Actionable Recommendations**:\n   - **Optimize Other Campaigns**: Analyze the creatives, targeting, and messaging of other campaigns to identify elements that can be improved to boost CTR.\n   - 