In [None]:
import os
os.environ["OPENAI_API_KEY"] = ""

In [3]:
from openai import OpenAI
client = OpenAI()

Load processed dataset

In [4]:
import pandas as pd

df = pd.read_csv("../data/adobe_style_campaign_data.csv")

metrics = (
    df.groupby("campaign_name")
    .agg({
        "impressions": "sum",
        "clicks": "sum",
        "conversions": "sum",
        "revenue": "sum",
        "spend": "sum",
    })
    .reset_index()
)

# Derived correctly-weighted metrics
metrics["ctr"] = metrics["clicks"] / metrics["impressions"]
metrics["cvr"] = metrics["conversions"] / metrics["clicks"]
metrics["roas"] = metrics["revenue"] / metrics["spend"]
metrics.head()


Unnamed: 0,campaign_name,impressions,clicks,conversions,revenue,spend,ctr,cvr,roas
0,Campaign 1,34689697,1551423,266259,18788660.0,1801393.0,0.044723,0.171622,10.430071
1,Campaign 10,39508953,1771647,311139,21726070.0,2036928.0,0.044842,0.175621,10.666098
2,Campaign 2,35079079,1575930,275353,19350950.0,1803419.0,0.044925,0.174724,10.730148
3,Campaign 3,35284468,1565588,278841,19794190.0,1795664.0,0.04437,0.178106,11.023328
4,Campaign 4,33219928,1503726,261574,18534530.0,1723585.0,0.045266,0.173951,10.753471


Preepare metrics for LLM input

In [5]:
records = metrics.to_dict(orient="records")
records[0]


{'campaign_name': 'Campaign 1',
 'impressions': 34689697,
 'clicks': 1551423,
 'conversions': 266259,
 'revenue': 18788658.27321354,
 'spend': 1801393.1107573558,
 'ctr': 0.04472287549816304,
 'cvr': 0.17162243952809775,
 'roas': 10.430071127181265}

Build prompt templates

In [6]:
def build_summary_prompt(metrics):
    return f"""
You are an analytics assistant.

Given the campaign performance metrics below, generate 4-6 clear insights.
Discuss trends, funnel issues, anomalies, and recommended optimizations.

Metrics:
{metrics}
"""


In [7]:
def build_funnel_prompt(metrics):
    return f"""
Analyze the funnel for this campaign. Identify the largest drop-offs and explain why they might happen.

Use the data below:
{metrics}
"""


In [8]:
def build_anomaly_prompt(metrics, anomaly):
    return f"""
Anomaly detected: {anomaly}

Explain:
1. What probably caused it  
2. How it impacts performance  
3. What actions should be taken next  

Metrics:
{metrics}
"""


Generate insights with LLM

In [9]:
pip install openai

Note: you may need to restart the kernel to use updated packages.


In [13]:
# Call OpenAI LLM

from openai import OpenAI
client = OpenAI()

def generate_insight(metric_row):
    prompt = build_summary_prompt(metric_row)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content


Apply insights to every campaign

In [14]:
metrics["llm_insight"] = metrics.apply(lambda row: generate_insight(row.to_dict()), axis=1)
metrics.head()

Unnamed: 0,campaign_name,impressions,clicks,conversions,revenue,spend,ctr,cvr,roas,llm_insight
0,Campaign 1,34689697,1551423,266259,18788660.0,1801393.0,0.044723,0.171622,10.430071,Based on the performance metrics provided for ...
1,Campaign 10,39508953,1771647,311139,21726070.0,2036928.0,0.044842,0.175621,10.666098,Based on the provided campaign performance met...
2,Campaign 2,35079079,1575930,275353,19350950.0,1803419.0,0.044925,0.174724,10.730148,Based on the provided campaign performance met...
3,Campaign 3,35284468,1565588,278841,19794190.0,1795664.0,0.04437,0.178106,11.023328,Based on the provided performance metrics for ...
4,Campaign 4,33219928,1503726,261574,18534530.0,1723585.0,0.045266,0.173951,10.753471,Here are several insights and observations der...


In [16]:
print(metrics.loc[0, "llm_insight"])

Based on the performance metrics provided for **Campaign 1**, here are 4-6 insights, along with discussions on trends, funnel issues, anomalies, and recommended optimizations:

### 1. Strong Return on Ad Spend (ROAS)
- **Insight:** The campaign delivered a **ROAS of 10.43**, indicating for every dollar spent, the campaign generated over $10 in revenue.
- **Recommendation:** Given the high ROAS, it is advisable to consider scaling this campaign. Increasing the budget could help capture more market share and drive additional revenue while maintaining efficiency.

### 2. High Click-Through Rate (CTR) but Moderate Conversion Rate (CVR)
- **Insight:** The **CTR of 4.47%** suggests that the campaign's creative and targeting effectively attract user interest and drive clicks. However, the **CVR of 17.16%** indicates that while many users click on the ad, a relatively smaller proportion of those are converting to sales.
- **Recommendation:** Investigate the post-click experience. This may invo

Evaluate Hallucinations

In [17]:
def check_alignment(text, metrics):
    issues = []
    for key, value in metrics.items():
        if isinstance(value, (int, float)) and str(value)[:4] not in text:
            issues.append(key)
    return issues

metrics["quality_check"] = metrics.apply(lambda row: check_alignment(row["llm_insight"], row.to_dict()), axis=1)


In [18]:
metrics[["campaign_name", "llm_insight", "quality_check"]].head()

Unnamed: 0,campaign_name,llm_insight,quality_check
0,Campaign 1,Based on the performance metrics provided for ...,"[impressions, clicks, conversions, revenue, sp..."
1,Campaign 10,Based on the provided campaign performance met...,"[impressions, clicks, conversions, revenue, sp..."
2,Campaign 2,Based on the provided campaign performance met...,"[impressions, clicks, conversions, revenue, sp..."
3,Campaign 3,Based on the provided performance metrics for ...,"[impressions, clicks, conversions, revenue, sp..."
4,Campaign 4,Here are several insights and observations der...,"[impressions, clicks, conversions, revenue, sp..."


In [19]:
print(metrics.loc[0, "quality_check"])

['impressions', 'clicks', 'conversions', 'revenue', 'spend', 'ctr', 'cvr']
