In [1]:
!pip install git+https://github.com/openai/swarm.git
!pip install openai
!pip install firecrawl-py
!pip install fireducks

Collecting git+https://github.com/openai/swarm.git
  Cloning https://github.com/openai/swarm.git to /tmp/pip-req-build-osjfkwv1
  Running command git clone --filter=blob:none --quiet https://github.com/openai/swarm.git /tmp/pip-req-build-osjfkwv1
  Resolved https://github.com/openai/swarm.git to commit 9db581cecaacea0d46a933d6453c312b034dbf47
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:
import os
from firecrawl import FirecrawlApp
from swarm import Agent, Swarm
from openai import OpenAI
import fireducks.pandas as pd
import numpy as np



In [None]:
api = OpenAI(api_key="")

In [4]:
client = Swarm(api)

In [5]:
data = pd.read_csv('https://raw.githubusercontent.com/mysycry/AI_First_Activities/refs/heads/main/AI_First_Day_6/ai%20first%20sales%20data%20-%20sales.csv')

In [6]:
# Dataset as provided
data = {
    "date": ["2020-05-11", "2020-05-11", "2020-05-11"],
    "source": ["google", "facebook", "google"],
    "medium": ["organic", "cpc", "cpc"],
    "delivery_available": ["no data", "no data", "no data"],
    "device_type": ["PC", "mobile", "mobile"],
    "promo_activated": ["no", "yes", "no"],
    "filter_used": ["no", "no", "no"],
    "pageviews": [4087, 4326, 3891],
    "visits": [1233, 544, 1450],
    "productClick": [5240, 9930, 5460],
    "addToCart": [1048, 1984, 1090],
    "checkout": [672, 1812.48, 766.72],
    "transactions": [90, 217, 100],
    "revenue": ["₱456,877", "₱1,289,066", "₱554,427"],
    "ad spend": ["₱384,039", "₱817,514", "₱435,105"]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Remove currency symbols and commas, and convert to numeric
df['revenue'] = df['revenue'].replace({'₱': '', ',': ''}, regex=True).astype(float)
df['ad spend'] = df['ad spend'].replace({'₱': '', ',': ''}, regex=True).astype(float)

# Convert columns to appropriate datatypes
df['date'] = pd.to_datetime(df['date'])
df['pageviews'] = df['pageviews'].astype(int)
df['visits'] = df['visits'].astype(int)
df['productClick'] = df['productClick'].astype(int)
df['addToCart'] = df['addToCart'].astype(int)
df['checkout'] = df['checkout'].astype(float)
df['transactions'] = df['transactions'].astype(int)

# Remove any rows with missing data
df = df.dropna()

# Display cleaned data
print(df.head())


        date    source   medium delivery_available device_type  \
0 2020-05-11    google  organic            no data          PC   
1 2020-05-11  facebook      cpc            no data      mobile   
2 2020-05-11    google      cpc            no data      mobile   

  promo_activated filter_used  pageviews  visits  productClick  addToCart  \
0              no          no       4087    1233          5240       1048   
1             yes          no       4326     544          9930       1984   
2              no          no       3891    1450          5460       1090   

   checkout  transactions    revenue  ad spend  
0    672.00            90   456877.0  384039.0  
1   1812.48           217  1289066.0  817514.0  
2    766.72           100   554427.0  435105.0  


# *DEFINE AGENT FUNCTIONS*


In [8]:
def detect_anomalies():
    # Check for missing data
    missing_data = df.isnull().sum()

    # Identify any obvious anomalies in numeric columns (outliers, etc.)
    anomalies = {
        "missing_data": missing_data,
        "outliers": df.describe()  # Detect outliers using basic statistics
    }

    return anomalies


def analyze_marketing_performance():
    # Calculate conversion rates
    df['conversion_rate'] = df['transactions'] / df['visits']

    # Calculate cost per transaction and revenue per spend
    df['cost_per_transaction'] = df['ad spend'] / (df['transactions'] + 1)
    df['revenue_per_spend'] = df['revenue'] / (df['ad spend'] + 1)

    # Aggregate data by source for overall performance analysis
    grouped = df.groupby('source').agg({
        'ad spend': 'sum',
        'transactions': 'sum',
        'revenue': 'sum',
        'conversion_rate': 'mean',
        'cost_per_transaction': 'mean',
        'revenue_per_spend': 'mean'
    })

    return grouped.sort_values('revenue_per_spend', ascending=False)


def analyze_customer_journey():
    # Calculate drop-offs at each stage in the customer journey
    df['click_to_cart'] = df['addToCart'] / df['productClick']
    df['cart_to_checkout'] = df['checkout'] / df['addToCart']
    df['checkout_to_transaction'] = df['transactions'] / df['checkout']

    # Summarize by source for the customer journey
    journey_stats = df.groupby('source').agg({
        'click_to_cart': 'mean',
        'cart_to_checkout': 'mean',
        'checkout_to_transaction': 'mean'
    })

    return journey_stats


def analyze_revenue_intelligence():
    # Revenue per visit and per pageview
    df['revenue_per_visit'] = df['revenue'] / df['visits']
    df['revenue_per_pageview'] = df['revenue'] / df['pageviews']

    # Identify top sources by revenue and revenue per spend
    grouped = df.groupby('source').agg({
        'revenue': 'sum',
        'revenue_per_visit': 'mean',
        'revenue_per_pageview': 'mean'
    })

    return grouped.sort_values('revenue', ascending=False)

*AGENTS*

In [9]:
data_detective_agent = Agent(
    name="Data Detective Agent",
    model="gpt-4o-mini",
    instructions="You are a data detective agent specialized in identifying anomalies, missing data, and issues within datasets.",
    functions=[detect_anomalies]
)


In [10]:
marketing_performance_agent = Agent(
    name="Marketing Performance Agent",
    model="gpt-4o-mini",
    instructions="You are a marketing performance agent specialized in analyzing key marketing metrics such as conversion rates, cost per transaction, and revenue per spend.",
    functions=[analyze_marketing_performance]
)

In [11]:
customer_journey_agent = Agent(
    name="Customer Journey Agent",
    model="gpt-4o-mini",
    instructions="You are a customer journey agent, analyzing how customers interact with the website, from pageviews to product clicks, adds to cart, and transactions.",
    functions=[analyze_customer_journey]
)


In [12]:
revenue_intelligence_agent = Agent(
    name="Revenue Intelligence Agent",
    model="gpt-4o-mini",
    instructions="You are a revenue intelligence agent, specialized in analyzing revenue generation and identifying high-performing marketing sources.",
    functions=[analyze_revenue_intelligence]
)




*OUTPUT*

In [13]:
# Run the Data Detective Agent
data_detective_response = client.run(
    agent=data_detective_agent,
    messages=[{"role": "user", "content": "Identify any data issues or anomalies."}]
)
print("\nData Detective Agent Results:")
print(data_detective_response.messages[-1]["content"])


Data Detective Agent Results:
The analysis of the dataset reveals the following data issues and anomalies:

### Missing Data
There are no missing values in the dataset across all columns:
- date: 0 missing
- source: 0 missing
- medium: 0 missing
- delivery_available: 0 missing
- device_type: 0 missing
- promo_activated: 0 missing
- filter_used: 0 missing
- pageviews: 0 missing
- visits: 0 missing
- productClick: 0 missing
- addToCart: 0 missing
- checkout: 0 missing
- transactions: 0 missing
- revenue: 0 missing
- ad spend: 0 missing

### Outliers
There are possible outliers within the dataset as indicated by standard deviation and mean calculations for various metrics. Here are the details for some of the columns:

1. **Pageviews**
   - Mean: 4101.33
   - Standard Deviation: 217.85
   - Range: Min 3891 - Max 4326

2. **Visits**
   - Mean: 1075.67
   - Standard Deviation: 473.05
   - Range: Min 544 - Max 1450

3. **Product Clicks**
   - Mean: 6876.67
   - Standard Deviation: 2646.55
 

In [14]:
# Run the Marketing Performance Agent
marketing_performance_response = client.run(
    agent=marketing_performance_agent,
    messages=[{"role": "user", "content": "Analyze the marketing performance and key metrics."}]
)
print("\nMarketing Performance Agent Results:")
print(marketing_performance_response.messages[-1]["content"])




Marketing Performance Agent Results:
Here's the analysis of the marketing performance and key metrics for the two platforms:

### Facebook
- **Ad Spend**: $817,514
- **Transactions**: 217
- **Revenue**: $1,289,066
- **Conversion Rate**: 39.89%
- **Cost per Transaction**: $3,750.06
- **Revenue per Spend**: $1.58

### Google
- **Ad Spend**: $819,144
- **Transactions**: 190
- **Revenue**: $1,011,304
- **Conversion Rate**: 7.10%
- **Cost per Transaction**: $4,264.09
- **Revenue per Spend**: $1.23

### Summary
- **Facebook** has a significantly higher conversion rate and lower cost per transaction compared to Google, making it a more effective platform for this period.
- While both platforms have similar ad spending, Facebook generated more revenue and transactions, yielding a better revenue per spend ratio.


In [15]:
# Run the Customer Journey Agent
customer_journey_response = client.run(
    agent=customer_journey_agent,
    messages=[{"role": "user", "content": "Analyze the customer journey and drop-off points."}]
)
print("\nCustomer Journey Agent Results:")
print(customer_journey_response.messages[-1]["content"])




Customer Journey Agent Results:
The analysis of the customer journey reveals the following conversion rates and drop-off points:

1. **Click to Cart**:
   - **Facebook**: 19.98%
   - **Google**: 19.98%

2. **Cart to Checkout**:
   - **Facebook**: 91.35%
   - **Google**: 67.23%

3. **Checkout to Transaction**:
   - **Facebook**: 11.97%
   - **Google**: 13.22%

### Drop-off Points:
- The most significant drop-off occurs from **Checkout to Transaction**:
  - Only about **12%** of customers from Facebook and **13%** from Google complete the purchase after reaching the checkout phase.
  
- While the transition from **Cart to Checkout** is high for Facebook users (91.35%), it drops significantly at the final checkout step.

### Summary:
- The customer journey shows that while users are interested and go as far as adding products to their carts, a considerable number do not complete the transaction, especially after reaching the checkout stage. 
- Improving the checkout experience and addres

In [16]:
# Run the Revenue Intelligence Agent
revenue_intelligence_response = client.run(
    agent=revenue_intelligence_agent,
    messages=[{"role": "user", "content": "Analyze revenue generation by traffic source."}]
)
print("\nRevenue Intelligence Agent Results:")
print(revenue_intelligence_response.messages[-1]["content"])


Revenue Intelligence Agent Results:
Here's the analysis of revenue generation by traffic source:

1. **Facebook**:
   - Total Revenue: $1,289,066
   - Revenue per Visit: $2,369.61
   - Revenue per Pageview: $297.98

2. **Google**:
   - Total Revenue: $1,011,304
   - Revenue per Visit: $376.45
   - Revenue per Pageview: $127.14

**Insights**:
- Facebook is generating more total revenue and has a significantly higher revenue per visit compared to Google. This suggests that traffic from Facebook is more valuable in terms of conversion.
- Google, while still a strong source of revenue, has lower metrics in comparison, indicating potential areas for optimization. 

Would you like to explore further or analyze any specific aspect in detail?
