# Southeast Asian Android Phone Marketplace Analysis
## Data Analysis and AI Insights

In [None]:
# Install required packages
# `!` runs each line as a shell command from the notebook.
# openai: provides the `OpenAI` client class imported in the next cell.
!pip install openai
# swarm: installed directly from the openai/swarm GitHub repository;
# provides the `Swarm` and `Agent` classes imported in the next cell.
!pip install git+https://github.com/openai/swarm.git

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from openai import OpenAI
from swarm import Swarm, Agent
import logging

# Set up logging
# Configure the root logger so messages at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module (`__name__`).
logger = logging.getLogger(__name__)

In [None]:
# Load and preprocess data
df = pd.read_csv('ai first sales data - sales.csv')

# Data preprocessing
# Metric columns that must be numeric for the aggregations and charts in
# the later cells.
numeric_columns = ['revenue', 'transactions', 'pageviews', 'visits',
                  'productClick', 'addToCart', 'checkout', 'ad_spend']
# Work on a copy so the raw `df` stays available unchanged.
df_clean = df.copy()

for col in numeric_columns:
    if col not in df_clean.columns:
        # Later cells index these columns directly, so surface the gap early.
        logger.warning("Expected numeric column %r not found in dataset", col)
        continue
    # errors='coerce' turns unparseable values into NaN without raising;
    # count how many were lost so the coercion is not silent.
    missing_before = int(df_clean[col].isna().sum())
    df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce')
    coerced = int(df_clean[col].isna().sum()) - missing_before
    if coerced:
        logger.warning(
            "Column %r: %d unparseable value(s) coerced to NaN", col, coerced
        )

if 'date' in df_clean.columns:
    # No errors= argument: pandas' default is to raise on unparseable dates.
    df_clean['date'] = pd.to_datetime(df_clean['date'])

print(f"Dataset loaded with {len(df_clean)} rows")

In [None]:
# Initialize OpenAI client
# Do not hard-code the API key in the notebook. When `api_key` is omitted,
# the OpenAI client reads it from the OPENAI_API_KEY environment variable,
# so set that variable before running this cell.
api = OpenAI()
# Swarm runs agents through the OpenAI client it is given.
client = Swarm(api)

In [None]:
# Create Data Quality Agent
data_quality_agent = Agent(
    name="Data Quality Agent",
    instructions="""Analyze data quality focusing on:
    1. Missing values
    2. Anomalies in metrics
    3. Data type validation
    4. Suspicious patterns"""
)

# The agent has static instructions and no functions, so a DataFrame passed
# through context_variables never reaches the model. Send a compact text
# profile of the data in the user message instead, so the analysis is based
# on the actual dataset.
data_profile = "\n\n".join([
    f"Rows: {len(df_clean)}",
    "Column dtypes:\n" + df_clean.dtypes.to_string(),
    "Missing values per column:\n" + df_clean.isna().sum().to_string(),
    "Summary statistics:\n" + df_clean.describe().to_string(),
])

# Run data quality analysis
quality_response = client.run(
    agent=data_quality_agent,
    messages=[{
        "role": "user",
        "content": "Analyze data quality and provide insights\n\n" + data_profile,
    }],
)

print("\nData Quality Analysis:")
# Swarm returns the conversation as plain dicts, so the reply text is read
# by key; attribute access (`.content`) raises AttributeError.
print(quality_response.messages[-1]["content"])

In [None]:
# Marketing Analysis Visualizations
# Sum revenue, ad spend and transactions for each value of the 'source'
# column; reset_index() turns 'source' back into a regular column for plotting.
channel_metrics = ('revenue', 'ad_spend', 'transactions')
source_metrics = (
    df_clean
    .groupby('source')
    .agg({metric: 'sum' for metric in channel_metrics})
    .reset_index()
)

# Revenue by Channel: one pie slice per source, sized by summed revenue.
fig1 = px.pie(
    data_frame=source_metrics,
    names='source',
    values='revenue',
    title='Revenue by Channel',
)
fig1.show()

# Revenue vs Ad Spend: both metrics plotted as bars for every source.
fig2 = px.bar(
    data_frame=source_metrics,
    x='source',
    y=['revenue', 'ad_spend'],
    title='Revenue vs Ad Spend by Channel',
)
fig2.show()

In [None]:
# Customer Journey Funnel
# Each funnel stage label paired with the df_clean column summed for it,
# listed in the order the stages appear in the chart.
stage_columns = [
    ('Pageviews', 'pageviews'),
    ('Product Clicks', 'productClick'),
    ('Add to Cart', 'addToCart'),
    ('Checkout', 'checkout'),
    ('Transactions', 'transactions'),
]
funnel_data = {
    'Stage': [label for label, _ in stage_columns],
    'Count': [df_clean[column].sum() for _, column in stage_columns],
}

# Stage labels go on the y axis, summed counts on the x axis.
funnel_trace = go.Funnel(x=funnel_data['Count'], y=funnel_data['Stage'])
fig = go.Figure(funnel_trace)
fig.update_layout(title='Customer Journey Funnel')
fig.show()

In [None]:
# Revenue Trend Analysis
# Total revenue per distinct 'date' value, with 'date' restored as a column.
revenue_by_date = df_clean.groupby('date')['revenue'].sum()
daily_revenue = revenue_by_date.reset_index()

fig = px.line(
    data_frame=daily_revenue,
    x='date',
    y='revenue',
    title='Daily Revenue Trend',
)
# Force the x axis to be treated as a date axis.
fig.update_xaxes(type='date')
fig.show()