# P2 — Marketing Funnel & A/B : 01_eda

In [None]:
import pandas as pd, numpy as np
from sqlalchemy import create_engine, text
import matplotlib.pyplot as plt

# --- Load the staged ads table and print a quick data-quality overview
DB = 'sqlite:///../../da4.db'
TABLE = 'p2_ads_stage'

engine = create_engine(DB)
# Read-only query, so a plain connection suffices (there is nothing to commit).
with engine.connect() as conn:
    # TABLE is the constant defined above, not external input.
    df = pd.read_sql(text(f'SELECT * FROM {TABLE}'), conn)

print('Shape:', df.shape)
display(df.head())
# DataFrame.describe() accepts only percentiles/include/exclude; numeric
# columns are selected through `include` rather than a `numeric_only` flag.
display(df.describe(include=[np.number]).T)
print('\nNull % (top):')
# Percentage of missing values per column, 15 highest first.
print((df.isna().mean()*100).sort_values(ascending=False).head(15))


# --- Channel performance table
# Sum impressions, clicks, conversions, cost and revenue per channel.
agg = (df.groupby('channel', as_index=False)
         .agg(imp=('impressions','sum'), clk=('clicks','sum'), cnv=('conversions','sum'),
              cost=('cost','sum'), rev=('revenue','sum')))
# Zero denominators are mapped to NaN (not pd.NA) so each ratio column stays
# float64; pd.NA would turn the column into object dtype, which matplotlib
# cannot convert to floats when plotting.
agg['ctr']  = agg['clk']  / agg['imp'].replace(0, np.nan)   # clicks / impressions
agg['cvr']  = agg['cnv']  / agg['clk'].replace(0, np.nan)   # conversions / clicks
agg['cac']  = agg['cost'] / agg['cnv'].replace(0, np.nan)   # cost / conversions
agg['roas'] = agg['rev']  / agg['cost'].replace(0, np.nan)  # revenue / cost
# Highest ROAS first; rows with undefined (NaN) ROAS are placed last.
agg = agg.sort_values('roas', ascending=False)
agg


# --- ROAS by channel (bar)
# One bar per channel, drawn in agg's current row order.
plt.figure()
plt.bar(agg['channel'], agg['roas'])
# Slanted, right-aligned tick labels for the channel names.
plt.xticks(rotation=45, ha='right')
plt.title('ROAS by Channel')
plt.tight_layout()
plt.show()


# --- Monthly ROAS trend
# Bucket each row into its calendar month, stored as a string period label.
df['ym'] = pd.to_datetime(df['date']).dt.to_period('M').astype(str)
# Total cost and revenue per month, ordered by the month label.
m = (df.groupby('ym', as_index=False)
       .agg(cost=('cost','sum'), rev=('revenue','sum'))
       .sort_values('ym'))
# A zero-cost month yields NaN (not pd.NA) so 'roas' stays float64 and can be
# plotted; NaN points simply leave a gap in the line.
m['roas'] = m['rev'] / m['cost'].replace(0, np.nan)
plt.figure()
plt.plot(m['ym'], m['roas'])
plt.xticks(rotation=90)
plt.title('Monthly ROAS')
plt.tight_layout()
plt.show()
m
