In [None]:
import json
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.legend_handler import HandlerLine2D
from matplotlib.patches import Patch
import pandas as pd
import numpy as np

from app.data.utils.bigquery import BigQuery

# Shared client used by every query cell below (project wrapper from
# app.data.utils.bigquery). NOTE(review): run_query is used as if it returns a
# pandas DataFrame — confirm against the wrapper.
bq = BigQuery()

In [None]:
# Lift of the pooled line-item CTR over the plain average of its creatives' CTRs.
#   by_ds_creative: daily prints/clicks per creative, keeping only creative-days
#                   with at least 100 prints.
#   ctrs:           per day and line item, keeps line items with more than one
#                   creative and computes both the unweighted mean of the creative
#                   CTRs and the pooled CTR (total clicks / total prints), in percent.
#   grouped:        collapses the days of each (line item, set of creatives) pair,
#                   averaging the daily CTR figures and summing prints/clicks.
# The final SELECT keeps groups whose first-to-last day span exceeds 2 days and
# derives the absolute and relative lift plus the implied additional clicks.
sql = '''
WITH by_ds_creative AS
(
    SELECT
        ds,
        site,
        campaign_id,
        line_item_id,
        creative_id,
        SUM(n_prints) AS n_prints,
        SUM(n_clicks) AS n_clicks
    FROM `meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_HOUR`
    WHERE ds BETWEEN DATE("2022-12-27") + 1 AND CURRENT_DATE - 1
    GROUP BY 1,2,3,4,5
    HAVING n_prints >= 100
),
ctrs AS
(
    SELECT
        ds,
        site,
        campaign_id,
        line_item_id,
        STRING_AGG(CAST(creative_id AS STRING), ',' ORDER BY creative_id) AS creative_ids,
        COUNT(DISTINCT creative_id) AS n_creatives,
        SUM(n_prints) AS n_prints,
        SUM(n_clicks) AS n_clicks,
        AVG(SAFE_DIVIDE(n_clicks, n_prints)) * 100 AS avg_ctr_by_creative,
        SAFE_DIVIDE(SUM(n_clicks), SUM(n_prints)) * 100 AS ctr_by_line_item
    FROM by_ds_creative
    GROUP BY 1,2,3,4
    HAVING n_creatives > 1
),
grouped AS
(
    SELECT
        site,
        campaign_id,
        line_item_id,
        creative_ids,
        n_creatives,
        MIN(ds) AS min_ds,
        MAX(ds) AS max_ds,
        SUM(n_prints) AS n_prints,
        SUM(n_clicks) AS n_clicks,
        AVG(avg_ctr_by_creative) AS avg_ctr_by_creative,
        AVG(ctr_by_line_item) AS ctr_by_line_item
    FROM ctrs
    GROUP BY 1,2,3,4,5
)

SELECT
    * EXCEPT(n_clicks),
    DATE_DIFF(max_ds, min_ds, DAY) + 1 AS n_days,
    n_clicks,
    avg_ctr_by_creative * n_prints / 100 AS avg_ctr_by_creative_x_n_prints,
    (ctr_by_line_item - avg_ctr_by_creative) * n_prints / 100 AS additional_clicks,
    ctr_by_line_item - avg_ctr_by_creative AS lift,
    SAFE_DIVIDE((ctr_by_line_item - avg_ctr_by_creative), avg_ctr_by_creative) AS relative_increment
FROM grouped
WHERE DATE_DIFF(max_ds, min_ds, DAY) + 1 > 2
'''

# Run the query and display the groups with the largest relative lift first.
# sort_values returns a new frame, so df itself keeps the order returned by the query.
df = bq.run_query(sql)
df.sort_values('relative_increment', ascending=False)

In [None]:
# Total additional clicks across every group returned by the lift query.
df.loc[:, 'additional_clicks'].sum()

In [None]:
# Additional clicks as a fraction of the clicks implied by the average
# per-creative CTR (avg_ctr_by_creative_x_n_prints).
extra_clicks_total = df['additional_clicks'].sum()
baseline_clicks_total = df['avg_ctr_by_creative_x_n_prints'].sum()
extra_clicks_total / baseline_clicks_total

In [None]:
# Line item to inspect in detail in the cells below.
campaign_id = 5834
line_item_id = 12797

# Daily prints, clicks and CTR for every creative of the chosen line item.
# SAFE_DIVIDE yields NULL (instead of a BigQuery "division by zero" error that
# aborts the whole query) for creative-days with zero prints, matching how the
# lift query computes its CTRs.
sql = f'''
WITH grouped AS
(
    SELECT ds, campaign_id, line_item_id, creative_id, SUM(n_clicks) AS n_clicks, SUM(n_prints) AS n_prints
    FROM `meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_HOUR`
    WHERE campaign_id = {campaign_id} AND line_item_id = {line_item_id}
    AND ds >= DATE("2023-01-05")
    GROUP BY 1,2,3,4
)

SELECT *, SAFE_DIVIDE(n_clicks, n_prints) AS ctr
FROM grouped
ORDER BY ds, creative_id
'''

# NOTE: this rebinds df, replacing the lift-query result from the earlier cell.
df = bq.run_query(sql)

In [None]:
# Day x creative tables: one with the number of prints, one with the CTR.
# pivot_table aggregates with its default (mean) if a day/creative pair repeats.
pivot = df.pivot_table(values='n_prints', index=['ds'], columns=['creative_id'])
pivot_ctr = df.pivot_table(values='ctr', index=['ds'], columns=['creative_id'])

In [None]:
# Share of each day's prints that went to each creative (every row sums to 1).
# Row-wise division via div(axis=0) avoids the double transpose, and
# DataFrame.sum replaces passing the builtin `sum` to agg, which pandas 2.x
# deprecates because its meaning is going to change.
std_pivot = pivot.div(pivot.sum(axis=1), axis=0)

In [None]:
# Total prints and clicks per creative over the whole period in df.
df_grouped = df.groupby('creative_id').agg({'n_prints': 'sum', 'n_clicks': 'sum'})

In [None]:
# Append a 'Total' row holding the column sums over all creatives.
# Any 'Total' row left over from a previous execution of this cell is dropped
# first, so re-running the cell does not append a second, double-counted total.
per_creative = df_grouped.drop(index='Total', errors='ignore')
df_grouped = pd.concat([per_creative, per_creative.sum().to_frame('Total').T])

In [None]:
# CTR per creative (and for the 'Total' row) computed from the summed counts.
df_grouped['ctr'] = df_grouped['n_clicks'].div(df_grouped['n_prints'])
df_grouped

In [None]:
# Two stacked panels for the line item held in df: daily CTR per creative on
# top (ax1) and each creative's daily share of prints below (ax2).
sns.set_theme()
fig, (ax1, ax2) = plt.subplots(2, figsize=(7, 9))

fig.suptitle(f'campaign_id = {df["campaign_id"].values[0]}, line_item_id = {df["line_item_id"].values[0]}')

# Bottom panel first: its tick positions are needed to align the top panel.
# std_pivot holds shares of daily prints, so the bars add up to 1 per day.
std_pivot.plot.bar(stacked=True, ax = ax2)
ax2.set_ylabel('Prints')
ax2.get_legend().remove()

# Relabel a copy of the CTR table with the bar chart's x tick positions so the
# lines sit above the matching bars. set_axis returns a new frame, leaving
# pivot_ctr (and its ds index) untouched for any other cell that uses it.
ctr_at_bar_positions = pivot_ctr.set_axis(ax2.get_xticks(), axis=0)
ctr_at_bar_positions.plot(ax=ax1)
ax1.set_ylabel('ctr')
ax1.set_xlim(ax2.get_xlim())
ax1.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
ax1.set_xlabel('')

plt.tight_layout()
plt.savefig(f'bidder_example_{df["campaign_id"].values[0]}_{df["line_item_id"].values[0]}.png', dpi=300)

In [None]:
# Stacked-area view of each creative's daily share of prints.
# All creatives present in std_pivot are plotted instead of a hard-coded list of
# ids, so the cell keeps working when campaign_id / line_item_id change.
# A creative with no prints on a day has a missing share; it is drawn as 0 so
# that the stacked totals stay defined.
shares = std_pivot.fillna(0)
plt.stackplot(shares.index, [shares[creative] for creative in shares.columns])
plt.xticks(rotation = 90)
plt.show()

In [None]:
# Running (cumulative) clicks and prints per creative, one row per day.
# Columns are a two-level index: (metric, creative_id).
df_pivoted = df.pivot(index='ds', columns='creative_id', values=['n_clicks', 'n_prints']).cumsum()

# Add one ('ctr', creative_id) column per creative: cumulative clicks divided by
# cumulative prints. Iterating over the creatives of a single metric computes
# each ratio once; looping over every column would visit each creative twice
# (once per metric) and recompute the same column.
for creative_id in df_pivoted['n_prints'].columns:
    df_pivoted[('ctr', creative_id)] = df_pivoted[('n_clicks', creative_id)] / df_pivoted[('n_prints', creative_id)]

In [None]:
# Line chart of the accumulated CTR of every creative, saved as a PNG.
# Expects df_pivoted to still carry the 'ctr' columns built from the cumulative
# clicks/prints table.
sns.set_theme()
fig, ax = plt.subplots(figsize=(7, 7))
fig.suptitle(f'campaign_id = {df["campaign_id"].values[0]}, line_item_id = {df["line_item_id"].values[0]}')

# y='ctr' selects every ('ctr', creative_id) column of the two-level frame.
df_pivoted.plot(y='ctr', ax=ax)
ax.set(xlabel='', ylabel='accumulated ctr')
plt.xticks(rotation=90)

fig.tight_layout()
plt.savefig(f'bidder_accumulated_ctr_{df["campaign_id"].values[0]}_{df["line_item_id"].values[0]}.png', dpi=300)

In [None]:
# Day x creative table of absolute prints.
# NOTE: rebinds df_pivoted, replacing the cumulative table built earlier.
df_pivoted = df.set_index(['ds', 'creative_id'])['n_prints'].unstack('creative_id')

In [None]:
# Stacked bar chart of the absolute daily prints per creative, saved as a PNG.
sns.set_theme()
fig, ax = plt.subplots(figsize=(7, 7))
fig.suptitle(f'campaign_id = {df["campaign_id"].values[0]}, line_item_id = {df["line_item_id"].values[0]}')

# One bar per day, split by creative.
df_pivoted.plot(kind='bar', stacked=True, ax=ax)
ax.set(xlabel='', ylabel='Prints')

fig.tight_layout()
plt.savefig(f'bidder_absolute_prints_{df["campaign_id"].values[0]}_{df["line_item_id"].values[0]}.png', dpi=300)

In [None]:
# Size of the whole table since 2023-01-05: distinct campaigns, distinct line
# items and total prints, with no filtering on print volume or creative count.
sql = '''
SELECT
    COUNT(DISTINCT campaign_id) AS campaigns,
    COUNT(DISTINCT line_item_id) AS line_items,
    SUM(n_prints) AS n_prints
FROM `meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_HOUR`
WHERE ds >= DATE("2023-01-05")
'''

# Displayed as the cell output; the result is not stored.
bq.run_query(sql)

In [None]:
# Same counts as the previous cell, restricted to the traffic that passes the
# filters of the lift analysis: creative-days with at least 100 prints, and
# line-item days with more than one such creative. The CTR columns computed in
# `ctrs` are not used by the final SELECT.
sql = '''
WITH by_ds_creative AS
(
    SELECT
        ds,
        site,
        campaign_id,
        line_item_id,
        creative_id,
        SUM(n_prints) AS n_prints,
        SUM(n_clicks) AS n_clicks
    FROM `meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_HOUR`
    WHERE ds >= DATE("2023-01-05")
    GROUP BY 1,2,3,4,5
    HAVING n_prints >= 100
),
ctrs AS
(
    SELECT
        ds,
        site,
        campaign_id,
        line_item_id,
        STRING_AGG(CAST(creative_id AS STRING), ',' ORDER BY creative_id) AS creative_ids,
        COUNT(DISTINCT creative_id) AS n_creatives,
        SUM(n_prints) AS n_prints,
        SUM(n_clicks) AS n_clicks,
        AVG(SAFE_DIVIDE(n_clicks, n_prints)) * 100 AS avg_ctr_by_creative,
        SAFE_DIVIDE(SUM(n_clicks), SUM(n_prints)) * 100 AS ctr_by_line_item
    FROM by_ds_creative
    GROUP BY 1,2,3,4
    HAVING n_creatives > 1
)

SELECT
    COUNT(DISTINCT campaign_id) AS campaigns,
    COUNT(DISTINCT line_item_id) AS line_items,
    SUM(n_prints) AS n_prints
FROM ctrs
'''

# Displayed as the cell output; the result is not stored.
bq.run_query(sql)