In [None]:
import json
from datetime import timedelta
from getpass import getpass

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import ticker
from matplotlib.legend_handler import HandlerLine2D
from matplotlib.patches import Patch

from melitk.fda2 import inventory

from app.data.utils.bigquery import BigQuery

# BigQuery helper (imported from app.data.utils.bigquery); used by the query cells below.
bq = BigQuery()

# Ask for the token interactively so it is never written into the notebook.
token = getpass()
tiger_token = f"Bearer {token}"
# Initialise the artifact inventory with the bearer-prefixed token.
inventory.init(token=tiger_token)

In [None]:
# Rows of BQ_PRINTS_CLICKS_PER_DAY for campaign 5278 / line item 11346,
# with CTR = n_clicks / n_prints added by the query itself.
sql = '''
SELECT *, n_clicks / n_prints AS CTR
FROM `meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY`
WHERE campaign_id=5278 AND line_item_id=11346
ORDER BY line_item_id, creative_id, ds
'''

# The result is used as a pandas DataFrame by the cells below.
df = bq.run_query(sql)

In [None]:
# Wide table of prints: one row per `ds`, one column per creative.
pivot = df.pivot_table(index=['ds'], columns=['creative_id'], values='n_prints')

In [None]:
# Share of each day's prints that went to each creative (row-wise normalisation).
# DataFrame.sum is called directly: handing the builtin `sum` to `agg` is
# deprecated in recent pandas releases.
std_pivot = pivot.div(pivot.sum(axis=1), axis=0)

In [None]:
# Total prints and clicks per creative.
# The columns are selected with a list: tuple-style selection on a GroupBy
# (`[...]['n_prints', 'n_clicks']`) raises in pandas >= 2.0.
df_grouped = df.groupby('creative_id')[['n_prints', 'n_clicks']].sum()

In [None]:
# Append a 'Total' row holding the column-wise sums of the grouped table.
total_row = df_grouped.sum().to_frame(name='Total').T
df_grouped = pd.concat([df_grouped, total_row])

In [None]:
# Click-through rate for every row of the grouped table.
df_grouped['ctr'] = df_grouped['n_clicks'].div(df_grouped['n_prints'])

In [None]:
# Scratch arithmetic, kept as-is.
# NOTE(review): the constants are unlabelled — presumably a CTR (0.003982) scaled to
# one million prints minus a click count (3086); confirm and move to named constants.
0.003982 * 1000000 - 3086

In [None]:
# Scratch arithmetic: 0.0039 minus the mean of 0.0023 and 0.0029.
# NOTE(review): presumably CTR values copied from an earlier output — confirm their source.
0.0039 - (0.0023 + 0.0029)/2

In [None]:
# Column totals of the pivot: prints per creative summed over all days.
pivot.sum(axis=0)

In [None]:
# Stacked bars of daily prints per creative, with each creative's daily CTR
# drawn as a line on the same axes. Saved to 'dist.png'.
fig, ax = plt.subplots(figsize=(10, 7))
pivot.plot.bar(stacked=True, ax=ax)

# The bars occupy positions 0..n-1 in `pivot.index` order, so the lines and the
# tick labels are positioned through that index instead of through whichever
# creative the loop happened to visit last.
day_labels = [str(day) for day in pivot.index]
creative_handler = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    x = pivot.index.get_indexer(df_temp['ds'])
    line, = ax.plot(x, df_temp['CTR'], label=creative_id)
    # Reuse the line colour for this creative's legend patch.
    color = line.get_markeredgecolor()
    creative_handler.append(Patch(label=creative_id, facecolor=color, edgecolor=color))

ax.set_xticks(ticks=list(range(len(day_labels))), labels=day_labels, rotation=90)
ax.legend(bbox_to_anchor=(1, 1), handles=creative_handler, title='Creative id')
ax.set_xlabel('')
ax.set_ylabel('Number of prints')
ax.set_ylim((0, 255000))
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda value, _pos: format(int(value), ',')))
fig.tight_layout()
plt.savefig('dist.png', dpi=300)

In [None]:
# Scratch arithmetic, kept as-is.
# NOTE(review): none of the constants are explained in the notebook — confirm what
# 30, 2, 8, 1166307 and 900 stand for, or move them to named constants.
30 * 2 / 8 * 1166307 *900/1000000

In [None]:
# Same prints/CTR view as the 'dist.png' cell, for line item 11867. Saved to 'dist2.png'.
sql = '''
SELECT *, n_clicks / n_prints AS CTR
FROM `meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY`
WHERE campaign_id=5278 AND line_item_id=11867
ORDER BY line_item_id, creative_id, ds
'''

df = bq.run_query(sql)
pivot = pd.pivot_table(data=df, index=['ds'], columns=['creative_id'], values='n_prints')
# Row-wise shares; DataFrame.sum replaces the deprecated `agg(sum, ...)` form.
std_pivot = pivot.div(pivot.sum(axis=1), axis=0)

fig, ax = plt.subplots(figsize=(10, 7))
pivot.plot.bar(stacked=True, ax=ax)

# The bars occupy positions 0..n-1 in `pivot.index` order, so the lines and the
# tick labels are positioned through that index instead of through whichever
# creative the loop happened to visit last.
day_labels = [str(day) for day in pivot.index]
creative_handler = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    x = pivot.index.get_indexer(df_temp['ds'])
    line, = ax.plot(x, df_temp['CTR'], label=creative_id)
    # Reuse the line colour for this creative's legend patch.
    color = line.get_markeredgecolor()
    creative_handler.append(Patch(label=creative_id, facecolor=color, edgecolor=color))

ax.set_xticks(ticks=list(range(len(day_labels))), labels=day_labels, rotation=90)
ax.legend(bbox_to_anchor=(1, 1), handles=creative_handler, title='Creative id')
ax.set_xlabel('')
ax.set_ylabel('Number of prints')
ax.set_ylim((0, 255000))
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda value, _pos: format(int(value), ',')))
fig.tight_layout()
plt.savefig('dist2.png', dpi=300)

In [None]:
# Left panel: stacked daily prints per creative. Right panel: daily CTR per creative.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
pivot.plot.bar(stacked=True, ax=ax1)

# Both panels share the day order of `pivot.index`; positions and tick labels
# are taken from it rather than from the last creative visited by the loop.
day_labels = [str(day) for day in pivot.index]
creative_handler = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    x = pivot.index.get_indexer(df_temp['ds'])
    line, = ax2.plot(x, df_temp['CTR'], label=creative_id)
    # Reuse the line colour for this creative's legend patch.
    color = line.get_markeredgecolor()
    creative_handler.append(Patch(label=creative_id, facecolor=color, edgecolor=color))

ax2.set_xticks(ticks=list(range(len(day_labels))), labels=day_labels, rotation=90)
ax2.legend(bbox_to_anchor=(1, 1), handles=creative_handler, title='Creative id')
ax1.set_xlabel('')
# The right-hand legend covers both panels, so the bar legend is hidden.
ax1.legend().set_visible(False)
ax1.set_ylabel('Distribución')
ax2.set_ylabel('CTR')
fig.tight_layout()
plt.show()

In [None]:
# Left panel: stacked daily print shares per creative. Right panel: daily CTR per creative.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
std_pivot.plot.bar(stacked=True, ax=ax1)

# Both panels share the day order of `std_pivot.index`; positions and tick labels
# are taken from it rather than from the last creative visited by the loop.
day_labels = [str(day) for day in std_pivot.index]
creative_handler = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    x = std_pivot.index.get_indexer(df_temp['ds'])
    line, = ax2.plot(x, df_temp['CTR'], label=creative_id)
    # Reuse the line colour for this creative's legend patch.
    color = line.get_markeredgecolor()
    creative_handler.append(Patch(label=creative_id, facecolor=color, edgecolor=color))

ax2.set_xticks(ticks=list(range(len(day_labels))), labels=day_labels, rotation=90)
ax2.legend(bbox_to_anchor=(1, 1), handles=creative_handler, title='Creative id')
ax1.set_xlabel('')
# The right-hand legend covers both panels, so the bar legend is hidden.
ax1.legend().set_visible(False)
ax1.set_ylabel('Distribución')
ax2.set_ylabel('CTR')
fig.tight_layout()
plt.show()

In [None]:
# Take the first inventory entry named 'test_cr_parameters'.
matching_artifacts = inventory.filter(name='test_cr_parameters')
artifact_input = next(matching_artifacts)

In [None]:
# Decode the JSON payload of the artifact fetched from the inventory.
# Reads from `artifact_input`: `artifact` is only created in a later cell, so
# referencing it here fails with NameError on a fresh kernel.
dictionary = json.loads(artifact_input.load_to_bytes())

In [None]:
# IPython-only introspection (`??`): displays the source of `inventory.create_artifact`.
# Development aid; this line is not valid plain Python.
inventory.create_artifact??

In [None]:
# Create the destination artifact in the inventory.
# NOTE(review): the unit of `ttl` is not visible here — presumably days; confirm.
artifact_name = 'prod_ctr_beta_parameters'
artifact = inventory.create_artifact(
    artifact_name=artifact_name,
    version='0.0.1-MLA-Production',
    type_='fda.Bytes',
    ttl=7,
)

In [None]:
# Copy the raw bytes of the source artifact into the newly created artifact.
parameter_bytes = artifact_input.load_to_bytes()
artifact.save_from_bytes(parameter_bytes)

In [None]:
def dict2dataframe(dictionary: dict) -> pd.DataFrame:
    """Flatten a three-level parameter mapping into a long DataFrame.

    Parameters
    ----------
    dictionary : dict
        Nested mapping ``{campaign_id: {line_item_id: {creative_id: params}}}``
        where every ``params`` mapping has ``'alpha'`` and ``'beta'`` entries.

    Returns
    -------
    pd.DataFrame
        One row per innermost entry with the columns ``campaign_id``,
        ``line_item_id``, ``creative_id``, ``alpha`` and ``beta``, on a fresh
        0..n-1 index. An empty input yields an empty frame that still carries
        those columns.

    Raises
    ------
    KeyError
        If a ``params`` mapping lacks ``'alpha'`` or ``'beta'``.
    """
    columns = ['campaign_id', 'line_item_id', 'creative_id', 'alpha', 'beta']
    # Collect plain records and build the frame once: concatenating inside the
    # loop re-copies the accumulated frame on every iteration and leaves every
    # row with the same index label.
    records = [
        (campaign_id, line_item_id, creative_id, params['alpha'], params['beta'])
        for campaign_id, line_items in dictionary.items()
        for line_item_id, creatives in line_items.items()
        for creative_id, params in creatives.items()
    ]
    return pd.DataFrame(records, columns=columns)

# Flatten the decoded parameters into a table (rebinds the notebook-wide `df`).
df = dict2dataframe(dictionary)

In [None]:
# Row count per (ds, campaign_id, line_item_id), returned as `n_creatives`.
sql = '''
SELECT ds, campaign_id, line_item_id, COUNT(*) AS n_creatives
FROM meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY
GROUP BY 1,2,3
'''

# Rebinds the notebook-wide `df` to the result of this query.
df = bq.run_query(sql)

In [None]:
# Groups with more than one row on 2022-12-14.
has_several_creatives = df['n_creatives'] > 1
is_target_day = df['ds'].map(str) == '2022-12-14'
df[has_several_creatives & is_target_day]

In [None]:
# Campaign 5278, line item 11867 (11346 is commented out inside the IN list).
# `ctr` is computed from the per-row n_clicks / n_prints, while the returned
# n_prints / n_clicks are running sums per (campaign, line item, creative)
# ordered by ds.
sql = '''
SELECT * EXCEPT(n_prints, n_clicks),
    n_clicks / n_prints AS ctr,
    SUM(n_prints) OVER cumulative_window AS n_prints,
    SUM(n_clicks) OVER cumulative_window AS n_clicks,
FROM meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY
WHERE campaign_id = 5278 AND line_item_id IN (11867/*, 11346*/)

WINDOW cumulative_window AS
(
    PARTITION BY campaign_id, line_item_id, creative_id
    ORDER BY ds
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)

ORDER BY line_item_id, creative_id, ds
'''

# Rebinds the notebook-wide `df` to the result of this query.
df = bq.run_query(sql)

In [None]:
# Preview the first rows of the current `df`.
df.head()

In [None]:
def beta_simulation(n_prints: int, n_clicks: int, size: int) -> "tuple[np.ndarray, np.ndarray]":
    """Sample a Beta distribution built from click/print counts and summarise it.

    Parameters
    ----------
    n_prints : int
        Print count; ``n_prints + 1`` is the second Beta shape parameter.
    n_clicks : int
        Click count; ``n_clicks + 1`` is the first Beta shape parameter.
    size : int
        Number of samples to draw.

    Returns
    -------
    theta : np.ndarray
        The ``size`` draws (global NumPy random state; seed it for reproducibility).
    percentiles : np.ndarray
        The 0.5th, 50th and 99.5th percentiles of ``theta``, in that order.
    """
    # NOTE(review): the second shape parameter uses n_prints rather than the
    # non-click count (n_prints - n_clicks); confirm this is intentional.
    theta = np.random.beta(n_clicks + 1, n_prints + 1, size=size)
    # The cut points are fractions, so np.quantile is the right call:
    # np.percentile expects values in [0, 100] and would read .5 as the 0.5th
    # percentile instead of the median.
    percentiles = np.quantile(theta, [.005, .5, .995])

    return theta, percentiles

In [None]:
# Rows of the current `df` whose `ds` renders as 2022-12-13.
is_target_day = df['ds'].map(str) == "2022-12-13"
df[is_target_day]

In [None]:
# One simulation per row, fed with that row's cumulative clicks and prints.
# beta_simulation returns (samples, percentiles); only the percentiles
# (element 1) are stored — calling .tolist() on the tuple itself fails.
df.loc[:, ['p015', 'p50', 'p995']] = [
    beta_simulation(n_clicks=n_clicks, n_prints=n_prints, size=100000)[1].tolist()
    for n_clicks, n_prints in df[['n_clicks', 'n_prints']].values
]

In [None]:
# Campaign 5278, line item 11346 (11867 is commented out inside the IN list):
# per-row `ctr` plus running sums of prints and clicks per creative.
sql = '''
SELECT * EXCEPT(n_prints, n_clicks),
    n_clicks / n_prints AS ctr,
    SUM(n_prints) OVER cumulative_window AS n_prints,
    SUM(n_clicks) OVER cumulative_window AS n_clicks,
FROM meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY
WHERE campaign_id = 5278 AND line_item_id IN (/*11867,*/11346)

WINDOW cumulative_window AS
(
    PARTITION BY campaign_id, line_item_id, creative_id
    ORDER BY ds
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)

ORDER BY line_item_id, creative_id, ds
'''

df = bq.run_query(sql)
# Percentiles (element 1 of the returned pair) of one simulation per row.
df.loc[:, ['p015', 'p50', 'p995']] = [
    beta_simulation(n_clicks=n_clicks, n_prints=n_prints, size=100000)[1].tolist()
    for n_clicks, n_prints in df[['n_clicks', 'n_prints']].values
]

fig, ax = plt.subplots(figsize=(10, 6))
lines = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    n_days = len(df_temp)
    # The band and its centre line are drawn one position to the right of the
    # daily CTR they were computed from.
    shifted_positions = range(1, n_days + 1)
    ax.fill_between(shifted_positions, df_temp['p015']*100, df_temp['p995']*100, alpha=.3)
    line, = ax.plot(shifted_positions, df_temp['p50']*100, label=f'{creative_id} cumulative beta median')
    line2, = ax.plot(range(n_days), df_temp['ctr']*100, color=line.get_color(), ls='dotted', label=f'{creative_id} daily ctr')
    lines.extend([line, line2])
ax.legend(handles=lines, bbox_to_anchor=(1,1))
# Tick labels come from the last creative iterated, plus one extra day;
# this assumes every creative covers the same days.
ax.set_xticks(range(len(df_temp) + 1), df_temp['ds'].values.tolist() + [df_temp['ds'].max() + timedelta(days=1)])
plt.xticks(rotation=90)
ax.set_ylabel('ctr')
# Values were multiplied by 100 above, so the axis is in percent units.
ax.set_ylim((0, 0.6))
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
plt.tight_layout()
plt.savefig('ctr_performance.png', dpi=300)

In [None]:
# Campaign 5278, line item 11867 (11346 is commented out inside the IN list):
# per-row `ctr` plus running sums of prints and clicks per creative.
sql = '''
SELECT * EXCEPT(n_prints, n_clicks),
    n_clicks / n_prints AS ctr,
    SUM(n_prints) OVER cumulative_window AS n_prints,
    SUM(n_clicks) OVER cumulative_window AS n_clicks,
FROM meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY
WHERE campaign_id = 5278 AND line_item_id IN (11867/*,11346*/)

WINDOW cumulative_window AS
(
    PARTITION BY campaign_id, line_item_id, creative_id
    ORDER BY ds
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)

ORDER BY line_item_id, creative_id, ds
'''

df = bq.run_query(sql)
# beta_simulation returns (samples, percentiles); the percentiles are element 1.
# Calling .tolist() on the returned tuple itself raises AttributeError.
df.loc[:, ['p015', 'p50', 'p995']] = [
    beta_simulation(n_clicks=n_clicks, n_prints=n_prints, size=100000)[1].tolist()
    for n_clicks, n_prints in df[['n_clicks', 'n_prints']].values
]

fig, ax = plt.subplots(figsize=(10, 6))
lines = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    n_days = len(df_temp)
    # The band and its centre line are drawn one position to the right of the
    # daily CTR they were computed from.
    shifted_positions = range(1, n_days + 1)
    ax.fill_between(shifted_positions, df_temp['p015']*100, df_temp['p995']*100, alpha=.3)
    line, = ax.plot(shifted_positions, df_temp['p50']*100, label=f'{creative_id} cumulative beta median')
    line2, = ax.plot(range(n_days), df_temp['ctr']*100, color=line.get_color(), ls='dotted', label=f'{creative_id} daily ctr')
    lines.extend([line, line2])
ax.legend(handles=lines, bbox_to_anchor=(1,1))
# Tick labels come from the last creative iterated, plus one extra day;
# this assumes every creative covers the same days.
ax.set_xticks(range(len(df_temp) + 1), df_temp['ds'].values.tolist() + [df_temp['ds'].max() + timedelta(days=1)])
plt.xticks(rotation=90)
ax.set_ylabel('ctr')
# Values were multiplied by 100 above, so the axis is in percent units.
ax.set_ylim((0, 1.3))
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
plt.tight_layout()
plt.savefig('ctr_performance2.png', dpi=300)

In [None]:
# Campaign 5278, line item 11867 (11346 is commented out inside the IN list):
# per-row `ctr` plus running sums of prints and clicks per creative.
sql = '''
SELECT * EXCEPT(n_prints, n_clicks),
    n_clicks / n_prints AS ctr,
    SUM(n_prints) OVER cumulative_window AS n_prints,
    SUM(n_clicks) OVER cumulative_window AS n_clicks,
FROM meli-bi-data.SBOX_DSPCREATIVOS.BQ_PRINTS_CLICKS_PER_DAY
WHERE campaign_id = 5278 AND line_item_id IN (11867/*,11346*/)

WINDOW cumulative_window AS
(
    PARTITION BY campaign_id, line_item_id, creative_id
    ORDER BY ds
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)

ORDER BY line_item_id, creative_id, ds
'''

# Rebinds the notebook-wide `df`; the result has no p015/p50/p995 columns.
df = bq.run_query(sql)

In [None]:
# For every day, count in how many simulated draws each creative has the
# highest sampled value. Each entry of `dist` is ordered like
# df['creative_id'].unique(), with 0 for creatives that never win or have no
# row that day, so the counts can be labelled with that same sequence.
creative_order = df['creative_id'].drop_duplicates().tolist()
dist = []
for ds in df['ds'].unique():
    df_temp = df[df['ds'] == ds]
    # One row of draws per row of this day's data.
    draws = np.array([
        beta_simulation(n_clicks=n_clicks, n_prints=n_prints, size=100000)[0]
        for n_clicks, n_prints in df_temp[['n_clicks', 'n_prints']].values
    ])
    # Row position of the winner in every draw, then wins per row position.
    winner_rows = draws.argmax(axis=0)
    wins_per_row = np.bincount(winner_rows, minlength=len(df_temp)).tolist()
    # Map positions through this day's own rows: positions index `df_temp`,
    # not the notebook-wide list of creatives.
    wins_by_creative = {}
    for creative_id, n_wins in zip(df_temp['creative_id'].tolist(), wins_per_row):
        wins_by_creative[creative_id] = wins_by_creative.get(creative_id, 0) + n_wins
    dist.append([wins_by_creative.get(creative_id, 0) for creative_id in creative_order])

In [None]:
# One-row table of n_prints per creative, taken from the rows whose `ds`
# renders as 2022-12-07 and labelled '2022-12-13'.
# NOTE(review): the filter date and the row label differ — confirm which is intended.
day_mask = df['ds'].map(str) == '2022-12-07'
df_temp = (
    df.loc[day_mask, ['creative_id', 'n_prints']]
    .rename(columns={'n_prints': '2022-12-13'})
    .set_index('creative_id')
    .T
)

In [None]:
# Display the current `df_temp`.
df_temp

In [None]:
# Table of the per-day counts in `dist`: one row per day, labelled with the
# following day, one column per creative.
# The labels are built with a comprehension because Series.unique() can return
# a plain NumPy array, which has no `.map` method.
next_day_labels = [str(day + timedelta(days=1)) for day in df['ds'].unique()]
df2 = pd.DataFrame(dist, index=next_day_labels, columns=df['creative_id'].unique())

In [None]:
# Stack the one-row prints table on top of the per-day counts and display it.
df3 = pd.concat([df_temp, df2], axis=0)
df3

In [None]:
# Express every row of df3 as percentages of that row's total.
std_df3 = df3.div(df3.sum(axis=1), axis=0) * 100

In [None]:
# Stacked bars of the row-wise percentages in std_df3. Saved to 'dist_bt.png'.
fig, ax = plt.subplots(figsize=(10, 7))
std_df3.plot.bar(stacked=True, ax=ax)
# std_df3 is already scaled to 0-100, which matches PercentFormatter's default.
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
ax.legend(bbox_to_anchor=(1, 1))
fig.tight_layout()
fig.savefig('dist_bt.png', dpi=300)

In [None]:


# Redraw the CTR band chart for the current `df` and save it to 'ctr_performance2.png'.
# The most recent query cell rebinds `df` without the percentile columns, so
# they are computed here when missing; otherwise the plot fails with KeyError.
percentile_cols = ['p015', 'p50', 'p995']
if not set(percentile_cols).issubset(df.columns):
    df.loc[:, percentile_cols] = [
        beta_simulation(n_clicks=n_clicks, n_prints=n_prints, size=100000)[1].tolist()
        for n_clicks, n_prints in df[['n_clicks', 'n_prints']].values
    ]

fig, ax = plt.subplots(figsize=(10, 6))
lines = []
for creative_id in df['creative_id'].unique():
    df_temp = df[df['creative_id'] == creative_id]
    n_days = len(df_temp)
    # The band and its centre line are drawn one position to the right of the
    # daily CTR they were computed from.
    shifted_positions = range(1, n_days + 1)
    ax.fill_between(shifted_positions, df_temp['p015']*100, df_temp['p995']*100, alpha=.3)
    line, = ax.plot(shifted_positions, df_temp['p50']*100, label=f'{creative_id} cumulative beta median')
    line2, = ax.plot(range(n_days), df_temp['ctr']*100, color=line.get_color(), ls='dotted', label=f'{creative_id} daily ctr')
    lines.extend([line, line2])
ax.legend(handles=lines, bbox_to_anchor=(1,1))
# Tick labels come from the last creative iterated, plus one extra day;
# this assumes every creative covers the same days.
ax.set_xticks(range(len(df_temp) + 1), df_temp['ds'].values.tolist() + [df_temp['ds'].max() + timedelta(days=1)])
plt.xticks(rotation=90)
ax.set_ylabel('ctr')
# Values were multiplied by 100 above, so the axis is in percent units.
ax.set_ylim((0, 1.3))
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
plt.tight_layout()
plt.savefig('ctr_performance2.png', dpi=300)