In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from definitions import ROOT_DIR

In [None]:
# Build the path to the period-7 raw traffic CSV under ROOT_DIR and load it
# into the frame that every later cell reads from.
data_path = ROOT_DIR / "data" / "raw_traffic" / "period-7.csv"
df = pd.read_csv(data_path)

In [None]:
# Show the loaded frame as this cell's output.
df

In [None]:
# Center the observed conversion on pValue and scale it by pValue * (1 - pValue).
# NOTE(review): the denominator is p * (1 - p), not its square root, and it is zero
# when pValue is exactly 0 or 1 — confirm both are intended.
df["standardConversionAction"] = (
    df["conversionAction"].sub(df["pValue"]).div(df["pValue"].mul(1 - df["pValue"]))
)

# Per-slot mean and standard deviation of that signal, restricted to rows with isExposed == 1.
exposed_df = df[df["isExposed"] == 1]
exposed_df.groupby("adSlot").agg(
    {
        "standardConversionAction": ["mean", "std"],
    }
)

In [None]:
# Per-slot mean and standard deviation of standardConversionAction over every row of df.
df.groupby("adSlot")[["standardConversionAction"]].agg(["mean", "std"])

In [None]:
# Per-slot mean and standard deviation of the isExposed column.
df.groupby("adSlot")[["isExposed"]].agg(["mean", "std"])

In [None]:
# Per-slot mean and standard deviation of the cost column.
df.groupby("adSlot")[["cost"]].agg(["mean", "std"])

In [None]:
# Keep only the rows whose adSlot equals 2 and summarize the standardized
# conversion signal on that subset.
slot_df = df.loc[df["adSlot"] == 2]
slot_df["standardConversionAction"].describe()

In [None]:
# Sum xi and isExposed within each (timeStepIndex, pvIndex) group.
# The aggregate has to be the cell's final expression to be displayed; with a
# trailing bare `df` the notebook showed the raw frame and silently discarded
# this result.
df.groupby(["timeStepIndex", "pvIndex"]).agg({"xi": "sum", "isExposed": "sum"})

In [None]:
# For every (deliveryPeriodIndex, timeStepIndex, advertiserNumber,
# advertiserCategoryIndex) group, gather the pValue and pValueSigma entries
# into Python lists, then turn the group keys back into ordinary columns.
pvalues_df = (
    df.groupby(
        ["deliveryPeriodIndex", "timeStepIndex", "advertiserNumber", "advertiserCategoryIndex"]
    )
    .agg({"pValue": lambda s: s.tolist(), "pValueSigma": lambda s: s.tolist()})
    .reset_index()
)
pvalues_df

In [None]:
def generate_bids_df(data):
    """Nest bid, isExposed and cost into per-time-step lists of lists.

    Only rows with ``xi == 1`` are kept. Those rows are first grouped by
    (deliveryPeriodIndex, timeStepIndex, pvIndex), collecting each of the
    three columns into a list per group. The result is then grouped again by
    (deliveryPeriodIndex, timeStepIndex), so every cell of the three columns
    ends up holding one inner list per pvIndex group.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``xi``, ``deliveryPeriodIndex``,
        ``timeStepIndex``, ``pvIndex``, ``bid``, ``isExposed`` and ``cost``.

    Returns
    -------
    pandas.DataFrame
        One row per (deliveryPeriodIndex, timeStepIndex), with the group keys
        as regular columns followed by the nested ``bid``, ``isExposed`` and
        ``cost`` columns.
    """

    def _as_list(series):
        # Each group is collapsed to a plain Python list of its values.
        return series.tolist()

    # The same collection rule applies to all three columns at both levels.
    collect = dict.fromkeys(("bid", "isExposed", "cost"), _as_list)

    selected = data.loc[data["xi"] == 1]
    per_pv = (
        selected.groupby(["deliveryPeriodIndex", "timeStepIndex", "pvIndex"])
        .agg(collect)
        .reset_index()
    )
    per_step = (
        per_pv.groupby(["deliveryPeriodIndex", "timeStepIndex"])
        .agg(collect)
        .reset_index()
    )
    return per_step

# Build the nested per-time-step bid / isExposed / cost lists from the full traffic frame.
bids_df = generate_bids_df(df)

In [None]:
# Preview the first rows of the nested bids frame.
bids_df.head()

In [None]:
# Sort bid, isExposed, and cost according to bid.
# Step 1: for every row, store the argsort of its nested bid lists
# (np.argsort works along the last axis, i.e. within each inner list, ascending).
bids_df["positions"] = bids_df.apply(lambda row: np.argsort(row["bid"]), axis=1)

def reorder_list_of_lists(lst, positions):
    """Reorder every inner list of ``lst`` by the matching row of ``positions``.

    Parameters
    ----------
    lst : list of lists
        Converted to an array before indexing, so the inner lists are assumed
        to have equal length.
    positions : numpy.ndarray
        Integer indices applied along axis 1, e.g. the output of
        ``np.argsort`` on an array of the same shape.

    Returns
    -------
    list of lists
        The reordered values, converted back to plain Python lists.
    """
    return np.take_along_axis(np.asarray(lst), positions, axis=1).tolist()

# Step 2: apply the stored ordering to each nested column, row by row, so that
# isExposed and cost stay aligned with their (now ascending) bids.
bids_df["bid"] = bids_df.apply(
    lambda row: reorder_list_of_lists(row["bid"], row["positions"]), axis=1
)
bids_df["isExposed"] = bids_df.apply(
    lambda row: reorder_list_of_lists(row["isExposed"], row["positions"]), axis=1
)
bids_df["cost"] = bids_df.apply(
    lambda row: reorder_list_of_lists(row["cost"], row["positions"]), axis=1
)

# The ordering column was only a helper; remove it before previewing.
bids_df = bids_df.drop(columns=["positions"])
bids_df.head()


In [None]:
# Stack the nested isExposed lists of every row into one 2-D matrix
# (one matrix row per inner list), then average each column.
exposed_mat = np.vstack([np.vstack(nested) for nested in bids_df["isExposed"]])

exposition_mean = exposed_mat.mean(axis=0)
print(exposition_mean)

In [None]:
# Nested bids of the row with timeStepIndex == 0, as a transposed array.
# Series.item() only succeeds when the filter matches exactly one row.
np.array(bids_df[bids_df.timeStepIndex == 0].bid.item()).T

In [None]:
# Nested bids of the first row of bids_df as an array (assumed 2-D: one row per inner list).
top_bids = np.array(bids_df.loc[0].bid)
# Draw one standard-normal candidate bid per inner list from a seeded generator,
# so Restart & Run All reproduces the same output; the unseeded np.random.randn
# call produced a different result on every run.
my_bid = np.random.default_rng(seed=0).standard_normal(len(top_bids))

# Broadcasting against the transpose compares each candidate with every bid of
# its inner list; any(axis=0) flags the lists where it exceeds at least one bid.
(my_bid > top_bids.T).any(axis=0)