In [None]:
import pandas as pd
import numpy as np

In [None]:
# Client campaign export; its "Placement Id" column is used further down to
# restrict the impressions to this client's placements.
client_df = pd.read_excel(
    io="../datasets/ingo_campaign_report-2023-03-27T162131.802.xlsx",
)

# Impression-level records that the rest of the notebook filters and aggregates.
impressions_df = pd.read_csv(
    filepath_or_buffer="../datasets/impressions/2023/impressions_q1.csv",
)

In [None]:
# Load the stored predictions (.ndjson files) into an id -> prediction lookup
import os
import json


def _get_pred_map():
    pred_map = dict()
    
    for i in range(1, 3):
        if i < 10:
            i = f'0{i}'
        for day in os.listdir(f'../predictions/2023/{i}'):
            for h in os.listdir(f'../predictions/2023/{i}/{day}'):
                if not h.endswith('.ndjson'):
                    continue
                with open(f'../predictions/2023/{i}/{day}/{h}')as f:
                    for line in f.readlines():
                        json_line = json.loads(line)
                        id_ = json_line['id']
                        pred_map[id_] = json_line['prediction']
    return pred_map
                            
# Build the id -> prediction lookup once; a later cell passes this global
# into _get_final_fixation for every impressions row.
pred_map = _get_pred_map()

In [None]:
import json

def extract_pid(placement_ids):
    """Return the first placement id of a JSON-encoded list as an ``int``.

    Args:
        placement_ids: Raw cell value, normally a JSON array string such as
            ``'["123", "456"]'``.

    Returns:
        The first array element converted to ``int``. If the value cannot be
        interpreted that way, the input is returned unchanged so the caller
        can still inspect it. That covers invalid JSON, a non-numeric first
        element, an empty array, JSON that is not an array, and missing
        values such as ``None`` or NaN.
    """
    try:
        return int(json.loads(placement_ids)[0])
    except (ValueError, TypeError, IndexError, KeyError):
        # ValueError: invalid JSON or non-numeric first element
        # TypeError:  input is not a string (None / NaN) or JSON is a scalar
        # IndexError: empty JSON array
        # KeyError:   JSON object instead of an array
        return placement_ids

def extract_pid_type(placement_id):
    """Classify a placement id as ``"int"`` or ``"str"``.

    Returns ``"int"`` when ``int(placement_id)`` succeeds and ``"str"`` when
    that conversion raises ``ValueError``. Other exception types propagate.
    """
    try:
        int(placement_id)
        return "int"
    except ValueError:
        return "str"

# Derive the placement id from the raw "placement_id_chosen" value, then record
# for every row whether that id is convertible to an integer.
pid_series = impressions_df["placement_id_chosen"].apply(extract_pid)
impressions_df["pid"] = pid_series
impressions_df["pid_type"] = pid_series.apply(extract_pid_type)

In [None]:
# Keep only the impressions whose placement id appears in the client report.
# .copy() detaches the filtered rows from the unfiltered frame, so the column
# assignments in the following cells operate on an independent DataFrame and
# do not raise SettingWithCopyWarning.
# NOTE(review): isin() matches by value, so "pid" and "Placement Id" must hold
# comparable types (e.g. both integers) - confirm against the Excel export.
impressions_df = impressions_df[
    impressions_df["pid"].isin(client_df["Placement Id"])
].copy()
impressions_df

In [None]:
# A row is flagged as in view when "is_iab_inview" is True, or when it is an
# "out-stream" ad with exist_viewable_1_s_threshold_50 == True and
# exist_viewable_2_s_threshold_50 == False.
#
# The flags are compared with == True / == False on purpose: missing values
# (NaN) then evaluate to False instead of being treated as truthy.
# Computed column-wise rather than with a row-wise apply: same boolean result
# per row, far fewer Python-level calls, and well-defined on an empty frame.
_outstream_1s_only = (
    (impressions_df["ad_technical_format"] == "out-stream")
    & (impressions_df["exist_viewable_1_s_threshold_50"] == True)
    & (impressions_df["exist_viewable_2_s_threshold_50"] == False)
)
impressions_df["true_is_iab_inview"] = _outstream_1s_only | (
    impressions_df["is_iab_inview"] == True
)

In [None]:
def _get_final_fixation(gaze_valid, is_fixated, is_iab_inview, id_, pred_map):
    if gaze_valid and is_iab_inview:
        return is_fixated
    if id_ in pred_map and is_iab_inview:
        return pred_map[id_]
    return False

def _final_fixation_for_row(row):
    """Evaluate ``_get_final_fixation`` for one impressions row, using the global ``pred_map``."""
    return _get_final_fixation(
        row['gaze_valid'],
        row['is_fixated'],
        row['true_is_iab_inview'],
        row['id'],
        pred_map,
    )


# One fixation value per impression, computed row by row.
impressions_df['final_fixation'] = impressions_df.apply(_final_fixation_for_row, axis=1)

In [None]:
# Per (hostname, pid): number of impressions, of fixated impressions and of
# in-view impressions.
#
# Implemented with 0/1 helper columns plus named aggregation instead of
# groupby(...).apply(lambda -> pd.Series): the counts are the same, but the
# work is vectorised and no longer depends on apply() receiving the grouping
# columns (deprecated in recent pandas releases).
#
# Every count only considers rows with a non-null "id", mirroring
# Series.count() on the "id" column. A row counts as fixated / in view only
# if its flag compares equal to True, so NaN or any other value (for example
# a fractional prediction, should pred_map contain such) is not counted.
_has_id = impressions_df["id"].notna()
grouped_df = (
    impressions_df
    .assign(
        _fixated=((impressions_df["final_fixation"] == True) & _has_id).astype("int64"),
        _inview=((impressions_df["true_is_iab_inview"] == True) & _has_id).astype("int64"),
    )
    .groupby(["hostname", "pid"])
    .agg(
        num_impressions=("id", "count"),
        num_fixations=("_fixated", "sum"),
        num_is_iab_inview=("_inview", "sum"),
    )
    .reset_index()
)
grouped_df

In [None]:
# Fixated impressions as a percentage of all impressions (2 decimals).
grouped_df["fixation_ratio_%"] = (
    grouped_df["num_fixations"]
    .div(grouped_df["num_impressions"])
    .mul(100)
    .round(2)
)

# Fixated impressions as a percentage of in-view impressions (1 decimal).
# NaN results (e.g. 0 / 0 for a group without in-view impressions) become 0.
grouped_df["inview_fixation_ratio_%"] = (
    grouped_df["num_fixations"]
    .div(grouped_df["num_is_iab_inview"])
    .mul(100)
    .round(1)
    .fillna(0)
)
grouped_df

In [None]:
# Select the report columns, give them the client-facing headers and write the
# result to an Excel file without the index column.
final_df = (
    grouped_df[
        [
            "hostname",
            "pid",
            "num_impressions",
            "fixation_ratio_%",
            "inview_fixation_ratio_%",
        ]
    ]
    .rename(
        columns={
            "pid": "Placement Id",
            "num_impressions": "Impressions",
        }
    )
)
final_df.to_excel("../final_reports/ingo_campaign_fixations.xlsx", index=False)