In [57]:
import pandas as pd
import numpy as np

In [58]:
impressions_df = pd.read_csv("../../datasets/impressions/2023/impressions_01_05_brands.csv")

In [59]:
import json

def extract_pid(placement_id):
    placement_id = json.loads(placement_id)
    bam_ad_slots = placement_id["bam_ad_slot"]
    # print(bam_ad_slots)
    final_bam_ad_slots = [bas for bas in bam_ad_slots if bas.strip() != '']
    ret = ''
    try:
        ret = placement_id['tag_id'][0]
    except:
        # print('.')
        ret = ','.join([bas for bas in final_bam_ad_slots])
    else:
        ret += ','.join([bas for bas in final_bam_ad_slots])
    return ret


def pid_type(pid):
    try:
        int(pid)
        return 'int'
    except:
        return 'str'

impressions_df['pid'] = impressions_df['placement_ids'].apply(extract_pid)
impressions_df['pid_type'] = impressions_df['pid'].apply(pid_type)
# filter only impressions with int placement IDs
impressions_df = impressions_df[impressions_df['pid_type'] == 'int']
# convert placement ids that can be converted to the int to be int, not string
# type(impressions_df.iloc[0]['pid'])     # check this, right now they're strings
impressions_df['pid'] = impressions_df['pid'].apply(lambda pid: int(pid))

In [60]:
# run through predictions
import os
import json


def _get_pred_map():
    pred_map = dict()
    for i in range(1, 6):
        if i < 10:
            i = f'0{i}'
        for day in os.listdir(f'../../predictions/2023/{i}'):
            for h in os.listdir(f'../../predictions/2023/{i}/{day}'):
                if not h.endswith('.ndjson'):
                    continue
                with open(f'../../predictions/2023/{i}/{day}/{h}')as f:
                    for line in f.readlines():
                        json_line = json.loads(line)
                        id_ = json_line['id']
                        pred_map[id_] = json_line['prediction']
    return pred_map
                            
pred_map = _get_pred_map()

In [61]:
def _get_final_fixation(gaze_valid, is_fixated, id_, pred_map):
    if gaze_valid:
        return is_fixated
    if id_ in pred_map:
        return pred_map[id_]
    return False

impressions_df['final_fixation'] = impressions_df.apply(lambda row: _get_final_fixation(row['gaze_valid'], row['is_fixated'], row['id'], pred_map), axis=1)

In [62]:
impressions_df["true_is_iab_inview"] = impressions_df.apply(
    lambda x: 
    True if (
        (x["ad_technical_format"] == "out-stream") & (x["exist_viewable_1_s_threshold_50"] == True) & (x["exist_viewable_2_s_threshold_50"] == False)
    ) | (x["is_iab_inview"] == True) 
    else False, 
    axis=1
)

In [63]:
impressions_df = impressions_df[impressions_df["chosen_brand"] == "rusta"]

In [64]:
grouped_df = impressions_df.groupby(["hostname", "pid"]).apply(
    lambda x: pd.Series(
        {
            "impressions": x["id"].count(),
            "fixations": x.loc[x["final_fixation"], "id"].count(),
            "inview": x.loc[x["true_is_iab_inview"], "id"].count()
        }
    )
)
grouped_df = grouped_df.reset_index()

In [65]:
i = 2
client_impressions = pd.read_excel(f"./datasets/Attention_test_{i}.xlsx")
client_impressions["Placement Id"] = client_impressions["Placement Id"].astype("Int64")

In [66]:
merged_df = grouped_df.merge(client_impressions, how="inner", left_on="pid", right_on="Placement Id")
merged_df

Unnamed: 0,hostname,pid,impressions,fixations,inview,Placement Id,Placement Name,Size,Site Domain,Imps,Viewable Imps
0,aftonbladet.se,19499407,15,0,8,19499407,SE-Aftonbladet-wde-Article-Insider_1,300x250,aftonbladet.se,23490,18141
1,aftonbladet.se,19499410,13,0,8,19499410,SE-Aftonbladet-wde-Article-Modul_1,640x320,aftonbladet.se,20254,15479
2,aftonbladet.se,19499410,13,0,8,19499410,SE-Aftonbladet-wde-Article-Modul_1,640x320,convox.cloud,1,1
3,aftonbladet.se,19499411,2,0,1,19499411,SE-Aftonbladet-wde-Article-Outstream,640x320,aftonbladet.se,10115,8825
4,aftonbladet.se,19499428,10,6,7,19499428,SE-Aftonbladet-wde-Front-Modul_1,640x320,aftonbladet.se,15292,9225
5,aftonbladet.se,19499431,7,3,5,19499431,SE-Aftonbladet-wde-Front-Modul_2,640x320,aftonbladet.se,5306,3403
6,aftonbladet.se,19499432,6,3,5,19499432,SE-Aftonbladet-wde-Front-Modul_3,640x320,aftonbladet.se,6887,4610
7,aftonbladet.se,19499433,2,0,0,19499433,SE-Aftonbladet-wde-Front-Modul_4,640x320,aftonbladet.se,5074,3684
8,aftonbladet.se,19499434,2,2,2,19499434,SE-Aftonbladet-wde-Front-Modul_5,640x320,aftonbladet.se,4268,2986
9,aftonbladet.se,19499435,3,1,1,19499435,SE-Aftonbladet-wde-Front-Modul_6,640x320,aftonbladet.se,3825,2587


In [67]:
final_df = merged_df[["hostname", "Placement Id", "impressions", "fixations", "inview", "Imps", "Viewable Imps"]]

In [68]:
final_group = final_df.groupby(["hostname"]).apply(
     lambda x: pd.Series(
         {
             "tobii_imps": x["impressions"].sum(),
             "client_imps": x["Imps"].sum(),
             "tobii_fixations": x["fixations"].sum(),
             "tobii_inviews": x["inview"].sum(),
             "client_inviews": x["Viewable Imps"].sum(),
         }
     )
)
final_group = final_group.reset_index()

final_group["tobii_inview/impression_ratio"] = round(final_group["tobii_inviews"] / final_group["tobii_imps"] * 100, 2)
final_group["tobii_fixation/impression_ratio"] = round(final_group["tobii_fixations"] / final_group["tobii_imps"] * 100, 2)
final_group["tobii_fixation/inview_ratio"] = round(final_group["tobii_fixations"] / final_group["tobii_inviews"] * 100, 2)


final_group["tobii_fixation/inview_ratio"] = final_group["tobii_fixation/inview_ratio"].fillna(0)

final_group["client_inview/impression_ratio"] = round(final_group["client_inviews"] / final_group["client_imps"] * 100, 2)
final_group["client_fixations"] = np.floor(final_group["client_imps"] * (final_group["tobii_fixation/impression_ratio"] / 100))
final_group["client_fixation_ratio"] = round(final_group["client_fixations"] / final_group["client_imps"] * 100, 2)
final_group["client_fixation/inview_ratio"] = round(final_group["client_fixations"] / final_group["client_inviews"] * 100, 2)

final_group["client_fixation/inview_ratio"] = final_group["client_fixation/inview_ratio"].fillna(0)
final_group

Unnamed: 0,hostname,tobii_imps,client_imps,tobii_fixations,tobii_inviews,client_inviews,tobii_inview/impression_ratio,tobii_fixation/impression_ratio,tobii_fixation/inview_ratio,client_inview/impression_ratio,client_fixations,client_fixation_ratio,client_fixation/inview_ratio
0,aftonbladet.se,913,121639,390,460,86306,50.38,42.72,84.78,70.95,51964.0,42.72,60.21
1,blocket.se,12,29279,4,10,18832,83.33,33.33,40.0,64.32,9758.0,33.33,51.82
2,dn.se,23,33618,6,15,24239,65.22,26.09,40.0,72.1,8770.0,26.09,36.18
3,expressen.se,128,65886,11,111,45008,86.72,8.59,9.91,68.31,5659.0,8.59,12.57
4,hemnet.se,2,6577,0,2,3531,100.0,0.0,0.0,53.69,0.0,0.0,0.0
5,omni.se,4,1018,0,0,539,0.0,0.0,0.0,52.95,0.0,0.0,0.0
6,tv.nu,2,39,0,0,17,0.0,0.0,0.0,43.59,0.0,0.0,0.0


In [69]:
def get_sample_size(num_impressions):
    if num_impressions <= 19:
        return "low"
    elif num_impressions >= 20 and num_impressions <= 99:
        return "medium"
    else:
        return "high"

final_group["sample_size"] = final_group["tobii_imps"].apply(lambda x: get_sample_size(x))
final_group

Unnamed: 0,hostname,tobii_imps,client_imps,tobii_fixations,tobii_inviews,client_inviews,tobii_inview/impression_ratio,tobii_fixation/impression_ratio,tobii_fixation/inview_ratio,client_inview/impression_ratio,client_fixations,client_fixation_ratio,client_fixation/inview_ratio,sample_size
0,aftonbladet.se,913,121639,390,460,86306,50.38,42.72,84.78,70.95,51964.0,42.72,60.21,high
1,blocket.se,12,29279,4,10,18832,83.33,33.33,40.0,64.32,9758.0,33.33,51.82,low
2,dn.se,23,33618,6,15,24239,65.22,26.09,40.0,72.1,8770.0,26.09,36.18,medium
3,expressen.se,128,65886,11,111,45008,86.72,8.59,9.91,68.31,5659.0,8.59,12.57,high
4,hemnet.se,2,6577,0,2,3531,100.0,0.0,0.0,53.69,0.0,0.0,0.0,low
5,omni.se,4,1018,0,0,539,0.0,0.0,0.0,52.95,0.0,0.0,0.0,low
6,tv.nu,2,39,0,0,17,0.0,0.0,0.0,43.59,0.0,0.0,0.0,low


In [70]:
avg_cols = ["tobii_inview/impression_ratio", "tobii_fixation/impression_ratio", "tobii_fixation/inview_ratio", "client_inview/impression_ratio", "client_fixation_ratio", "client_fixation/inview_ratio"]
summary_row = round(final_group[avg_cols].mean(), 2)
sum_cols = ["tobii_imps", "client_imps", "tobii_fixations", "tobii_inviews", "client_inviews", "client_fixations"]
#summary_row = final_group[avg_cols].sum()
summary_row.name = "Summary"

for column in final_group.columns:
    if column in sum_cols:
        summary_row[column] = final_group[column].sum()
    elif column in avg_cols:
        continue
    else:
        summary_row[column] = ''

In [71]:
final_group = final_group.append(summary_row, ignore_index=True)
final_group

  final_group = final_group.append(summary_row, ignore_index=True)


Unnamed: 0,hostname,tobii_imps,client_imps,tobii_fixations,tobii_inviews,client_inviews,tobii_inview/impression_ratio,tobii_fixation/impression_ratio,tobii_fixation/inview_ratio,client_inview/impression_ratio,client_fixations,client_fixation_ratio,client_fixation/inview_ratio,sample_size
0,aftonbladet.se,913,121639,390,460,86306,50.38,42.72,84.78,70.95,51964.0,42.72,60.21,high
1,blocket.se,12,29279,4,10,18832,83.33,33.33,40.0,64.32,9758.0,33.33,51.82,low
2,dn.se,23,33618,6,15,24239,65.22,26.09,40.0,72.1,8770.0,26.09,36.18,medium
3,expressen.se,128,65886,11,111,45008,86.72,8.59,9.91,68.31,5659.0,8.59,12.57,high
4,hemnet.se,2,6577,0,2,3531,100.0,0.0,0.0,53.69,0.0,0.0,0.0,low
5,omni.se,4,1018,0,0,539,0.0,0.0,0.0,52.95,0.0,0.0,0.0,low
6,tv.nu,2,39,0,0,17,0.0,0.0,0.0,43.59,0.0,0.0,0.0,low
7,,1084,258056,411,598,178472,55.09,15.82,24.96,60.84,76151.0,15.82,22.97,


In [72]:
result_df = final_group[["hostname", "client_imps", "client_inviews", "client_inview/impression_ratio", "client_fixation/inview_ratio", "tobii_fixation/inview_ratio", "sample_size"]]
result_df = result_df.rename(
    columns={
        "client_imps": "Impressions",
        "client_inviews": "Inviews",
        # "client_fixations": "Fixations",
        "client_inview/impression_ratio": "Inview Ratio",
        "client_fixation/inview_ratio": "Fixation/Inview Ratio",
        "tobii_fixation/inview_ratio": "Fixation / Tobii Inview Ratio",
        "sample_size": "Sample Size"
    }
)

In [73]:
result_df

Unnamed: 0,hostname,Impressions,Inviews,Inview Ratio,Fixation/Inview Ratio,Fixation / Tobii Inview Ratio,Sample Size
0,aftonbladet.se,121639,86306,70.95,60.21,84.78,high
1,blocket.se,29279,18832,64.32,51.82,40.0,low
2,dn.se,33618,24239,72.1,36.18,40.0,medium
3,expressen.se,65886,45008,68.31,12.57,9.91,high
4,hemnet.se,6577,3531,53.69,0.0,0.0,low
5,omni.se,1018,539,52.95,0.0,0.0,low
6,tv.nu,39,17,43.59,0.0,0.0,low
7,,258056,178472,60.84,22.97,24.96,


In [74]:
with pd.ExcelWriter(f"./results/ATEX2-1449_Attention_test_{i}_rusta.xlsx") as writer:
    result_df.to_excel(writer, index=False)
