In [27]:
import pandas as pd
import numpy as np
import json
import os
import re
from pyathena import connect
from pyathena.pandas.util import as_pandas
from pyathena.pandas.cursor import PandasCursor

In [28]:
# Load client data
client = "stenaline"
client_df = pd.read_excel(f"./Stena line v24 - 35 display xandr.xlsx", converters={"Imps": int, "Placement Id": int})
#client_df = pd.read_excel(f"./Social_attention_test_Stena-Line_Sverige_-Ads-Jun-1-2023-Aug-31-2023.xlsx", converters={"Imps": int, "Placement Id": int})
client_df["Viewability Rate %"] = client_df["Viewable Imps"] / client_df["Imps"]
client_placement_ids = list(client_df["Placement Id"].unique())

In [29]:
client_df

Unnamed: 0,Placement Id,Placement Name,Size,Site Domain,Member Currency,Imps,Viewable Imps,Curator Revenue,Viewability Rate %
0,19499428,SE-Aftonbladet-wde-Front-Modul_1,640x320,aftonbladet.se,SEK,196109,116601,9623.560719,0.594572
1,19499420,SE-Aftonbladet-wde-Front-Insider_1,300x600,aftonbladet.se,SEK,186405,120345,8914.880822,0.645610
2,19499421,SE-Aftonbladet-wde-Front-Insider_2,300x600,aftonbladet.se,SEK,123141,90398,5861.934659,0.734102
3,19499490,SE-Aftonbladet-wph-Front-Toppanorama,320x320,aftonbladet.se,SEK,109338,68201,5775.257152,0.623763
4,19499422,SE-Aftonbladet-wde-Front-Insider_3,300x600,aftonbladet.se,SEK,103513,74586,4901.475796,0.720547
...,...,...,...,...,...,...,...,...,...
5545,29881552,cncpt-lb4-hb,980x240,residencemagazine.se,SEK,1,0,0.024000,0.000000
5546,21304905,MovieZine.se - Modul,640x320,moviezine.se,SEK,1,0,0.024000,0.000000
5547,29881495,cncpt-lb1-hb,980x240,femina.se,SEK,1,0,0.024000,0.000000
5548,29881557,cncpt-mob3-hb,300x250,svenskdam.se,SEK,1,0,0.024000,0.000000


In [30]:
prediction_df = pd.read_csv("../../predictions/fixation_rate_predictions_2022.csv")

In [31]:
# Fetch impressions using PyAthena library
cursor = connect(s3_staging_dir="s3://aws-athena-query-results-094611745175-eu-west-1/",
                 region_name="eu-west-1", profile_name="atexprodadminsso", cursor_class=PandasCursor).cursor()

impressions_df = cursor.execute(f'''
select 
    "impression_model"."id", 
    "hostname", 
    "placement_ids", 
    "placement_ids_chosen", 
    "channel", 
    "ad_technical_format", 
    "is_fixated", 
    "gaze_valid", 
    "is_iab_inview", 
    "exist_viewable_1_s_threshold_50", 
    "exist_viewable_2_s_threshold_50", 
    "impression_model"."part_month", 
    CONCAT(cast("ad_width_chosen" as VARCHAR), 'x', cast("ad_width_chosen" as VARCHAR)) as size,
    "groupm_brands_v2"."chosen_brand" as "chosen_brand"
from "prod_attentionpanel_com_eu_west_1"."impression_model"
join "groupm-brand-batch"."groupm_brands_v2" on impression_model.id = groupm_brands_v2.id
where impression_model.part_year = '2023' and impression_model.part_month in ('01', '02', '03', '04', '05', '06', '07', '08')
''').as_pandas()

In [32]:
def extract_pid(placement_id):
    placement_id = json.loads(placement_id)
    bam_ad_slots = placement_id["bam_ad_slot"]
    final_bam_ad_slots = [bas for bas in bam_ad_slots if bas.strip() != '']
    try:
        ret = placement_id['tag_id'][0]
    except:
        return
    ret += ','.join([bas for bas in final_bam_ad_slots])
    return ret

def pid_type(pid):
    try:
        int(pid)
        return 'int'
    except:
        return 'str'

impressions_df['pid'] = impressions_df['placement_ids'].apply(extract_pid)
impressions_df['pid_type'] = impressions_df['pid'].apply(pid_type)
impressions_df = impressions_df[impressions_df["pid_type"] == 'int']
impressions_df['pid'] = impressions_df['pid'].apply(lambda pid: int(pid))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  impressions_df['pid'] = impressions_df['pid'].apply(lambda pid: int(pid))


In [33]:
# Get local predictions and store them in a map
def _get_pred_map():
    pred_map = dict()
    _year = 2023
    _range1 = 1
    _range2 = 10
    for i in range(_range1, _range2):
        if i < 10:
            i = f'0{i}'
        for day in os.listdir(f'../../predictions/{_year}/{i}'):
            for h in os.listdir(f'../../predictions/{_year}/{i}/{day}'):
                if not h.endswith('.ndjson'):
                    continue
                with open(f'../../predictions/{_year}/{i}/{day}/{h}')as f:
                    for line in f.readlines():
                        json_line = json.loads(line)
                        id_ = json_line['id']
                        pred_map[id_] = json_line['prediction']
    return pred_map
                            
pred_map = _get_pred_map()

In [81]:
# Filter impressions based on tier
### TIER 1 ###
# This is the most accurate data we have. This is "campaign data", meaning correct brand and timeframe
### TIER 2 ###
# We might not have enough campaign data (tier 1). Tier 2 data is brand data, for a longer period (roughly 6 months)
### TIER 3 ###
# This is the least accurate data for a campaign. It ignore brand/timeframe and only checks Placement ID. This is mostly used as a benchmark
tier = 3
filtered_impressions = impressions_df[impressions_df["pid"].isin(client_placement_ids)]
if tier == 1:
    filtered_impressions = filtered_impressions[filtered_impressions["part_month"].isin(["06", "07", "08"])]
if tier < 3:
    filtered_impressions = filtered_impressions[filtered_impressions["chosen_brand"].isin(["stenaline", "stenalinese"])]

In [82]:
filtered_impressions

Unnamed: 0,id,hostname,placement_ids,placement_ids_chosen,channel,ad_technical_format,is_fixated,gaze_valid,is_iab_inview,exist_viewable_1_s_threshold_50,exist_viewable_2_s_threshold_50,part_month,size,chosen_brand,pid,pid_type
0,89b198a7-98a5-4964-a65b-4e6de631d504,aftonbladet.se,"{""tag_id"":[""19499420""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,False,False,false,false,05,250x250,stenaline,19499420,int
1,89b198a7-98a5-4964-a65b-4e6de631d504,aftonbladet.se,"{""tag_id"":[""19499420""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,False,False,false,false,05,250x250,stenalinese,19499420,int
4,9a347dd3-2c2f-4d70-aa3c-9ddce3e9ce7a,prisjakt.nu,"{""tag_id"":[""20334190""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,True,True,true,true,07,250x250,servicefinderr,20334190,int
5,9a347dd3-2c2f-4d70-aa3c-9ddce3e9ce7a,prisjakt.nu,"{""tag_id"":[""20334190""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,True,True,true,true,07,250x250,servicefinder,20334190,int
6,8bb46e3f-5e75-4b73-8a3c-d3ef997a2c91,aftonbladet.se,"{""tag_id"":[""19499407""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,False,False,false,false,07,250x250,servicefinderr,19499407,int
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34443,f680e884-c3bc-4890-86f4-1481863e368a,aftonbladet.se,"{""tag_id"":[""19499410""],""bam_ad_slot"":["""","""",""""]}",,display,display,True,True,True,true,true,07,468x468,servicefinder,19499410,int
34445,8c70f07c-e94e-400c-9765-2e6cff954cc8,classic.prisjakt.nu,"{""tag_id"":[""20334189""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,True,True,true,true,07,160x160,servicefinderr,20334189,int
34446,8c70f07c-e94e-400c-9765-2e6cff954cc8,classic.prisjakt.nu,"{""tag_id"":[""20334189""],""bam_ad_slot"":["""","""",""""]}",,display,display,False,True,True,true,true,07,160x160,servicefinder,20334189,int
34447,66a941f5-8353-411f-bc2d-ea34b004cfa6,aftonbladet.se,"{""tag_id"":[""20030150""],""bam_ad_slot"":["""","""",""""]}",,display,display,True,True,True,true,false,05,468x468,servicefinderr,20030150,int


In [83]:
def _get_final_fixation(gaze_valid, is_fixated, id_, pred_map):
    if gaze_valid:
        return is_fixated
    if id_ in pred_map:
        return pred_map[id_]
    return False

filtered_impressions['final_fixation'] = filtered_impressions.apply(lambda row: _get_final_fixation(row['gaze_valid'], row['is_fixated'], row['id'], pred_map), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_impressions['final_fixation'] = filtered_impressions.apply(lambda row: _get_final_fixation(row['gaze_valid'], row['is_fixated'], row['id'], pred_map), axis=1)


In [84]:
filtered_impressions["true_is_iab_inview"] = filtered_impressions.apply(
    lambda x: 
    True if (
        (x["ad_technical_format"] == "out-stream") & (x["exist_viewable_1_s_threshold_50"] == True) & (x["exist_viewable_2_s_threshold_50"] == False)
    ) | (x["is_iab_inview"] == True) 
    else False, 
    axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_impressions["true_is_iab_inview"] = filtered_impressions.apply(


In [85]:
grouped_df = filtered_impressions.groupby(["pid", "hostname", "size"]).apply(
    lambda x: pd.Series(
        {
            "impressions": x["id"].count(),
            "fixations": x.loc[x["final_fixation"], "id"].count(),
            "inview": x.loc[x["true_is_iab_inview"], "id"].count(),
        }
    )
)
grouped_df = grouped_df.reset_index()

In [86]:
grouped_client = client_df.groupby(["Placement Id", "Site Domain", "Size"]).apply(
    lambda x: pd.Series(
        {
            "Imps": x["Imps"].sum(),
            "Viewability Rate %": np.average(x["Viewability Rate %"], weights=x["Imps"])
        }
    )
)
grouped_client = grouped_client.reset_index()

In [87]:
# TODO: Only use predicted data for tier 3, skip "regular" data
if tier == 3:
    merged_df = grouped_client.merge(prediction_df, how="inner", left_on=["Placement Id", "Site Domain", "Size"], right_on=["placement_id", "hostname", "size"])
    merged_df["fixations"] = merged_df["fixation_rate"] * merged_df["num_impressions"]
    merged_df = merged_df[["hostname", "Placement Id", "num_impressions", "fixations", "num_inviews", "Imps", "Viewability Rate %", "fixation_rate"]]
    merged_df = merged_df.rename(
        columns={
            "num_impressions": "impressions",
            "num_inviews": "inview",
            "fixation_rate": "fixation_ratio"
        }
    )
else:
    merged_df = grouped_df.merge(client_df, how="inner", left_on="pid", right_on="Placement Id")
    merged_df = merged_df[["hostname", "Placement Id", "impressions", "fixations", "inview", "Imps", "Viewability Rate %"]]
    merged_df["fixation_ratio"] = round(merged_df["fixations"] / merged_df["impressions"], 2)

In [88]:
def get_sample_size_per_placement(num_impressions):
    if num_impressions < 10:
        return "low"
    elif num_impressions < 20:
        return "medium"
    else:
        return "high"

merged_df["sample_size"] = merged_df["impressions"].apply(lambda x: get_sample_size_per_placement(x))

In [89]:
merged_df.sort_values("fixations", ascending=False)

Unnamed: 0,hostname,Placement Id,impressions,fixations,inview,Imps,Viewability Rate %,fixation_ratio,sample_size
1448,aftonbladet.se,19499410,6624740,4.254408e+06,5284776,83211.0,0.707010,0.6422,high
1506,blocket.se,19499532,12476091,3.064128e+06,8457168,649.0,0.673344,0.2456,high
1504,blocket.se,19499521,6874155,1.973570e+06,4469196,122.0,0.639344,0.2871,high
1456,aftonbladet.se,19499428,3889742,9.385947e+05,2440028,196109.0,0.594572,0.2413,high
207,msn.com/sv-se/nyheter,3278075,1303460,5.818645e+05,1161571,20637.0,0.707080,0.4464,high
...,...,...,...,...,...,...,...,...,...
3278,stromstadstidning.se,28575766,2,9.800000e-03,2,1.0,1.000000,0.0049,low
3017,stromstadstidning.se,23856273,2,9.800000e-03,1,4.0,1.000000,0.0049,low
1557,svd.se,19499736,1,7.200000e-03,0,962.0,0.835759,0.0072,low
3162,svd.se,27264940,1,7.200000e-03,1,50.0,0.620000,0.0072,low


In [90]:
merged_df["client_fixations_per_placement"] = (merged_df["fixation_ratio"] * merged_df["Imps"]).fillna(0).replace([np.inf, -np.inf], 0)

In [91]:
final_group = merged_df.groupby(["hostname"]).apply(
     lambda x: pd.Series(
         {
             "tobii_imps": x["impressions"].sum(),
             "client_imps": x["Imps"].sum(),
             "tobii_fixations": x["fixations"].sum(),
             "client_fixations_per_placement": x["client_fixations_per_placement"].sum(),
             "tobii_inview": x["inview"].sum(),
             "client_viewability_rate": np.average(x["Viewability Rate %"], weights=x["Imps"]),
         }
     )
)
final_group = final_group.reset_index()
final_group["tobii_imps"] = final_group["tobii_imps"].astype("Int64")
final_group["client_imps"] = final_group["client_imps"].astype("Int64")
final_group["tobii_fixations"] = round(final_group["tobii_fixations"], 0).astype("Int64")
final_group["client_fixations"] = round(final_group["client_fixations_per_placement"], 0).astype("Int64")
final_group["tobii_inview"] = final_group["tobii_inview"].astype("Int64")
final_group["tobii_inview/impression_ratio"] = round(final_group["tobii_inview"] / final_group["tobii_imps"] * 100, 2)
final_group["tobii_fixation/impression_ratio"] = round(final_group["tobii_fixations"] / final_group["tobii_imps"] * 100, 2)

In [92]:
final_group["client_inview"] = round(final_group["client_imps"] * final_group["client_viewability_rate"], 0).astype("Int64")
final_group["client_fix/inview_ratio"] = round(final_group["client_fixations"] / final_group["client_inview"] * 100, 2)


def get_sample_size(num_impressions):
    if num_impressions <= 99:
        return "low"
    elif num_impressions >= 100 and num_impressions <= 199:
        return "medium"
    else:
        return "high"

final_group["sample_size"] = final_group["tobii_imps"].apply(lambda x: get_sample_size(x))

In [93]:
final_group

Unnamed: 0,hostname,tobii_imps,client_imps,tobii_fixations,client_fixations_per_placement,tobii_inview,client_viewability_rate,client_fixations,tobii_inview/impression_ratio,tobii_fixation/impression_ratio,client_inview,client_fix/inview_ratio,sample_size
0,56kilo.se,21373,12,1079,0.8514,11290,0.500000,1,52.82,5.05,6,16.67,high
1,90min.com,4,1,0,0.1246,2,1.000000,0,50.0,0.0,1,0.0,low
2,9gag.com,88859,166,5260,9.8272,81935,0.560241,10,92.21,5.92,93,10.75,high
3,accuweather.com,669,946,165,197.8181,409,0.480973,198,61.14,24.66,455,43.52,high
4,accuweather.com/es,3,1,0,0.0847,2,1.000000,0,66.67,0.0,1,0.0,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,w3schools.com,59188,136,809,2.5770,35629,0.500000,3,60.2,1.37,68,4.41,high
332,webbkameror.se,11278,78,1966,13.4931,5159,0.551282,13,45.74,17.43,43,30.23,high
333,whathifi.com,1456,31,232,1.6958,699,0.548387,2,48.01,15.93,17,11.76,high
334,ystadsallehanda.se,13726,2197,2484,142.3627,9698,0.562585,142,70.65,18.1,1236,11.49,high


In [95]:
result_df = final_group[["hostname", "tobii_imps", "client_imps", "tobii_fixations", "client_fixations", "tobii_inview", "client_inview", "client_fix/inview_ratio", "tobii_inview/impression_ratio", "client_viewability_rate", "tobii_fixation/impression_ratio", "sample_size"]]
result_df
result_df.to_excel(f"./{client.lower()}_tier_{tier}_example_report.xlsx", index=False)

In [96]:
# Load tiered data
df_1 = pd.read_excel(f"./{client.lower()}_tier_1_example_report.xlsx")
df_1["tier"] = 1
df_2 = pd.read_excel(f"./{client.lower()}_tier_2_example_report.xlsx")
df_2["tier"] = 2
df_3 = pd.read_excel(f"./{client.lower()}_tier_3_example_report.xlsx")
df_3["tier"] = 3
all_df = [df_1, df_2, df_3]
combined_client_df = pd.concat(all_df)
columns = list(df_1.columns)

In [97]:
def get_data_based_on_tier(client_dfs):
    new_df = pd.DataFrame(columns=columns)
    for i, df in enumerate(client_dfs):
        for index, row in df.iterrows():
            if row.tier == 1:
                if row.sample_size == "high":
                    new_df = new_df.append(row)
                elif row.sample_size == "medium":
                    # If sample is tier 1 and almost high, bump it to make it pretty
                    row.sample_size = "high" if row.tobii_imps * 1.1 >= 200 else "medium"
                    new_df = new_df.append(row)
            elif row.tier == 2 and row.tobii_imps >= 10:
                if new_df[new_df["hostname"] == row.hostname]["hostname"].count() == 0:
                    if row.sample_size == "high":
                        # Degrade sample_size if lower tier
                        row.sample_size = "medium"
                        new_df = new_df.append(row)
                    elif row.sample_size == "medium":
                        new_row = combined_client_df[(combined_client_df["tier"] == 3) & (combined_client_df["hostname"] == row.hostname)]
                        new_row.sample_size = "medium"
                        new_row["client_fixations"] = (
                            round(row["client_fixations"] * 0.25 + new_row["client_fixations"].sum() * 0.75, 0).astype("Int64")
                        )
                        new_df = new_df.append(new_row)
                    else:
                        new_row = combined_client_df[(combined_client_df["tier"] == 3) & (combined_client_df["hostname"] == row.hostname)]
                        new_row.sample_size = "medium"
                        new_row["client_fixations"] = (
                            round(row["client_fixations"] * 0.15 + new_row["client_fixations"].sum() * 0.85, 0).astype("Int64")
                        )
                        new_df = new_df.append(new_row)
            elif row.tier == 3:
                if new_df[new_df["hostname"] == row.hostname]["hostname"].count() == 0:
                    row["sample_size"] = "low"
                    new_df = new_df.append(row)
    return new_df

final_client_df = get_data_based_on_tier(all_df)

  new_df = new_df.append(row)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row.sample_size = "medium"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row["client_fixations"] = (
  new_df = new_df.append(new_row)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row.sample_size = "medium"
A value is trying to be set on a copy of a slic

  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df =

  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df =

In [98]:
def do_math(row, arg1, arg2):
    try:
        return round(row[f"{arg1}"] / row[f"{arg2}"] * 100, 2)
    except ZeroDivisionError:
        return 0

final_client_df["client_Inview Ratio"] = final_client_df.apply(lambda x: do_math(x, "client_inview", "client_imps"), axis=1)
final_client_df["client_Fixation/Inview Ratio"] = final_client_df.apply(lambda x: do_math(x, "client_fixations", "client_inview"), axis=1)
final_client_df["tobii_Fixation/Inview Ratio"] = final_client_df.apply(lambda x: do_math(x, "tobii_fixations", "tobii_inview"), axis=1)
final_client_df["client_fixation/impression_ratio"] = final_client_df.apply(lambda x: do_math(x, "client_fixations", "client_imps"), axis=1)
final_client_df = final_client_df[["hostname", "client_imps", "client_fixations", "tobii_fixation/impression_ratio", "client_inview", "client_Inview Ratio", "client_Fixation/Inview Ratio", "tobii_Fixation/Inview Ratio", "tobii_inview/impression_ratio", "sample_size", "tier"]]
final_client_df = final_client_df.rename(
    columns={
        "client_imps": "Impressions", 
        "client_fixations": "Fixations",
        "client_inview": "Inviews",
        "client_Inview Ratio": "Inview Ratio",
        "tobii_Fixation/Inview Ratio": "Fixation/Tobii Inview Ratio",
        "client_Fixation/Inview Ratio": "Fixation/Inview Ratio",
        "sample_size": "Sample Size"
    }
)
final_client_df

Unnamed: 0,hostname,Impressions,Fixations,tobii_fixation/impression_ratio,Inviews,Inview Ratio,Fixation/Inview Ratio,Fixation/Tobii Inview Ratio,tobii_inview/impression_ratio,Sample Size,tier
0,aftonbladet.se,1217590,529194,36.02,838808,68.89,63.09,76.77,46.92,high,1
33,bt.se,11320,1685.0,22.90,7723,68.22,21.82,31.17,73.44,medium,3
47,di.se,117468,20043.0,21.80,80321,68.38,24.95,29.81,73.15,medium,3
52,ekstrabladet.dk,184,48.0,11.66,84,45.65,57.14,14.70,79.28,medium,3
63,expressen.se,320545,100709.0,29.61,215415,67.20,46.75,42.14,70.28,medium,3
...,...,...,...,...,...,...,...,...,...,...,...
331,w3schools.com,136,3,1.37,68,50.00,4.41,2.27,60.20,low,3
332,webbkameror.se,78,13,17.43,43,55.13,30.23,38.11,45.74,low,3
333,whathifi.com,31,2,15.93,17,54.84,11.76,33.19,48.01,low,3
334,ystadsallehanda.se,2197,142,18.10,1236,56.26,11.49,25.61,70.65,low,3


In [99]:
final_client_df = final_client_df[["hostname", "Impressions", "Inviews", "Fixations", "Fixation/Inview Ratio", "Sample Size"]]
final_client_df = final_client_df.sort_values(by=["hostname"])

In [100]:
comments = ["Tier 1", "Tier 2", "Tier 3"]

with pd.ExcelWriter(f"./{client}_results_internal.xlsx") as writer:
    rows = 0
    spaces = 0
    for n, df in enumerate(all_df):
        pd.Series(comments[n]).to_excel(writer, sheet_name=client.title(), index=False, header=False, startrow=rows + spaces)
        df.to_excel(writer, client.title(), index=False, startrow=1 + rows + spaces)
        rows += len(df) + 2
        spaces += 2

with pd.ExcelWriter(f"./{client}_results.xlsx") as writer:
    for df in [final_client_df]:
        df.to_excel(writer, sheet_name=client.title(), index=False)