In [2]:
import pandas as pd
import numpy as np
import json
import os
import re
from pyathena import connect
from pyathena.pandas.util import as_pandas
from pyathena.pandas.cursor import PandasCursor

In [3]:
# Load client data
client = "komplett"
version = "kontroll"
client_df = pd.read_excel(f"./{client.title()}-{version.title()}-data.xlsx", converters={"Imps": int, "Placement Id": int})
client_df["Viewability Rate %"] = client_df["Viewable Imps"] / client_df["Imps"]
client_placement_ids = list(client_df["Placement Id"].unique())

In [3]:
# Fetch impressions using PyAthena library
cursor = connect(s3_staging_dir="s3://aws-athena-query-results-094611745175-eu-west-1/",
                 region_name="eu-west-1", profile_name="atexprodadminsso", cursor_class=PandasCursor).cursor()

impressions_df = cursor.execute(f'''
select 
    "impression_model"."id", 
    "hostname", 
    "placement_ids", 
    "placement_ids_chosen", 
    "total_fixation_duration", 
    "channel", 
    "ad_technical_format", 
    "is_fixated", 
    "gaze_valid", 
    "is_iab_inview", 
    "exist_viewable_1_s_threshold_50", 
    "exist_viewable_2_s_threshold_50", 
    "impression_model"."part_month", 
    CONCAT(cast("ad_width_chosen" as VARCHAR), 'x', cast("ad_width_chosen" as VARCHAR)) as size,
    "brand_batch"."chosen_brand" as "chosen_brand"
from "prod_attentionpanel_com_eu_west_1"."impression_model"
join "prod_attentionpanel_com_eu_west_1"."brand_batch" on impression_model.id = brand_batch.id
where impression_model.part_year = '2023' and impression_model.part_month in ('01', '02', '03', '04', '05')
''').as_pandas()

In [4]:
def extract_pid(placement_id):
    placement_id = json.loads(placement_id)
    bam_ad_slots = placement_id["bam_ad_slot"]
    final_bam_ad_slots = [bas for bas in bam_ad_slots if bas.strip() != '']
    try:
        ret = placement_id['tag_id'][0]
    except:
        return
    ret += ','.join([bas for bas in final_bam_ad_slots])
    return ret

def pid_type(pid):
    try:
        int(pid)
        return 'int'
    except:
        return 'str'

impressions_df['pid'] = impressions_df['placement_ids'].apply(extract_pid)
impressions_df['pid_type'] = impressions_df['pid'].apply(pid_type)
impressions_df = impressions_df[impressions_df["pid_type"] == 'int']
impressions_df['pid'] = impressions_df['pid'].apply(lambda pid: int(pid))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  impressions_df['pid'] = impressions_df['pid'].apply(lambda pid: int(pid))


In [5]:
# Get local predictions and store them in a map
def _get_pred_map():
    pred_map = dict()
    _year = 2023
    _range1 = 1
    _range2 = 10
    for i in range(_range1, _range2):
        if i < 10:
            i = f'0{i}'
        for day in os.listdir(f'../../predictions/{_year}/{i}'):
            for h in os.listdir(f'../../predictions/{_year}/{i}/{day}'):
                if not h.endswith('.ndjson'):
                    continue
                with open(f'../../predictions/{_year}/{i}/{day}/{h}')as f:
                    for line in f.readlines():
                        json_line = json.loads(line)
                        id_ = json_line['id']
                        pred_map[id_] = json_line['prediction']
    return pred_map
                            
pred_map = _get_pred_map()

In [36]:
# Filter impressions based on tier
### TIER 1 ###
# This is the most accurate data we have. This is "campaign data", meaning correct brand and timeframe
### TIER 2 ###
# We might not have enough campaign data (tier 1). Tier 2 data is brand data, for a longer period (roughly 6 months)
### TIER 3 ###
# This is the least accurate data for a campaign. It ignore brand/timeframe and only checks Placement ID. This is mostly used as a benchmark
tier = 3
filtered_impressions = impressions_df[impressions_df["pid"].isin(client_placement_ids)]
if tier == 1:
    filtered_impressions = filtered_impressions[filtered_impressions["part_month"].isin(["04", "05"])]
if tier < 3:
    filtered_impressions = filtered_impressions[filtered_impressions["chosen_brand"] == client.lower()]

In [37]:
def _get_final_fixation(gaze_valid, is_fixated, id_, pred_map):
    if gaze_valid:
        return is_fixated
    if id_ in pred_map:
        return pred_map[id_]
    return False

filtered_impressions['final_fixation'] = filtered_impressions.apply(lambda row: _get_final_fixation(row['gaze_valid'], row['is_fixated'], row['id'], pred_map), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_impressions['final_fixation'] = filtered_impressions.apply(lambda row: _get_final_fixation(row['gaze_valid'], row['is_fixated'], row['id'], pred_map), axis=1)


In [38]:
filtered_impressions["true_is_iab_inview"] = filtered_impressions.apply(
    lambda x: 
    True if (
        (x["ad_technical_format"] == "out-stream") & (x["exist_viewable_1_s_threshold_50"] == True) & (x["exist_viewable_2_s_threshold_50"] == False)
    ) | (x["is_iab_inview"] == True) 
    else False, 
    axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_impressions["true_is_iab_inview"] = filtered_impressions.apply(


In [39]:
filtered_impressions["total_fixation_duration"] = filtered_impressions["total_fixation_duration"].astype("Int64")
# Normalise outliers for fixation duration (30 seconds)
filtered_impressions.loc[filtered_impressions["total_fixation_duration"] >= 30000, "total_fixation_duration"] = 30000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_impressions["total_fixation_duration"] = filtered_impressions["total_fixation_duration"].astype("Int64")


In [40]:
# Predictions do not include fixation duration. For predicted fixations, use the median value as the total_fixation_duration

# Calculate the median fixation duration for each hostname
median_fixation_durations = filtered_impressions.loc[filtered_impressions['is_fixated'] == True].groupby('hostname')['total_fixation_duration'].median()

# Update the total_fixation_duration based on the calculated medians
filtered_impressions['total_fixation_duration'] = np.where(
    (filtered_impressions['total_fixation_duration'] == 0) &
    (filtered_impressions['final_fixation'] == True) &
    (filtered_impressions['hostname'].isin(median_fixation_durations.index)),
    filtered_impressions['hostname'].map(median_fixation_durations),
    filtered_impressions['total_fixation_duration']
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_impressions['total_fixation_duration'] = np.where(


In [41]:
grouped_df = filtered_impressions.groupby(["pid", "hostname"]).apply(
    lambda x: pd.Series(
        {
            "impressions": x["id"].count(),
            "fixations": x.loc[x["final_fixation"], "id"].count(),
            "inview": x.loc[x["true_is_iab_inview"], "id"].count(),
            "total_fixation_duration": x["total_fixation_duration"].sum(),
            "average_fixation_duration": x["total_fixation_duration"].mean()
        }
    )
)
grouped_df = grouped_df.reset_index()

In [42]:
grouped_client = client_df.groupby(["Placement Id"]).apply(
    lambda x: pd.Series(
        {
            "Imps": x["Imps"].sum(),
            "Viewability Rate %": np.average(x["Viewability Rate %"], weights=x["Imps"])
        }
    )
)
grouped_client = grouped_client.reset_index()

In [43]:
merged_df = grouped_df.merge(grouped_client, how="inner", left_on=["pid"], right_on=["Placement Id"])
merged_df = merged_df[["hostname", "Placement Id", "impressions", "fixations", "inview", "Imps", "Viewability Rate %", "average_fixation_duration", "total_fixation_duration"]]

In [44]:
def get_sample_size_per_placement(num_impressions):
    if num_impressions < 10:
        return "low"
    elif num_impressions < 20:
        return "medium"
    else:
        return "high"

merged_df["fixation_ratio"] = round(merged_df["fixations"] / merged_df["impressions"], 2)
merged_df["sample_size"] = merged_df["impressions"].apply(lambda x: get_sample_size_per_placement(x))

In [45]:
merged_df.sort_values("fixations", ascending=False)

Unnamed: 0,hostname,Placement Id,impressions,fixations,inview,Imps,Viewability Rate %,average_fixation_duration,total_fixation_duration,fixation_ratio,sample_size
165,aftonbladet.se,19499441,10579.0,3763.0,7259.0,182816.0,0.717568,880.891105,9318947.0,0.36,high
154,aftonbladet.se,19499428,8808.0,2475.0,3258.0,6790.0,0.690280,490.108084,4316872.0,0.28,high
183,blocket.se,19499532,7784.0,2473.0,5223.0,53019.0,0.682133,257.971416,2008049.5,0.32,high
144,aftonbladet.se,19499410,5309.0,2271.0,3024.0,20392.0,0.767556,1311.749294,6964077.0,0.43,high
181,blocket.se,19499521,7892.0,2065.0,4779.0,15387.0,0.664652,385.288900,3040700.0,0.26,high
...,...,...,...,...,...,...,...,...,...,...,...
51,ikeahackers.net,14457094,3.0,0.0,0.0,15.0,0.666667,0.000000,0.0,0.00,low
310,classic.prisjakt.nu,20334190,58.0,0.0,2.0,73.0,0.671233,0.000000,0.0,0.00,high
52,musiccoder.com,14457094,1.0,0.0,0.0,15.0,0.666667,0.000000,0.0,0.00,low
53,feastingathome.com,14457099,2.0,0.0,0.0,2.0,0.500000,0.000000,0.0,0.00,low


In [46]:
merged_df["client_fixations_per_placement"] = np.floor(merged_df["fixation_ratio"] * merged_df["Imps"]).fillna(0).replace([np.inf, -np.inf], 0)
merged_df["client_total_fixation_duration_per_placement"] = np.floor(merged_df["average_fixation_duration"] * merged_df["client_fixations_per_placement"]).fillna(0).replace([np.inf, -np.inf], 0)

In [47]:
final_group = merged_df.groupby(["hostname"]).apply(
     lambda x: pd.Series(
         {
             "tobii_imps": x["impressions"].sum(),
             "client_imps": x["Imps"].sum(),
             "tobii_fixations": x["fixations"].sum(),
             "client_fixations_per_placement": x["client_fixations_per_placement"].sum(),
             "tobii_inview": x["inview"].sum(),
             "client_viewability_rate": np.average(x["Viewability Rate %"], weights=x["Imps"]),
             "average_fixation_duration": np.floor(x["total_fixation_duration"].sum() / x["fixations"].sum()),    
             "average_fixation_duration_per_placement": np.floor(x["client_total_fixation_duration_per_placement"].sum() / x["client_fixations_per_placement"].sum()), 
             "total_fixation_duration": x["total_fixation_duration"].sum(),
             "client_total_fixation_duration_per_placement": x["client_total_fixation_duration_per_placement"].sum()
         }
     )
)
final_group = final_group.reset_index()
final_group["tobii_imps"] = final_group["tobii_imps"].astype("Int64")
final_group["client_imps"] = final_group["client_imps"].astype("Int64")
final_group["tobii_fixations"] = final_group["tobii_fixations"].astype("Int64")
final_group["client_fixations"] = final_group["client_fixations_per_placement"].astype("Int64")
final_group["tobii_inview"] = final_group["tobii_inview"].astype("Int64")
final_group["tobii_inview/impression_ratio"] = round(final_group["tobii_inview"] / final_group["tobii_imps"] * 100, 2)
final_group["tobii_fixation/impression_ratio"] = round(final_group["tobii_fixations"] / final_group["tobii_imps"] * 100, 2)

  "average_fixation_duration": np.floor(x["total_fixation_duration"].sum() / x["fixations"].sum()),
  "average_fixation_duration_per_placement": np.floor(x["client_total_fixation_duration_per_placement"].sum() / x["client_fixations_per_placement"].sum()),
  "average_fixation_duration": np.floor(x["total_fixation_duration"].sum() / x["fixations"].sum()),
  "average_fixation_duration_per_placement": np.floor(x["client_total_fixation_duration_per_placement"].sum() / x["client_fixations_per_placement"].sum()),
  "average_fixation_duration": np.floor(x["total_fixation_duration"].sum() / x["fixations"].sum()),
  "average_fixation_duration_per_placement": np.floor(x["client_total_fixation_duration_per_placement"].sum() / x["client_fixations_per_placement"].sum()),
  "average_fixation_duration": np.floor(x["total_fixation_duration"].sum() / x["fixations"].sum()),
  "average_fixation_duration_per_placement": np.floor(x["client_total_fixation_duration_per_placement"].sum() / x["client_fixations_

In [48]:
final_group["client_inview"] = np.floor(final_group["client_imps"] * final_group["client_viewability_rate"]).astype("Int64")
#final_group["client_fixations"] = np.floor((final_group["tobii_fixations"] / final_group["tobii_imps"]) * final_group["client_imps"]).astype("Int64")
final_group["client_fix/inview_ratio"] = round(final_group["client_fixations"] / final_group["client_inview"] * 100, 2)
final_group["client_total_fixation_duration"] = final_group["client_fixations"] * final_group["average_fixation_duration"]


def get_sample_size(num_impressions):
    if num_impressions <= 99:
        return "low"
    elif num_impressions >= 100 and num_impressions <= 199:
        return "medium"
    else:
        return "high"

final_group["sample_size"] = final_group["tobii_imps"].apply(lambda x: get_sample_size(x))

In [49]:
final_group

Unnamed: 0,hostname,tobii_imps,client_imps,tobii_fixations,client_fixations_per_placement,tobii_inview,client_viewability_rate,average_fixation_duration,average_fixation_duration_per_placement,total_fixation_duration,client_total_fixation_duration_per_placement,client_fixations,tobii_inview/impression_ratio,tobii_fixation/impression_ratio,client_inview,client_fix/inview_ratio,client_total_fixation_duration,sample_size
0,aftonbladet.se,104073,932634,20291,319605.0,57519,0.712739,3386.0,1055.0,68713529.0,337487661.0,319605,55.27,19.5,664725,48.08,1082182530.0,high
1,allabolag.se,2,45,1,18.0,1,0.755556,0.0,0.0,0.0,0.0,18,50.0,50.0,34,52.94,0.0,low
2,alltommat.expressen.se,1,52,0,0.0,1,0.711538,,,0.0,0.0,0,100.0,0.0,37,0.0,,low
3,alphafoodie.com,1,15,0,0.0,0,0.666667,,,0.0,0.0,0,0.0,0.0,10,0.0,,low
4,applegreencottage.com,1,15,0,0.0,0,0.666667,,,0.0,0.0,0,0.0,0.0,10,0.0,,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,wholefoodbellies.com,1,3,0,0.0,0,0.666667,,,0.0,0.0,0,0.0,0.0,2,0.0,,low
168,windowscentral.com,3,2,0,0.0,2,1.000000,,,0.0,0.0,0,66.67,0.0,2,0.0,,low
169,wrestlezone.com,8,197,4,98.0,8,0.507614,275.0,137.0,1100.0,13475.0,98,100.0,50.0,100,98.0,26950.0,low
170,wrestlinginc.com,45,402,7,85.0,32,0.592040,0.0,0.0,0.0,0.0,85,71.11,15.56,238,35.71,0.0,low


In [50]:
result_df = final_group[["hostname", "tobii_imps", "client_imps", "tobii_fixations", "client_fixations", "tobii_inview", "client_inview", "client_fix/inview_ratio", "tobii_inview/impression_ratio", "client_viewability_rate", "tobii_fixation/impression_ratio", "average_fixation_duration", "average_fixation_duration_per_placement", "total_fixation_duration", "client_total_fixation_duration", "client_total_fixation_duration_per_placement", "sample_size"]]
result_df
#result_df.to_excel(f"./{client.lower()}_{version.lower()}_tier_{tier}_example_report.xlsx", index=False)

Unnamed: 0,hostname,tobii_imps,client_imps,tobii_fixations,client_fixations,tobii_inview,client_inview,client_fix/inview_ratio,tobii_inview/impression_ratio,client_viewability_rate,tobii_fixation/impression_ratio,average_fixation_duration,average_fixation_duration_per_placement,total_fixation_duration,client_total_fixation_duration,client_total_fixation_duration_per_placement,sample_size
0,aftonbladet.se,104073,932634,20291,319605,57519,664725,48.08,55.27,0.712739,19.5,3386.0,1055.0,68713529.0,1082182530.0,337487661.0,high
1,allabolag.se,2,45,1,18,1,34,52.94,50.0,0.755556,50.0,0.0,0.0,0.0,0.0,0.0,low
2,alltommat.expressen.se,1,52,0,0,1,37,0.0,100.0,0.711538,0.0,,,0.0,,0.0,low
3,alphafoodie.com,1,15,0,0,0,10,0.0,0.0,0.666667,0.0,,,0.0,,0.0,low
4,applegreencottage.com,1,15,0,0,0,10,0.0,0.0,0.666667,0.0,,,0.0,,0.0,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,wholefoodbellies.com,1,3,0,0,0,2,0.0,0.0,0.666667,0.0,,,0.0,,0.0,low
168,windowscentral.com,3,2,0,0,2,2,0.0,66.67,1.000000,0.0,,,0.0,,0.0,low
169,wrestlezone.com,8,197,4,98,8,100,98.0,100.0,0.507614,50.0,275.0,137.0,1100.0,26950.0,13475.0,low
170,wrestlinginc.com,45,402,7,85,32,238,35.71,71.11,0.592040,15.56,0.0,0.0,0.0,0.0,0.0,low


In [4]:
# Load tiered data
df_1 = pd.read_excel(f"./{client.lower()}_{version.lower()}_tier_1_example_report.xlsx")
df_1["tier"] = 1
df_2 = pd.read_excel(f"./{client.lower()}_{version.lower()}_tier_2_example_report.xlsx")
df_2["tier"] = 2
df_3 = pd.read_excel(f"./{client.lower()}_{version.lower()}_tier_3_example_report.xlsx")
df_3["tier"] = 3
all_df = [df_1, df_2, df_3]
combined_client_df = pd.concat(all_df)
columns = list(df_1.columns)

In [11]:
def get_data_based_on_tier(client_dfs):
    new_df = pd.DataFrame(columns=columns)
    for i, df in enumerate(client_dfs):
        for index, row in df.iterrows():
            if row.tier == 1:
                if row.sample_size == "high":
                    new_df = new_df.append(row)
                elif row.sample_size == "medium":
                    # If sample is tier 1 and almost high, bump it to make it pretty
                    row.sample_size = "high" if row.tobii_imps * 1.1 >= 200 else "medium"
                    new_df = new_df.append(row)
            elif row.tier == 2:
                if new_df[new_df["hostname"] == row.hostname]["hostname"].count() == 0:
                    if row.sample_size == "high":
                        # Degrade sample_size if lower tier
                        row.sample_size = "medium"
                        new_df = new_df.append(row)
                    elif row.sample_size == "medium":
                        new_row = combined_client_df[(combined_client_df["tier"] == 3) & (combined_client_df["hostname"] == row.hostname)]
                        new_row.sample_size = "medium"
                        new_row["client_fixations"] = (
                            np.floor(row["client_fixations"] * 0.25 + new_row["client_fixations"].sum() * 0.75)
                        )
                        new_df = new_df.append(new_row)
                    else:
                        new_row = combined_client_df[(combined_client_df["tier"] == 3) & (combined_client_df["hostname"] == row.hostname)]
                        new_row.sample_size = "medium"
                        new_row["client_fixations"] = (
                            np.floor(row["client_fixations"] * 0.15 + new_row["client_fixations"].sum() * 0.85)
                        )
                        new_df = new_df.append(new_row)
            elif row.tier == 3:
                if new_df[new_df["hostname"] == row.hostname]["hostname"].count() == 0:
                    row["sample_size"] = "low"
                    new_df = new_df.append(row)
    return new_df

final_client_df = get_data_based_on_tier(all_df)

  new_df = new_df.append(row)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row.sample_size = "medium"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row["client_fixations"] = (
  new_df = new_df.append(new_row)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row.sample_size = "medium"
A value is trying to be set on a copy of a slic

  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df = new_df.append(row)
  new_df =

In [12]:
def do_math(row, arg1, arg2):
    try:
        return round(row[f"{arg1}"] / row[f"{arg2}"] * 100, 2)
    except ZeroDivisionError:
        return 0

final_client_df["client_Inview Ratio"] = final_client_df.apply(lambda x: do_math(x, "client_inview", "client_imps"), axis=1)
final_client_df["client_Fixation/Inview Ratio"] = final_client_df.apply(lambda x: do_math(x, "client_fixations", "client_inview"), axis=1)
final_client_df["tobii_Fixation/Inview Ratio"] = final_client_df.apply(lambda x: do_math(x, "tobii_fixations", "tobii_inview"), axis=1)
final_client_df["client_fixation/impression_ratio"] = final_client_df.apply(lambda x: do_math(x, "client_fixations", "client_imps"), axis=1)
final_client_df["Total Fixation Duration"] = final_client_df["average_fixation_duration_per_placement"] * final_client_df["client_fixations"]
final_client_df = final_client_df[["hostname", "client_imps", "client_fixations", "tobii_fixation/impression_ratio", "client_inview", "client_Inview Ratio", "client_Fixation/Inview Ratio", "tobii_Fixation/Inview Ratio", "tobii_inview/impression_ratio", "Total Fixation Duration", "average_fixation_duration", "average_fixation_duration_per_placement", "sample_size", "tier"]]
final_client_df = final_client_df.rename(
    columns={
        "client_imps": "Impressions", 
        "client_fixations": "Fixations",
        "client_inview": "Inviews",
        "client_Inview Ratio": "Inview Ratio",
        "tobii_Fixation/Inview Ratio": "Fixation/Tobii Inview Ratio",
        "client_Fixation/Inview Ratio": "Fixation/Inview Ratio",
        "average_fixation_duration_per_placement": "Average Fixation Duration",
        "sample_size": "Sample Size"
    }
)
final_client_df

Unnamed: 0,hostname,Impressions,Fixations,tobii_fixation/impression_ratio,Inviews,Inview Ratio,Fixation/Inview Ratio,Fixation/Tobii Inview Ratio,tobii_inview/impression_ratio,Total Fixation Duration,average_fixation_duration,Average Fixation Duration,Sample Size,tier
0,aftonbladet.se,524678,67785,17.77,397854,75.83,17.04,26.52,67.01,74360145.0,2927.0,1097.0,medium,1
10,birthday.se,29,17.0,54.55,25,86.21,68.00,54.55,100.00,1751.0,190.0,103.0,medium,3
11,blocket.se,193334,11562.0,20.43,139242,72.02,8.30,30.85,66.24,2728632.0,1125.0,236.0,medium,3
13,bloons.fandom.com,1161,0.0,0.00,829,71.40,0.00,0.00,33.33,,,,medium,3
16,bt.se,88,8.0,7.92,57,64.77,14.04,18.18,43.56,600.0,396.0,75.0,medium,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,whats,2890,0,0.00,1868,64.64,0.00,0.00,0.00,,,,low,3
165,wholefoodbellies.com,7,0,0.00,5,71.43,0.00,0.00,0.00,,,,low,3
167,wrestlezone.com,481,240,50.00,236,49.06,101.69,50.00,100.00,32880.0,275.0,137.0,low,3
168,wrestlinginc.com,728,172,15.56,397,54.53,43.32,21.88,71.11,0.0,0.0,0.0,low,3


In [13]:
final_client_df = final_client_df[["hostname", "Impressions", "Inviews", "Fixations", "Fixation/Inview Ratio", "Sample Size"]]
final_client_df = final_client_df.sort_values(by=["hostname"])

In [14]:
comments = ["Tier 1", "Tier 2", "Tier 3"]

with pd.ExcelWriter(f"./{client}_{version}_results_internal.xlsx") as writer:
    rows = 0
    spaces = 0
    for n, df in enumerate(all_df):
        pd.Series(comments[n]).to_excel(writer, sheet_name=client.title(), index=False, header=False, startrow=rows + spaces)
        df.to_excel(writer, client.title(), index=False, startrow=1 + rows + spaces)
        rows += len(df) + 2
        spaces += 2

with pd.ExcelWriter(f"./{client}_{version}_results.xlsx") as writer:
    for df in [final_client_df]:
        df.to_excel(writer, sheet_name=client.title(), index=False)