In [12]:
import numpy as np
import pandas as pd
import utilities
from pathlib import Path

In [6]:
# Load the performance records of both crawl configurations.
# Both JSON files are resolved against the parent of the current working directory.
performance_extension_path = Path.cwd().parent / 'performance/performance_EXTENSION.json'
performance_no_extension_path = Path.cwd().parent / 'performance/performance_NO_EXTENSION.json'

performance_extension = utilities.read_json(performance_extension_path)
performance_no_extension = utilities.read_json(performance_no_extension_path)

In [9]:
# Find the sites crawled successfully in each configuration: a URL is kept
# when it has a record whose three timing metrics are all strictly positive.
extension_sites = {
    entry["url"]
    for entry in performance_extension
    if all(entry[key] > 0 for key in ("dom_content_loaded", "dom_interactive", "load_event_time"))
}

no_extension_sites = {
    entry["url"]
    for entry in performance_no_extension
    if all(entry[key] > 0 for key in ("dom_content_loaded", "dom_interactive", "load_event_time"))
}

In [10]:
# Collect the timing metrics of every valid record whose site was crawled
# successfully in both configurations.
TIMING_METRICS = ('dom_content_loaded', 'dom_interactive', 'load_event_time')

# Sites with at least one valid record in each configuration (computed once).
shared_sites = extension_sites & no_extension_sites


def _collect_timings(records, sites):
    """Return ``{metric: [values]}`` for the valid records whose URL is in ``sites``.

    A record contributes only if every timing metric in it is strictly
    positive. Filtering on the URL alone is not sufficient: if the same URL
    appears more than once in ``records``, a failed crawl (non-positive
    timings) would otherwise be included just because another crawl of that
    URL succeeded.

    Parameters
    ----------
    records : iterable of dict
        Records providing a "url" key and the keys listed in TIMING_METRICS.
    sites : set
        URLs to keep.

    Returns
    -------
    dict
        One list of values per metric name, in the order records were visited.
    """
    timings = {metric: [] for metric in TIMING_METRICS}
    for record in records:
        if record["url"] not in sites:
            continue
        # Skip invalid measurements of an otherwise shared site.
        if not all(record[metric] > 0 for metric in TIMING_METRICS):
            continue
        for metric in TIMING_METRICS:
            timings[metric].append(record[metric])
    return timings


extension_records = _collect_timings(performance_extension, shared_sites)
no_extension_records = _collect_timings(performance_no_extension, shared_sites)

In [11]:
# Print the mean and median of each timing metric, first for the crawl with
# the extension and then for the crawl without it.
for heading, timings in (("Extension", extension_records),
                         ("NO Extension", no_extension_records)):
    print(heading)
    print("==========================")
    for metric in ('dom_content_loaded', 'dom_interactive', 'load_event_time'):
        values = timings[metric]
        print(metric + ": ")
        print("mean: ", np.mean(values),
              "median: ", np.median(values))

Extension
dom_content_loaded: 
mean:  1895.568701823076 median:  1020.0
dom_interactive: 
mean:  1702.153187324116 median:  911.0
load_event_time: 
mean:  3635.015416615686 median:  2136.0
NO Extension
dom_content_loaded: 
mean:  1659.3808242631771 median:  964.0
dom_interactive: 
mean:  1463.8422404304756 median:  842.0
load_event_time: 
mean:  3196.9717500305737 median:  2008.0


In [20]:
# filter out outliers
def iqr(data_list, multiplier=1.5):
    """Print count, mean and median of the values left after IQR outlier filtering.

    Despite its name, this function does not return the interquartile range.
    It drops every value outside the inclusive interval
    ``[Q1 - multiplier * IQR, Q3 + multiplier * IQR]`` (Q1/Q3 being the 25th and
    75th percentiles of ``data_list`` and ``IQR = Q3 - Q1``) and prints
    statistics of what remains.

    Parameters
    ----------
    data_list : list-like of numbers
        Raw measurements; anything ``pd.Series`` accepts.
    multiplier : float, default 1.5
        Fence width in units of IQR. The default keeps the previously
        hard-coded factor, so existing calls behave exactly as before.

    Returns
    -------
    None
        Three lines are printed: the number of retained values, then
        ``Mean: ...`` and ``Median: ...`` of the retained values.
    """
    data = pd.Series(data_list)

    # Quartiles and interquartile range of the unfiltered data
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    spread = q3 - q1

    # Fences beyond which a value is treated as an outlier
    lower_limit = q1 - multiplier * spread
    upper_limit = q3 + multiplier * spread

    # Keep only the values inside the fences (both ends inclusive)
    filtered_data = data[(data >= lower_limit) & (data <= upper_limit)]
    print(len(filtered_data))

    print("Mean:", filtered_data.mean())
    print("Median:", filtered_data.median())

In [21]:
# Outlier-filtered statistics for the crawl with the extension,
# one metric at a time.
for metric in ('dom_content_loaded', 'dom_interactive', 'load_event_time'):
    iqr(extension_records[metric])

7324
Mean: 1239.2841343528128
Median: 894.0
7317
Mean: 1085.0959409594095
Median: 792.0
7684
Mean: 2932.3092139510672
Median: 1960.5


In [22]:
# Outlier-filtered statistics for the crawl without the extension,
# one metric at a time.
for metric in ('dom_content_loaded', 'dom_interactive', 'load_event_time'):
    iqr(no_extension_records[metric])

7483
Mean: 1179.9016437257785
Median: 861.0
7501
Mean: 1031.394214104786
Median: 752.0
7862
Mean: 2746.147545153905
Median: 1902.0
