In [None]:
import locale
import warnings

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from scipy.stats import pearsonr

# Heavy load test analysis

## Define helper functions

In [None]:
def nano_to_ms(x):
    """Convert a duration from nanoseconds to milliseconds.

    Works on anything that supports true division by an int (plain numbers
    as well as array-like objects).
    """
    # Two separate divisions (ns -> µs -> ms) rather than one by 1e6.
    as_micro = x / 1000
    return as_micro / 1000

def ms_to_nano(x):
    """Convert a duration from milliseconds to nanoseconds."""
    # Two separate multiplications (ms -> µs -> ns).
    as_micro = x * 1000
    return as_micro * 1000

def nano_to_micro(x):
    """Convert a duration from nanoseconds to microseconds."""
    nanos_per_micro = 1000
    return x / nanos_per_micro


def label_string(s: str):
    """Abbreviate ``s`` to its first five and last five characters.

    The two parts are joined by a literal ``...``. Strings shorter than five
    characters simply contribute everything they have to both parts.
    """
    head = s[:5]
    tail = s[-5:]
    return "{}...{}".format(head, tail)

def get_suffix(s: str):
    """Return the part of ``s`` after its last underscore.

    If ``s`` contains no underscore the whole string is returned. Trailing
    zeros are kept (an earlier ``.rstrip("0")`` variant is disabled).
    """
    _, _, suffix = s.rpartition("_")
    return suffix

def filter(df, *patterns):
    """Return the rows of ``df`` whose ``name`` matches any of ``patterns``.

    The patterns are combined into one regular-expression alternation and
    matched anywhere inside the ``name`` column. Called without patterns, the
    empty expression matches every row.

    Note: this function shadows the builtin ``filter`` in the notebook.
    """
    alternation = "|".join(patterns)
    matches = df["name"].str.contains(alternation)
    return df[matches]



# outlier removal 

# Tail quantile that defines the outlier fences (0.01 -> 1st and 99th
# percentile). Deliberately NOT called `range`: a module-level `range` would
# shadow the builtin for every cell that runs afterwards.
OUTLIER_QUANTILE = 0.01


def remove_outliers_iqr(group, quantile=OUTLIER_QUANTILE, whisker=3):
    """Drop rows whose ``time`` lies far outside the bulk of the data.

    The fences are built like IQR fences, but from the ``quantile`` and
    ``1 - quantile`` quantiles of ``group["time"]`` instead of the quartiles:

        lower = q_low  - whisker * (q_high - q_low)
        upper = q_high + whisker * (q_high - q_low)

    Args:
        group: DataFrame with a numeric ``time`` column.
        quantile: Lower tail quantile; the upper one is ``1 - quantile``.
        whisker: Multiple of the inter-quantile spread added beyond each
            quantile.

    Returns:
        The rows of ``group`` whose ``time`` lies within the fences
        (both bounds inclusive).
    """
    times = group["time"]
    low_q = times.quantile(quantile)
    high_q = times.quantile(1 - quantile)
    spread = high_q - low_q
    lower_bound = low_q - whisker * spread
    upper_bound = high_q + whisker * spread
    # `between` is inclusive on both ends, matching `>=` / `<=`.
    return group[times.between(lower_bound, upper_bound)]

# plotting

def lineplot_group(df, col_wrap, height):
    """Draw one line plot of ``time`` over ``index`` per distinct ``name``.

    Args:
        df: DataFrame with ``name``, ``index`` and ``time`` columns.
        col_wrap: Number of facets per row.
        height: Height of each facet, passed through to ``sns.FacetGrid``.

    Every facet gets its own x and y axis range. The figure is shown
    immediately; nothing is returned.
    """
    grid = sns.FacetGrid(
        df,
        col="name",
        col_wrap=col_wrap,
        height=height,
        sharex=False,
        sharey=False,
    )
    grid.map(sns.lineplot, "index", "time")
    grid.set_axis_labels("Index of Time Entry", "Time in µs")
    grid.set_titles("Line Plot for {col_name}")
    grid.tight_layout()
    plt.show()




## Load raw measurements

In [None]:
# Measurement logs to analyse. The commented-out entries are alternative
# files (presumably earlier runs) that are currently excluded.
csv_paths = [
    #"android/logs/csv/2023-11-03_heavyLoadStringFinal3.csv",
    "android/logs/csv/2023-11-17_heavyLoadStringBinary2.csv",
    "android/logs/csv/2023-11-03_heavyLoadHMFinal.csv",
    #"ios/logs/csv/2023-11-03_heavyLoadStringFinal.csv",
    "ios/logs/csv/2023-11-17_heavyLoadStringWithBinary2.csv",
    "ios/logs/csv/2023-11-03_heavyLoadDictFinal.csv",
]
experiments = [pd.read_csv(path) for path in csv_paths]

df = pd.concat(experiments, ignore_index=True)
# From here on `time` is in microseconds and `name` is only the test id
# (the part after the last underscore).
df["time"] = nano_to_micro(df["time"])
df["name"] = df["name"].map(get_suffix)
# sort_values returns a new frame, so the result has to be assigned.
# A stable sort keeps the original measurement order within each name.
df = df.sort_values("name", kind="stable")

# `time` is a float column after the nanosecond -> microsecond division, so
# the integer format code "d" would raise a ValueError as soon as the Styler
# is rendered. Use a float format with thousands separators instead.
# NOTE(review): a Styler is only displayed when it is the last expression of
# a cell; in the middle of a cell this statement has no visible effect.
df.style.format({
    "time": "{:,.3f}",
})

# Show floats with three decimals in all DataFrame reprs.
pd.set_option("display.float_format", lambda x: "%.3f" % x)

# The German numeric locale is not installed on every machine; without the
# guard a missing locale aborts the whole notebook run at this cell.
try:
    locale.setlocale(locale.LC_NUMERIC, "de_DE.UTF-8")
except locale.Error:
    warnings.warn("Locale 'de_DE.UTF-8' is not available; keeping the current LC_NUMERIC setting.")

# Summary statistics of the raw (not yet outlier-filtered) T3/T4 series.
filter(df, "T3", "T4").groupby("name").describe()

## Copy data and remove outliers

In [None]:
# Filter outliers separately for every test series. Iterating over the groups
# (instead of groupby(...).apply) always hands the complete sub-frame,
# including the `name` column, to remove_outliers_iqr. groupby.apply is
# deprecating the inclusion of grouping columns, which would silently drop
# `name` from the result. Groups are concatenated in sorted key order and
# re-indexed from 0.
cleaned_df = pd.concat(
    [remove_outliers_iqr(series) for _, series in df.groupby("name")],
    ignore_index=True,
)
# Summary statistics of the cleaned T3/T4 series.
filter(cleaned_df, "T3", "T4").groupby("name").describe()

## Visual comparison

In [None]:
# reset_index() exposes the row position as an `index` column, which
# lineplot_group uses as the x axis. Two facets per row, each of height 10.
lineplot_group(cleaned_df.reset_index(), col_wrap=2, height=10)



In [None]:
# Mean time per test series.
mean_df = cleaned_df.groupby("name")["time"].mean().reset_index().rename(columns={"time": "mean"})

# A name is "<group><complexity>": e.g. "T1a" followed by digits.
mean_df["group"] = mean_df["name"].str.extract(r"(T\d+[a-zA-Z])", expand=False)
complexity = mean_df["name"].str.extract(r"T\d+[a-zA-Z](\d+)", expand=False).astype(int)

# 0-based position of each series inside its group, ordered by the numeric
# complexity. Counting rows in name order would be lexicographic, which puts
# e.g. "T1a10" before "T1a2".
mean_df["complexity_index"] = complexity.groupby(mean_df["group"]).rank(method="first").astype(int) - 1
mean_df["complexity"] = complexity

# Human-readable legend labels per test group.
group_to_display_name = {
    "T1a": "Android - StandardMessageCodec",
    "T1b": "Android - StringCodec",
    "T1c": "Android - BinaryCodec",
    "T1d": "Android - BinaryCodecDirect",
    "T2a": "iOS - StandardMessageCodec",
    "T2b": "iOS - StringCodec",
    "T2c": "iOS - BinaryCodec",
    "T3x": "Android - HashMap",
    "T4x": "iOS - Dictionary",
}
mean_df["display_name"] = mean_df["group"].map(group_to_display_name)

mean_df

In [None]:
# Kept for other cells that may read it. `lineplot` resolves its own colours
# because this snapshot is taken before `sns.set()` changes the colour cycle.
palette = sns.color_palette()


def lineplot(df, show_hlines: bool):
    """Plot mean time against complexity with one line per display name.

    For every ``display_name`` a dotted linear regression line is drawn
    (via ``sns.regplot``), and on top of it the measured means connected by
    a solid line with markers. Both use the same colour.

    Args:
        df: DataFrame with ``display_name``, ``complexity`` and ``mean``
            columns.
        show_hlines: If true, add dashed horizontal reference lines at
            16 000 and 5 000 on the y axis, labelled as the frame budgets
            for Android and iOS.

    The figure is shown immediately; nothing is returned.
    """
    sns.set(style="whitegrid")
    _fig, ax = plt.subplots(figsize=(5, 5))

    # Resolve colours only after sns.set(), and hand the very same mapping to
    # both plotting calls. Otherwise the regression lines (sorted group order,
    # palette captured earlier) and the hue lines (order of appearance,
    # current palette) can end up with different colours.
    display_names = sorted(df["display_name"].dropna().unique())
    colors = dict(zip(display_names, sns.color_palette(n_colors=len(display_names))))

    for display_name, group in df.groupby("display_name"):
        sns.regplot(
            x="complexity",
            y="mean",
            data=group.reset_index(),
            ax=ax,
            scatter=False,
            color=colors[display_name],
        )
        # The regression line is the line regplot has just added.
        ax.lines[-1].set_linestyle("dotted")

    sns.lineplot(
        x="complexity",
        y="mean",
        hue="display_name",
        hue_order=display_names,
        palette=colors,
        data=df,
        marker="o",
        ax=ax,
    )

    #plt.title("Zeit vs. Komplexität der Tests")
    ax.set_xlabel("Komplexität")
    ax.set_ylabel("Zeit in µs")
    ax.grid(True)
    if show_hlines:
        # The y axis is in µs, so 16_000 is 16 ms and 5_000 is 5 ms.
        ax.axhline(y=16_000, color="magenta", linestyle="--", label="16ms für neue Frames (Android)")
        ax.axhline(y=5_000, color="teal", linestyle="--", label="5ms für neue Frames (iOS)")
    ax.legend(title="Name")

    plt.show()



# One figure per platform for the codec tests (T1 and T2 groups), and one
# combined figure with the reference lines for the T3/T4 groups.
lineplot(filter(mean_df, "T1").sort_values("display_name"), show_hlines=False)
lineplot(filter(mean_df, "T2").sort_values("display_name"), show_hlines=False)
lineplot(filter(mean_df, "T3", "T4"), show_hlines=True)



In [None]:
def _pearson_r(rows):
    """Return Pearson's r between ``mean`` and ``complexity`` of one group."""
    result = pearsonr(rows["mean"], rows["complexity"])
    # pearsonr returns (coefficient, p-value); only the coefficient is kept.
    return result[0]


# One correlation coefficient per test group.
correlation_coefficients = mean_df.groupby("group").apply(_pearson_r)

print(correlation_coefficients)