In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Display the installed pandas version as this cell's output.
pd.__version__

# Comparison of Clarke's measurements with my measurement strategy

## Define helper functions

In [None]:
def nano_to_ms(x):
    """Convert a duration from nanoseconds to milliseconds.

    The value is divided by 1000 twice (ns -> µs -> ms), so anything that
    supports ``/`` with a number (scalar, Series, array) is accepted.
    """
    micros = x / 1000
    return micros / 1000

def ms_to_nano(x):
    """Convert a duration from milliseconds to nanoseconds.

    The value is multiplied by 1000 twice (ms -> µs -> ns).
    """
    micros = x * 1000
    return micros * 1000

def nano_to_micro(x):
    """Convert a duration from nanoseconds to microseconds (``x / 1000``)."""
    micros = x / 1000
    return micros


def label_string(s: str):
    """Abbreviate ``s`` to its first five and last five characters.

    The two pieces are joined by a literal ``...``. Strings shorter than five
    characters are not special-cased, so the head and tail may overlap.
    """
    head = s[:5]
    tail = s[-5:]
    return "...".join((head, tail))

def get_suffix(s: str):
    """Return the part of ``s`` that follows its last underscore.

    A string without any underscore is returned unchanged.
    """
    # Stripping trailing "0" characters from the suffix is currently disabled.
    _, _, suffix = s.rpartition("_")
    return suffix

def filter(df, *patterns):
    """Return the rows of ``df`` whose ``name`` matches any of ``patterns``.

    Parameters
    ----------
    df : DataFrame with a string column ``"name"``.
    *patterns : regular-expression fragments; they are joined with ``|`` so a
        row is kept when at least one of them is found in its name.

    Returns
    -------
    The matching subset of ``df`` (original index preserved).

    Notes
    -----
    * This function shadows Python's built-in ``filter`` inside the notebook.
    * NOTE(review): an identically named function is defined again further
      down in the same cell; the later definition is the one in effect.
    """
    # Rows with a missing name count as "no match". Without ``na=False`` the
    # mask would contain missing values and boolean indexing would raise.
    condition = df["name"].str.contains("|".join(patterns), na=False)
    return df[condition]



# outlier removal 

# Tail probability for the lower/upper quantile cut-offs (1 % and 99 %).
# Kept under a dedicated name so that the built-in ``range`` is not shadowed.
OUTLIER_QUANTILE = 0.01


def remove_outliers_iqr(group, quantile: float = OUTLIER_QUANTILE, whisker: float = 3):
    """Drop rows of ``group`` whose ``time`` lies far outside the central range.

    Despite the name, the bounds are not built from the classic 25 %/75 %
    quartiles: the ``quantile`` and ``1 - quantile`` quantiles of ``time`` are
    used (1 % and 99 % by default). Their distance is the spread, and a row is
    kept when its time lies within ``whisker`` spreads of those two quantiles
    (bounds inclusive).

    Parameters
    ----------
    group : DataFrame with a numeric ``"time"`` column, e.g. one groupby group.
    quantile : lower tail probability; the upper one is ``1 - quantile``.
    whisker : multiple of the spread tolerated beyond each quantile.

    Returns
    -------
    The rows of ``group`` inside the bounds, with their original index.
    """
    times = group["time"]
    lower_q = times.quantile(quantile)
    upper_q = times.quantile(1 - quantile)
    spread = upper_q - lower_q
    lower_bound = lower_q - whisker * spread
    upper_bound = upper_q + whisker * spread
    return group[(times >= lower_bound) & (times <= upper_bound)]

# plotting

def boxplot(df, name):
    """Show a box plot of ``time`` per ``name`` on a new 5x5 inch figure.

    Parameters
    ----------
    df : DataFrame with the columns ``"name"`` (x axis) and ``"time"`` (y axis).
    name : intended figure title; currently unused because the title call
        is disabled.
    """
    sns.set(style="whitegrid")
    plt.figure(figsize=(5, 5))
    ax = sns.boxplot(data=df, x="name", y="time")
    # Title intentionally left off: plt.title(name)
    ax.set_xlabel("Name des Tests")
    ax.set_ylabel("Zeit in µs")
    # Tilt the category labels by 45 degrees.
    plt.xticks(rotation=45)
    plt.show()

def filter(df, *patterns):
    """Return the rows of ``df`` whose ``name`` matches any of ``patterns``.

    Parameters
    ----------
    df : DataFrame with a string column ``"name"``.
    *patterns : regular-expression fragments; they are joined with ``|`` so a
        row is kept when at least one of them is found in its name.

    Returns
    -------
    The matching subset of ``df`` (original index preserved).

    Notes
    -----
    * This function shadows Python's built-in ``filter`` inside the notebook.
    * NOTE(review): a function with the same name is already defined earlier
      in this cell; this later definition overrides it. One of the two
      should be removed.
    """
    # Rows with a missing name count as "no match". Without ``na=False`` the
    # mask would contain missing values and boolean indexing would raise.
    condition = df["name"].str.contains("|".join(patterns), na=False)
    return df[condition]

def kdeplot(df, bw_adjust: int):
    """Draw one KDE panel of ``time`` per ``name`` and mark mean and median.

    Parameters
    ----------
    df : DataFrame with the columns ``"name"`` and ``"time"``.
    bw_adjust : forwarded unchanged to ``sns.kdeplot`` as ``bw_adjust``.

    Panels are wrapped after six columns and do not share axes. Each panel
    gets a dashed red line at the mean and a dashed orange line at the median
    of its own data.
    """
    grid = sns.FacetGrid(df, col="name", col_wrap=6, sharex=False, sharey=False)
    grid.map(sns.kdeplot, "time", color="b", fill=True, bw_adjust=bw_adjust)
    grid.set_titles("{col_name}")
    grid.set_xlabels("Time in µs")
    grid.set_ylabels("Density")

    # Each panel title is the facet's ``name`` value (see set_titles above),
    # so the title is used to look up the rows that belong to the panel.
    for panel in grid.axes.flat:
        panel_times = df.loc[df["name"] == panel.get_title(), "time"]
        markers = (
            (panel_times.mean(), "red", "Mittelwert"),
            (panel_times.median(), "orange", "Median"),
        )
        for position, colour, text in markers:
            panel.axvline(position, color=colour, linestyle="--", label=text)

    # A single legend call after the loop: it applies to pyplot's current
    # axes only, not to every panel.
    plt.legend()
    plt.show()

def lineplot_group(df, col_wrap, height):
    """Plot ``time`` against ``index`` as one line chart per ``name``.

    Parameters
    ----------
    df : DataFrame with the columns ``"name"``, ``"index"`` and ``"time"``.
    col_wrap : number of panels per row, passed to ``sns.FacetGrid``.
    height : panel height, passed to ``sns.FacetGrid``.

    The panels do not share x or y axes.
    """
    grid = sns.FacetGrid(
        df,
        col="name",
        col_wrap=col_wrap,
        height=height,
        sharex=False,
        sharey=False,
    )
    (
        grid.map(sns.lineplot, "index", "time")
        .set_axis_labels("Index of Time Entry", "Time in µs")
        .set_titles("Line Plot for {col_name}")
    )
    grid.tight_layout()
    plt.show()




## load raw measurements

In [None]:
# Raw measurement exports; every listed CSV is read into its own frame.
csv_paths = [
    #"android/logs/csv/2023-08-30_measuring_with_baseline_2_rename.csv", # A2
    "android/logs/csv/2023-08-30_measuring_with_baseline copy.csv", # A
    "ios/logs/csv/2023-08-30_measuring_with_baseline copy.csv", # i
]
# A list (not a one-shot ``map`` iterator) so the frames stay inspectable.
experiments = [pd.read_csv(path) for path in csv_paths]

# Stack all measurements, convert the times to microseconds and reduce each
# test name to the part after its last underscore.
df = pd.concat(experiments, ignore_index=True)
df["time"] = df["time"].map(nano_to_micro)
df["name"] = df["name"].map(get_suffix)
# The former bare ``df.sort_values("name")`` was dropped: it returns a new
# frame that was never used. The row order of ``df`` is unchanged, as before.

# Show floats with three decimals in every table rendered from here on.
pd.set_option("display.float_format", lambda x: "%.3f" % x)
df.groupby(["name"]).describe()

## copy data and remove outliers

In [None]:
# Filter each test's measurements separately. The groups are iterated
# explicitly instead of using ``groupby(...).apply``: newer pandas versions
# exclude the grouping column inside ``apply``, which would drop "name" from
# the result and break the summary below. Iteration always yields the complete
# sub-frame. Rows come out ordered by group, then in their original order.
cleaned_df = pd.concat(
    [remove_outliers_iqr(group) for _, group in df.groupby("name")],
    ignore_index=True,
)
cleaned_df.groupby("name").describe()

## visual comparison

In [None]:
# Time series per test after outlier removal; ``reset_index`` supplies the
# "index" column used as the x axis.
lineplot_group(cleaned_df.reset_index(), 2, 10)
# Raw strings hold the regex ``\[i\]`` (a literal "[i]" in the test name)
# without relying on invalid escape sequences in a normal string literal.
boxplot(filter(df, r"\[i\]").sort_values("name"), "Messungen von Clarkes Tests")
boxplot(filter(cleaned_df, r"\[i\]"), "Messungen von Clarkes Tests Korrektur der Ausreißer")

## comparison of individual results

In [None]:
def displot(df):
    """Show a histogram with KDE overlay of ``time`` for every ``name``.

    ``df`` needs the columns ``"name"`` and ``"time"``. Panels are wrapped
    after three columns and do not share axes.
    """
    grid = sns.FacetGrid(df, col="name", col_wrap=3, sharex=False, sharey=False)
    grid.map(sns.histplot, "time", kde=True)
    # NOTE(review): histplot is called without ``stat``, so the bars presumably
    # show counts rather than densities - confirm the "Density" label.
    grid.set_titles("{col_name}").set_xlabels("Time").set_ylabels("Density")
    plt.show()

#displot(cleaned_df.sort_values("name")) 


## plot means of _all_ experiments in a line

In [None]:
# Load the table plotted in the next cell; that cell reads its "category",
# "mean" and "platform" columns.
# NOTE(review): the file name suggests manually compiled mean values -
# confirm where this CSV comes from.
mean_df = pd.read_csv("clarkeCompManualMean.csv")





In [None]:


# Line chart of the mean per category, one coloured line per platform.
# The figure is created before the seaborn style is set, as in the original.
plt.figure(figsize=(10, 6))
sns.set(style="whitegrid")
ax = sns.lineplot(data=mean_df, x="category", y="mean", hue="platform", marker="o")

#plt.title("Veränderung der Messergebnisse durch Änderungen am Messverfahren")
ax.set_xlabel("Änderungen am Messverfahren")
ax.set_ylabel("Zeit in µs")
ax.grid(True)
# Optional reference lines, currently disabled:
#plt.axhline(y=16_000, color="red", linestyle="--", label=f"16ms für neue Frames (Android)")
#plt.axhline(y=5_000, color="orange", linestyle="--", label=f"5ms für neue Frames (iOS)")
ax.legend(title="Plattform")

plt.show()

# Render the underlying table below the chart.
mean_df

