---
title: "2023_Kronensicherung_Plesse_Analyse_Kraft"
author: "Kyell Jensen"
date: "2024-08-06"
format: pdf
editor: visual
---

# 2023_Kronensicherung_Plesse_Analyse_Kraft

## Kombinierte Analyse LineScale3, TreeQinetic und Versuchsaufzeichnung

Nutze eine geeignete Python-3.11-Umgebung (z. B. ein virtuelles Environment).

## Arbeitsumgebung vorbereiten

### IMPORT: Packages

In [None]:
# Struktur & Typen
from pathlib import Path
from typing import Dict, List

# Datenverarbeitung
import json
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
from slugify import slugify  # Zum Vereinheitlichen von Strings

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown, display

# Statistik
from scipy.stats import linregress, f_oneway
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.multicomp as mc

In [None]:
# Eigene Module und Funktionen
from kj_core.utils.latex_export import (
    generate_latex_table,
    generate_grouped_latex_tables,
    save_latex_table,
    build_data_dict_df
)
from kj_core.utils.labeling import (
    get_label_from_dict,
    get_color_dict
)
from kj_core import (
    CoreConfig,
    PlotManager,
    get_logger
)

# Projekteinstellungen
from project_config import (
    working_directory,
    data_export_directory,
    latex_export_directory,
    filename_clean_dataset,
    filename_clean_data_dict
)

### IMPORT: Manager Instanzen

In [None]:
# Set up the logger, the configuration rooted in the "combined" sub-directory
# of the working directory, and the plot manager built on that configuration.
combined_working_directory = f"{working_directory}/combined"

logger = get_logger(__name__)
CONFIG = CoreConfig(working_directory=combined_working_directory)
PLOT_MANAGER = PlotManager(CONFIG)

## IMPORT: Daten Import

In [None]:
# Load the cleaned dataset (Feather) and its accompanying data dictionary (JSON).
dataset_path = data_export_directory / filename_clean_dataset
data_dict_path = data_export_directory / filename_clean_data_dict

df = pd.read_feather(dataset_path)

with open(data_dict_path, mode="r", encoding="utf-8") as dict_file:
    data_dict = json.load(dict_file)

## ANALYSE: Explorative Datenanalyse

In [None]:
# Preview the first ten rows of the loaded dataset.
df.head(10)

In [None]:
# List the column names available in the dataset.
df.columns

### COMBINED: Definition von Darstellungsstandards
Festlegen von Farbcodes für einheitliche Darstellung von Sensoren und Behandlungsvarianten für alle nachfolgenden Plots.

In [None]:
# Colour palette provided by the plot manager.
color_palette = PLOT_MANAGER.color_palette_list

# Colour mapping for the "treatment" column; the plotting cells below iterate
# over it and pass it as palette.
treatment_color_dict = get_color_dict(df, "treatment", color_palette)

### LS3 ANALYSE: Zusammenhang Vorspannung und Lastspitze KS

In [None]:
def annotate_stats(x, y):
    """Build the multi-line annotation text for a simple linear regression of y on x.

    Pairs in which either value is NaN are dropped before fitting, so a few
    missing observations no longer turn every reported statistic into ``nan``.

    Args:
        x: Array-like of numeric x values (e.g. a pandas Series).
        y: Array-like of numeric y values with the same length as ``x``.

    Returns:
        A string with one line each for R, slope, intercept, p-value and
        standard error of the slope. If fewer than two complete pairs remain,
        all five values are reported as ``nan``.
    """
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)

    # Keep only observations where both coordinates are present.
    complete = ~(np.isnan(x_values) | np.isnan(y_values))

    if complete.sum() < 2:
        # A line cannot be fitted; report NaN instead of raising inside a plot cell.
        slope = intercept = r_value = p_value = std_err = float("nan")
    else:
        slope, intercept, r_value, p_value, std_err = linregress(
            x_values[complete], y_values[complete]
        )

    text_str = f"R = {r_value:.4f}\nSlope = {slope:.4f}\nIntercept = {intercept:.2f}\np-value = {p_value:.2e}\nStd Err = {std_err:.2f}"
    return text_str

In [None]:
# Reduce the dataset to one row per 'id': keep the listed columns and take the
# first entry of each column within every id group.
id_level_columns = ['id', 'treatment', 'release_force_target', 'rope_release', 'cable_max']
df_grouped_by_id = (
    df[id_level_columns]
    .groupby(['id'], observed=False)
    .first()
    .reset_index()
)
df_grouped_by_id

In [None]:
# Regression of 'rope_release' on 'release_force_target' over all ids,
# with the fit statistics printed to the right of the axes.
x_var, y_var = 'release_force_target', 'rope_release'
data_df = df_grouped_by_id

fig, ax1 = plt.subplots(figsize=(6, 3))
sns.regplot(x=x_var, y=y_var, data=data_df, ci=95, ax=ax1)

# Invisible twin axis that only carries the statistics text.
ax2 = ax1.twinx()
ax2.axis('off')
stats_text = annotate_stats(data_df[x_var], data_df[y_var])
ax2.annotate(stats_text, xy=(1.01, 0.1), xycoords='axes fraction')

# No axes title is set; only the axis labels are taken from the data dictionary.
x_label = get_label_from_dict(x_var, data_dict, use_full=True)
y_label = get_label_from_dict(y_var, data_dict, use_full=True)
ax1.set_xlabel(x_label)
ax1.set_ylabel(y_label)

plt.tight_layout()
plt.show()
PLOT_MANAGER.save_plot(fig, filename=f"{x_var}_vs_{y_var}", subdir="ls3_regression")

In [None]:
# Regression of 'rope_release' on 'release_force_target', fitted separately
# for every treatment and drawn into one shared axes.
group_by = 'treatment'
x_var, y_var = 'release_force_target', 'rope_release'
data_df = df_grouped_by_id

fig, ax1 = plt.subplots(figsize=(6, 4))
# Invisible twin axis that only carries the per-group statistics text.
ax2 = ax1.twinx()
ax2.axis('off')
y_pos_init = 0.1

for idx, group in enumerate(treatment_color_dict):
    color = treatment_color_dict[group]
    group_df = data_df[data_df[group_by] == group]
    # Only fit groups that have at least one value in both variables.
    has_x_values = group_df[x_var].notna().any()
    has_y_values = group_df[y_var].notna().any()
    if not (has_x_values and has_y_values):
        continue
    sns.regplot(data=group_df, x=x_var, y=y_var, color=color, label=group, ci=95, ax=ax1)
    stats_text = annotate_stats(group_df[x_var], group_df[y_var])
    # The vertical text position follows the group's index in the colour dict.
    ax2.annotate(
        f"{group}:\n{stats_text}",
        xy=(1.01, y_pos_init + ((idx-0.3) * 0.35)),
        xycoords='axes fraction',
    )

# No axes title is set; labels and legend title come from the data dictionary.
ax1.set_xlabel(get_label_from_dict(x_var, data_dict, use_full=True))
ax1.set_ylabel(get_label_from_dict(y_var, data_dict, use_full=True))
ax1.legend(title=get_label_from_dict(group_by, data_dict), loc='upper left')
plt.tight_layout()
plt.show()
PLOT_MANAGER.save_plot(fig, filename=f"{x_var}_vs_{y_var}_grouped_by_{group_by}", subdir="ls3_regression")


In [None]:
# Regression of 'cable_max' on 'rope_release', fitted separately for every
# treatment and drawn into one shared axes.
data_df = df_grouped_by_id
group_by = 'treatment'
x_var = 'rope_release'
y_var = 'cable_max'

fig, ax1 = plt.subplots(figsize=(6, 4))
# Invisible twin axis that only carries the per-group statistics text.
ax2 = ax1.twinx()
ax2.axis('off')
y_pos_init = 0.1
for idx, (group, color) in enumerate(treatment_color_dict.items()):
    group_df = data_df[data_df[group_by] == group]
    # Skip groups without any values in either variable (same guard as the
    # release-force regression cell); a regression needs data in both.
    if group_df[x_var].isna().all() or group_df[y_var].isna().all():
        continue
    sns.regplot(x=x_var, y=y_var, data=group_df, ax=ax1, color=color, label=group, ci=95)
    stats_text = annotate_stats(group_df[x_var], group_df[y_var])
    # The vertical text position follows the group's index in the colour dict.
    ax2.annotate(f"{group}:\n{stats_text}", xy=(1.01, y_pos_init + ((idx - 1) * 0.4)), xycoords='axes fraction')

# No axes title is set; labels and legend title come from the data dictionary.
ax1.set_xlabel(get_label_from_dict(x_var, data_dict, use_full=True))
ax1.set_ylabel(get_label_from_dict(y_var, data_dict, use_full=True))
ax1.legend(title=get_label_from_dict(group_by, data_dict), loc='upper left')
plt.tight_layout()
plt.show()
PLOT_MANAGER.save_plot(fig, filename=f"{x_var}_vs_{y_var}", subdir="ls3_regression")

### LS3 ANALYSE: Lastspitze in der KS

Analyse der Spitzenlasten in der KS gruppiert nach Ziel-Vorspannung und Treatment.

In [None]:
# Box plot of 'cable_max' per 'release_force_target' level, split by treatment.
# Rows with treatment "free" are excluded first.
df_plot = df_grouped_by_id[df_grouped_by_id['treatment'] != "free"].copy()

# Drop the now-empty category "free" from the categorical. Guarded, because
# remove_categories raises if the category is not present.
if "free" in df_plot['treatment'].cat.categories:
    df_plot['treatment'] = df_plot['treatment'].cat.remove_categories(["free"])

fig, ax = plt.subplots(figsize=(6, 4))
sns.boxplot(data=df_plot,
            x='release_force_target',
            y='cable_max',
            hue='treatment',
            palette=treatment_color_dict,
            ax=ax)

# Strip plot (individual observations) drawn on top of the boxes
sns.stripplot(data=df_plot,
              x='release_force_target',
              y='cable_max',
              hue='treatment',
              dodge=True,
              color='black',   # markers set apart from the box colours
              alpha=1,         # fully opaque markers
              ax=ax,
              legend=False)


# No axes title is set. The x-axis shows 'release_force_target' (treatment is
# encoded as hue), so it is labelled from the data dictionary accordingly.
ax.set_xlabel(get_label_from_dict('release_force_target', data_dict, use_axes=True))
ax.set_ylabel(get_label_from_dict('cable_max', data_dict, use_axes=True))
plt.tight_layout()
ax.legend(title="Behandlung")
plt.show()

PLOT_MANAGER.save_plot(fig, filename="cable_max_boxplot", subdir="ls3_boxplot")


In [None]:
# Restrict the data to the two treatments that are tabulated.
# NOTE(review): this reads from `df`, whereas the box plot uses the
# one-row-per-id frame `df_grouped_by_id`; if `df` holds several rows per id,
# the mean is weighted by row count - confirm this is intended.
is_tabulated_treatment = df['treatment'].isin(["gefa_dynamic", "cobra_static"])
filtered_df = df.loc[is_tabulated_treatment, ["release_force_target", "treatment", "cable_max"]]

# Minimum, mean and maximum of 'cable_max' per (release_force_target, treatment).
cable_max_stats = (
    filtered_df
    .groupby(['release_force_target', 'treatment'], observed=True)['cable_max']
    .agg(['min', 'mean', 'max'])
)

# Move 'release_force_target' into the columns and make it the outer column level.
pivoted = cable_max_stats.unstack(level=0).swaplevel(axis=1)
# Outer level ascending; statistic names descending, which yields min, mean, max.
pivoted = pivoted.sort_index(axis=1, level=[0, 1], ascending=[True, False])

# Render float entries of the outer column level with two decimals.
formatted_columns = []
for outer_key, stat_name in pivoted.columns:
    if isinstance(outer_key, float):
        outer_key = f"{outer_key:.2f}"
    formatted_columns.append((outer_key, stat_name))

pivoted.columns = pd.MultiIndex.from_tuples(formatted_columns, names=pivoted.columns.names)
pivoted

In [None]:
# Name the table axes with labels from the data dictionary: the outer column
# level gets the 'release_force_target' label, the inner level stays unnamed,
# and the row index gets the 'treatment' label.
column_level_label = get_label_from_dict("release_force_target", data_dict, template="{Zeichen}")
row_index_label = get_label_from_dict("treatment", data_dict)

pivoted.columns.names = [column_level_label, None]
pivoted.index.name = row_index_label

In [None]:
# Build the column layout from the table itself instead of hard-coding three
# groups of three: one left-aligned index column, then one right-aligned
# column per statistic, with a vertical rule before each outer column group.
outer_level_values = pivoted.columns.get_level_values(0)
column_format = "l|" + "|".join(
    "r" * int((outer_level_values == outer_key).sum())
    for outer_key in outer_level_values.unique()
)

latex_string = pivoted.to_latex(
    index=True,
    escape=True,
    float_format="{:0.2f}".format,
    column_format=column_format,
    multicolumn=True,
    multicolumn_format="c",
)

# Short and long caption
caption = "Feldversuch 2 - Ergebnisse, Kraftspitze in der KS"
caption_long = (
    f"Feldversuch 2 - {get_label_from_dict('cable_max', data_dict, use_full=True)} "
    f"gruppiert über {get_label_from_dict('release_force_target', data_dict, use_full=True)} und "
    f"{get_label_from_dict('treatment', data_dict)}. "
    f"Angegeben ist jeweils pro Gruppe das Minimum, der Mittelwert und das Maximum. "
    f"Die Variante 'free' ist nicht aufgeführt, da hier keine KS eingesetzt wurde."
)

# Save the LaTeX table
save_latex_table(latex_string, caption, latex_export_directory, caption_long)