In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "vscode"
px.defaults.template = "plotly"
import plotly.graph_objects as go

In [3]:
# Load the source data from data.csv; later cells clean, filter and aggregate this frame.
df = pd.read_csv("data.csv")

In [4]:
# Factor columns used throughout the notebook: roughly the top 20 features from the
# random-forest model, plus TALK_MOTHER added as the counterpart of TALK_FATHER.
# The order is kept as-is because it drives column order in later aggregations.
list_columns = [
    "SWEETS", "TOOTH_BRUSHING", "BREAKFAST_WEEKDAYS", "BREAKFAST_WEEKEND",
    "SOFT_DRINKS", "PHYS_ACT_60", "NERVOUS", "FRUITS", "VEGETABLES",
    "TALK_FATHER", "TALK_MOTHER", "FRIEND_TALK", "SLEEP_DIF", "TIME_EXE",
    "FIGHT_YEAR", "HEADACHE", "BUL_BEEN", "FAMILY_MEALS_TOGETHER",
]


# Treat the value 999 in BUL_BEEN as missing (presumably a "no answer" code — confirm
# against the codebook). Series.mask returns a new column with NaN where the condition
# holds and upcasts an integer column explicitly, instead of relying on the silent
# upcasting of `df.loc[...] = np.nan`, which recent pandas versions deprecate.
df["BUL_BEEN"] = df["BUL_BEEN"].mask(df["BUL_BEEN"] == 999)
   
# Maximum value of each factor's answer scale; used as the divisor when normalising.
dict_scales = dict(
    # Symptoms (1=bad → 5=good)
    HEADACHE=5,
    NERVOUS=5,
    SLEEP_DIF=5,
    FEEL_LOW=5,
    STOMACHACHE=5,
    DIZZY=5,
    # Communication with parents (1=good → 5=bad)
    TALK_FATHER=5,
    TALK_MOTHER=5,
    FAMILY_MEALS_TOGETHER=5,
    # Lifestyle habits (1=good → max=bad)
    TIME_EXE=7,
    TOOTH_BRUSHING=5,
    HEALTH=4,
    LIKE_SCHOOL=4,
    STUD_TOGETHER=5,
    # Diet & lifestyle
    FRUITS=7,
    VEGETABLES=7,
    FRIEND_TALK=7,
    BREAKFAST_WEEKDAYS=5,
    BREAKFAST_WEEKEND=3,
    PHYS_ACT_60=7,
    LIFESAT=10,
    # Risk behaviour
    SWEETS=7,
    SOFT_DRINKS=7,
    DRUNK_30=5,
    BUL_BEEN=5,
    BUL_OTHERS=5,
    FIGHT_YEAR=5,
    INJURED_YEAR=5,
    COMPUTER_NO=4,
    THINK_BODY=5,
    SCHOOL_PRESSURE=4,
)

# Factors where a higher raw value means healthier (e.g. 5 = never a headache,
# 7 = very physically active). They are flipped during normalisation so that the
# maximum always means worst / least healthy.
# NOTE(review): the original note mentions "6 = breakfast every day", but
# BREAKFAST_WEEKDAYS is listed with a maximum of 5 in dict_scales — confirm the scale.
reverse_scales = {
    "BREAKFAST_WEEKDAYS", "BREAKFAST_WEEKEND",
    "DIZZY", "FEEL_LOW", "FRIEND_TALK", "FRUITS",
    "HEADACHE", "LIFESAT", "NERVOUS", "PHYS_ACT_60",
    "SLEEP_DIF", "STOMACHACHE", "VEGETABLES",
}


# Normalisation onto a common risk scale:
#   - lower result  = better (healthier)
#   - higher result = worse (riskier)

def make_df_2018_norm(df_in, columns=None, scales=None, reverse=None, year=2018):
    """Return the rows of one survey year with the factor columns rescaled.

    Each factor column is divided by its scale maximum; columns listed in
    ``reverse`` are then flipped with ``1 - value`` so that, for every factor,
    a higher result means a less healthy answer.

    NOTE(review): dividing by the maximum maps a 1..max scale onto [1/max, 1]
    (and onto [0, 1 - 1/max] after flipping), not onto a full 0..1 range.
    Confirm whether min-max scaling was intended.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame; must contain a ``YEAR`` column and every column in ``columns``.
    columns : iterable of str, optional
        Factor columns to rescale. Defaults to the notebook-level ``list_columns``.
    scales : mapping of str to number, optional
        Scale maximum per column. Defaults to the notebook-level ``dict_scales``.
    reverse : container of str, optional
        Columns to flip after scaling. Defaults to the notebook-level ``reverse_scales``.
    year : int, optional
        Value of ``YEAR`` to keep. Defaults to 2018.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows (``df_in`` is not modified) with the factor
        columns rescaled; all other columns are passed through unchanged.

    Raises
    ------
    KeyError
        If a requested column has no entry in ``scales`` or is missing from ``df_in``.
    """
    # Fall back to the notebook-level configuration only at call time, so the
    # function can also be used (and tested) with explicit arguments.
    columns = list_columns if columns is None else list(columns)
    scales = dict_scales if scales is None else scales
    reverse = reverse_scales if reverse is None else reverse

    # Fail early with a readable message instead of a bare KeyError mid-loop.
    unscaled = [col for col in columns if col not in scales]
    if unscaled:
        raise KeyError(f"No scale maximum defined for: {unscaled}")

    df_year = df_in[df_in["YEAR"] == year].copy()
    for col in columns:
        scaled = df_year[col] / scales[col]
        df_year[col] = 1 - scaled if col in reverse else scaled
    return df_year

# Build the normalised 2018 frame once; the chart cells below all read from it.
df_2018_norm = make_df_2018_norm(df)

### GRAPH 1
### Overweight in Time: Boys vs Girls
Story: Nadváha roste, je to problém, kluci jsou na tom hůř než holky.


In [4]:
### Graph 1
### Overview: overweight over time, boys vs girls
# Aggregate first (mean of OVERWEIGHT per YEAR and SEX, with SEX mapped to labels),
# then hand the already-prepared frame to plotly.
df_trend = (
    df.groupby(["YEAR", "SEX"], as_index=False)["OVERWEIGHT"]
    .mean()
    .assign(SEX=lambda frame: frame["SEX"].map({2: 'Girls', 1: 'Boys'}))
)

colors = dict(Girls="#eb8fbd", Boys="#3b8ee1")

fig1 = px.line(
    df_trend,
    x="YEAR",
    y="OVERWEIGHT",
    color="SEX",
    color_discrete_map=colors,
    title="Overweight in Time",
)

# Area fill under each line, fixed tick positions on x, fixed y range.
fig1.update_traces(fill="tozeroy")
fig1.update_xaxes(tickvals=[2002, 2006, 2010, 2014, 2018])
fig1.update_yaxes(range=[0, 0.5])
fig1.update_layout(xaxis_title="Year", yaxis_title="Overweight", legend_title="Gender")

fig1.show()



### GRAPH 2
### TOP Gender behaviour differences of overweight children
Story: V jakých faktorech / typech chování se liší kluci s nadváhou od holek s nadváhou – top chování, která jsou nejzajímavější / mají největší dopad.

*How features were selected: feature engineering / random forest, regrese a R-squared, kontrolní grafy třeba v Tableau.*


In [6]:
# Correlation of each factor with OVERWEIGHT (2018, normalised frame df_2018_norm).
# The full correlation matrix is computed and only the OVERWEIGHT column is kept,
# without its self-correlation entry.
corr_series = (
    df_2018_norm[[*list_columns, "OVERWEIGHT"]]
    .corr()
    .loc[:, "OVERWEIGHT"]
    .drop(labels="OVERWEIGHT")
)

# Rank by absolute strength, regardless of sign.
corr_abs = corr_series.abs()

top5_corr = list(corr_abs.sort_values(ascending=False).index[:5])
print("TOP 5 podle |korelace s OW|:", top5_corr)


TOP 5 podle |korelace s OW|: ['TOOTH_BRUSHING', 'SWEETS', 'BREAKFAST_WEEKDAYS', 'FIGHT_YEAR', 'PHYS_ACT_60']


In [7]:
# Graph 2 (plotly express version): mean of the top-5 factors per sex,
# restricted to overweight children in the normalised 2018 frame.
df_ow_2018 = df_2018_norm.loc[df_2018_norm["OVERWEIGHT"] == 1].copy()

sex_means = df_ow_2018.groupby("SEX", as_index=False)[top5_corr].mean()

# Long format: one row per (sex, factor) pair, as plotly express expects.
sex_means_long = pd.melt(
    sex_means,
    id_vars=["SEX"],
    value_vars=top5_corr,
    var_name="FACTOR",
    value_name="VALUE",
)
sex_means_long["SEX_STRING"] = sex_means_long["SEX"].map({1: "Boys", 2: "Girls"})

colors = dict(Boys="#3b8ee1", Girls="#eb8fbd")

# Factor order by absolute correlation, strongest first.
factor_order_top5 = list(
    corr_abs.loc[top5_corr].sort_values(ascending=False).index
)

fig2 = px.bar(
    sex_means_long,
    x="VALUE",
    y="FACTOR",
    color="SEX_STRING",
    color_discrete_map=colors,
    category_orders={"FACTOR": factor_order_top5},
    orientation="h",
    barmode="group",
    title="Top 5 faktorů podle korelace s overweight – Boys vs Girls (OW only, 2018)",
)
fig2.update_layout(
    xaxis_title="Average (normalized 0–1, higher = worse)",
    yaxis_title="Factor",
    legend_title="Gender",
)

fig2.show()


In [18]:
# ------------------------------------------------------------
# GRAPH 2 – top 5 factors (by correlation), Boys vs Girls difference (OW only, 2018)
# ------------------------------------------------------------

# fig2 stays None whenever there is nothing to plot (no 2018 rows, or no
# overweight children among them).
fig2 = None

if not df_2018_norm.empty:
    # overweight children in 2018 only
    df_ow_2018 = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 1].copy()

    if not df_ow_2018.empty:
        # mean of the top-5 factors per sex
        sex_means = (
            df_ow_2018
            .groupby("SEX", as_index=False, observed=True)[top5_corr]
            .mean()
        )

        # long form for plotly
        sex_means_long = sex_means.melt(
            id_vars=["SEX"],
            value_vars=top5_corr,
            var_name="FACTOR",
            value_name="VALUE"
        )

        sex_means_long["SEX_STRING"] = sex_means_long["SEX"].map({1: "Boys", 2: "Girls"})

        # factor x sex table used to compute the gender gap (Girls - Boys)
        gap_table = (
            sex_means_long
            .pivot_table(
                index="FACTOR",
                columns="SEX_STRING",
                values="VALUE"
            )
        )

        gap_table["GIRLS_MINUS_BOYS"] = gap_table["Girls"] - gap_table["Boys"]
        gap_table["ABS_GAP"] = gap_table["GIRLS_MINUS_BOYS"].abs()

        # factor order by size of the gap (largest gap first)
        factor_order_top5 = (
            gap_table
            .sort_values("ABS_GAP", ascending=False)
            .index
            .tolist()
        )

        colors = {"Boys": "#3b8ee1", "Girls": "#eb8fbd"}

        fig2 = px.bar(
            sex_means_long,
            x="VALUE",
            y="FACTOR",
            color="SEX_STRING",
            orientation="h",
            barmode="group",
            category_orders={"FACTOR": factor_order_top5},
            color_discrete_map=colors,
            title="Top 5 faktorů podle korelace s overweight – Boys vs Girls (OW only, 2018)"
        )

        fig2.update_layout(
            xaxis_title="Průměr (normalized 0–1, vyšší = horší)",
            yaxis_title="Faktor",
            legend_title="Pohlaví"
        )

# Render only when a figure was actually built: calling .show() on None
# (the empty-overweight-subset case) would raise AttributeError.
if fig2 is not None:
    fig2.show()


In [8]:
# -------------------------------------------------
# 1) Overweight children only, 2018
# -------------------------------------------------
df_ow_2018 = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 1].copy()

# per-sex means for every factor
boys_means  = df_ow_2018[df_ow_2018["SEX"] == 1][list_columns].mean()
girls_means = df_ow_2018[df_ow_2018["SEX"] == 2][list_columns].mean()

# -------------------------------------------------
# 2) Top 5 by |correlation|, strongest first
# -------------------------------------------------
factor_order = (
    corr_series.loc[top5_corr]
    .abs()
    .sort_values(ascending=False)
    .index
    .tolist()
)

# One row per (factor, sex); GAP (Girls − Boys) and R (signed correlation with
# OVERWEIGHT) are repeated on both rows of a factor so they can be read back later.
plot_rows = []
for f in factor_order:
    b = boys_means[f]
    g = girls_means[f]
    gap = g - b
    r   = corr_series[f]
    plot_rows.append({"FACTOR": f, "SEX": "Boys",  "VALUE": b, "GAP": gap, "R": r})
    plot_rows.append({"FACTOR": f, "SEX": "Girls", "VALUE": g, "GAP": gap, "R": r})

df_plot = pd.DataFrame(plot_rows)

# ordered categorical so the factors are drawn in the intended order
df_plot["FACTOR"] = pd.Categorical(df_plot["FACTOR"], categories=factor_order, ordered=True)

# -------------------------------------------------
# 3) Grouped horizontal bar chart
# -------------------------------------------------
colors = {"Boys": "#3b8ee1", "Girls": "#eb8fbd"}

fig2 = go.Figure()

# bars for Boys
df_boys = df_plot[df_plot["SEX"] == "Boys"]
fig2.add_bar(
    x=df_boys["VALUE"],
    y=df_boys["FACTOR"],
    name="Boys",
    orientation="h",
    marker=dict(color=colors["Boys"]),
)

# bars for Girls
df_girls = df_plot[df_plot["SEX"] == "Girls"]
fig2.add_bar(
    x=df_girls["VALUE"],
    y=df_girls["FACTOR"],
    name="Girls",
    orientation="h",
    marker=dict(color=colors["Girls"]),
)

fig2.update_layout(
    barmode="group",
    title="Top 5 faktorů podle korelace s overweight – Boys vs Girls (děti s OW, 2018)",
    xaxis_title="Průměr (normalized 0–1, higher = worse)",
    yaxis_title="Factor",
    legend_title="Pohlaví",
    template="simple_white",
    # wide right margin leaves room for the annotations placed past the bars
    margin=dict(l=160, r=160, t=80, b=60),
)

# x axis starts at 0 with 15 % headroom past the longest bar; light grid, no zero line.
# Series.max() skips NaN, whereas the builtin max() gives order-dependent results
# when a mean is missing.
fig2.update_xaxes(
    range=[0, df_plot["VALUE"].max() * 1.15],
    showgrid=True,
    gridcolor="lightgrey",
    zeroline=False,
)

# -------------------------------------------------
# 4) Annotations: Girls − Boys gap + correlation r
# -------------------------------------------------
# One row per factor: GAP and R are identical on both rows, VALUE is the longer bar.
# observed=True keeps the result limited to categories present in the data and
# avoids the pandas warning about the changing default for categorical keys.
df_annot = (
    df_plot
    .groupby("FACTOR", observed=True)
    .agg({
        "GAP": "first",   # Girls − Boys
        "R": "first",
        "VALUE": "max"    # longest bar, used to position the text
    })
    .reset_index()
)

for _, row in df_annot.iterrows():
    f = row["FACTOR"]
    gap = row["GAP"]
    r   = row["R"]
    xmax = row["VALUE"]

    # gap text – which sex has the higher (worse) normalised mean
    if gap > 0:
        gap_text = f"Girls worse by {gap:.2f}"
        gap_color = colors["Girls"]
    elif gap < 0:
        gap_text = f"Boys worse by {abs(gap):.2f}"
        gap_color = colors["Boys"]
    else:
        gap_text = "No difference"
        gap_color = "grey"

    # Annotation to the right of the bars. xanchor="left" pins the LEFT edge of the
    # text box to x; the default anchor centres a data-referenced annotation on x,
    # which would make the text overlap the end of the bar. `align` only controls
    # line alignment inside the text box.
    fig2.add_annotation(
        x=xmax * 1.05,
        y=f,
        text=f"{gap_text}<br>r = {r:.2f}",
        showarrow=False,
        font=dict(size=11, color=gap_color),
        xanchor="left",
        align="left"
    )

fig2.show()






In [16]:
# ------------------------------------------------------------
# GRAPH 2 – radar: top 5 factors by correlation, Boys vs Girls (OW only)
# ------------------------------------------------------------

# Default to "no figure"; each guard below only proceeds when there is data left.
fig2 = None

if not df_2018_norm.empty:
    df_ow_2018 = df_2018_norm.loc[df_2018_norm["OVERWEIGHT"] == 1].copy()

    if not df_ow_2018.empty:
        # mean value of each top-5 factor for Boys / Girls
        sex_means_top5 = df_ow_2018.groupby(
            "SEX", as_index=False, observed=True
        )[top5_corr].mean()

        if not sex_means_top5.empty:
            sex_means_top5["SEX_STRING"] = sex_means_top5["SEX"].map(
                {1: "Boys", 2: "Girls"}
            )

            # one row per (sex label, factor) for plotly express
            radar2_long = pd.melt(
                sex_means_top5,
                id_vars="SEX_STRING",
                value_vars=top5_corr,
                var_name="FACTOR",
                value_name="VALUE",
            )

            fig2 = px.line_polar(
                radar2_long,
                r="VALUE",
                theta="FACTOR",
                color="SEX_STRING",
                color_discrete_map=dict(Boys="#3b8ee1", Girls="#eb8fbd"),
                line_close=True,
                title="Top 5 faktorů podle korelace s overweight – Boys vs Girls (OW only, 2018)",
            )

            fig2.update_traces(fill="toself", opacity=0.55)

            # the radial axis keeps the full 0–1 range for now
            radial_axis = dict(range=[0, 1], showticklabels=True, tickfont=dict(size=10))
            angular_axis = dict(rotation=90, direction="clockwise")
            fig2.update_layout(
                polar=dict(radialaxis=radial_axis, angularaxis=angular_axis),
                legend_title="Gender",
                margin=dict(l=40, r=40, t=80, b=40),
            )

            fig2.show()

### GRAPH 3
### Komplexni prehled - Gender differences in all relevant factors
- širší obrázek ke grafu 2 (tam jen highlights), tady vše

In [9]:
# -----------------------------------------
# GRAPH 3 – gender gap by factor (Girls − Boys)
# ordered from "most girls" to "most boys"
# -----------------------------------------

# factors that are NOT shown in graph 2
remaining_factors = [f for f in list_columns if f not in top5_corr]

# overweight children only
df_ow_2018 = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 1].copy()

# per-sex means
sex_means_all = (
    df_ow_2018
    .groupby("SEX", as_index=False)[remaining_factors]
    .mean()
)

sex_long_all = sex_means_all.melt(
    id_vars=["SEX"],
    value_vars=remaining_factors,
    var_name="FACTOR",
    value_name="VALUE"
)

sex_long_all["SEX_STRING"] = sex_long_all["SEX"].map({1: "Boys", 2: "Girls"})

# gender-gap table: one row per factor, one column per sex
gap_table_rest = (
    sex_long_all
    .groupby(["FACTOR", "SEX_STRING"])["VALUE"]
    .mean()
    .unstack("SEX_STRING")
)

gap_table_rest["GIRLS_MINUS_BOYS"] = gap_table_rest["Girls"] - gap_table_rest["Boys"]

df_gap = gap_table_rest.reset_index()

# factor order by gender gap:
# girls worse first (largest positive), boys worse last (most negative)
factor_order = (
    df_gap
    .sort_values("GIRLS_MINUS_BOYS", ascending=False)["FACTOR"]
    .tolist()
)

# which side has the higher mean (used for colour only; a gap of exactly 0 is
# labelled "Boys")
df_gap["SIDE"] = np.where(
    df_gap["GIRLS_MINUS_BOYS"] > 0,
    "Girls",
    "Boys"
)

color_gap = {
    "Girls": "#eb8fbd",
    "Boys": "#3b8ee1"
}

# Symmetric y axis around zero. Using [min - pad, max + pad] directly is not
# symmetric, and when every gap has the same sign it excludes 0, so the bars are
# cut off at their base and the zero line disappears.
y_min = df_gap["GIRLS_MINUS_BOYS"].min()
y_max = df_gap["GIRLS_MINUS_BOYS"].max()
y_lim = max(abs(y_min), abs(y_max))
pad   = 0.05 * y_lim
# No usable spread (all gaps 0 or NaN): leave the range unset so plotly autoranges
# instead of receiving a degenerate [0, 0] / NaN range.
y_range = [-(y_lim + pad), y_lim + pad] if y_lim > 0 else None

fig3 = px.bar(
    df_gap,
    x="FACTOR",
    y="GIRLS_MINUS_BOYS",
    color="SIDE",
    color_discrete_map=color_gap,
    category_orders={"FACTOR": factor_order},
    title="Gender Gap by Factor"
)

fig3.update_layout(
    xaxis_title="Factor",
    yaxis_title="Gender gap (Girls − Boys, scaled 0–1)",
    legend_title="Higher risk in",
    xaxis=dict(tickangle=-40),
    yaxis=dict(
        zeroline=True,
        zerolinecolor="black",
        zerolinewidth=1.5,
        range=y_range
    ),
    height=500,
    margin=dict(l=80, r=40, b=120)
)

fig3.show()


In [10]:
# Graph 3 (lollipop version): gender gap for the factors not shown in graph 2,
# with marker size encoding |correlation with OVERWEIGHT|.
remaining_factors = [f for f in list_columns if f not in top5_corr]

# per-sex means for the remaining factors (overweight children only)
df_ow_2018 = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 1].copy()

sex_means_all = (
    df_ow_2018
    .groupby("SEX", as_index=False)[remaining_factors]
    .mean()
)

sex_long_all = sex_means_all.melt(
    id_vars=["SEX"],
    value_vars=remaining_factors,
    var_name="FACTOR",
    value_name="VALUE"
)

sex_long_all["SEX_STRING"] = sex_long_all["SEX"].map({1: "Boys", 2: "Girls"})

# gap table: one row per factor, one column per sex
gap_table_rest = (
    sex_long_all
    .groupby(["FACTOR", "SEX_STRING"])["VALUE"]
    .mean()
    .unstack("SEX_STRING")
)

gap_table_rest["GIRLS_MINUS_BOYS"] = gap_table_rest["Girls"] - gap_table_rest["Boys"]
gap_table_rest["ABS_GAP"] = gap_table_rest["GIRLS_MINUS_BOYS"].abs()

# attach each factor's correlation with OVERWEIGHT
gap_rest = gap_table_rest.reset_index()
gap_rest["CORR"] = gap_rest["FACTOR"].map(corr_series)
# ABS_CORR is used as the marker size below, and plotly rejects NaN sizes.
# An undefined correlation is therefore drawn with size 0, the same way the
# Graph 4 cell fills its size column with 0.0 before plotting.
gap_rest["ABS_CORR"] = gap_rest["CORR"].abs().fillna(0.0)

# factor order by |correlation| (strongest first)
factor_order_rest = (
    gap_rest
    .sort_values("ABS_CORR", ascending=False)["FACTOR"]
    .tolist()
)

# which side has the higher mean (colour only; a gap of exactly 0 is labelled "Boys")
gap_rest["SIDE"] = np.where(
    gap_rest["GIRLS_MINUS_BOYS"] > 0,
    "Girls",
    "Boys"
)

color_gap = {
    "Girls": "#eb8fbd",
    "Boys": "#3b8ee1"
}

# lollipop chart: the markers
fig3 = px.scatter(
    gap_rest,
    x="GIRLS_MINUS_BOYS",
    y="FACTOR",
    color="SIDE",
    size="ABS_CORR",  # the stronger the correlation with OW, the bigger the dot
    color_discrete_map=color_gap,
    category_orders={"FACTOR": factor_order_rest},
    title="Gender differences – remaining factors (OW only, 2018; size = |corr with OW|)"
)

# lollipop "stick": a line from 0 to each point
for _, row in gap_rest.iterrows():
    fig3.add_shape(
        type="line",
        x0=0, y0=row["FACTOR"],
        x1=row["GIRLS_MINUS_BOYS"], y1=row["FACTOR"],
        line=dict(color="lightgrey", width=2)
    )

fig3.update_traces(
    mode="markers",
    marker_line_width=1,
    marker_line_color="white"
)

fig3.update_layout(
    xaxis_title="Gender gap (Girls − Boys, normalized 0–1; >0 = girls worse)",
    yaxis_title="Factor",
    legend_title="Higher risk in",
    xaxis=dict(
        zeroline=True,
        zerolinecolor="black",
        zerolinewidth=1.5
    ),
    height=800,
    margin=dict(l=180)
)

fig3.show()



### GRAPH 4
### Behaviour differences: Overweight vs Non-Overweight children

Doplňkový graf ke grafu Kluci vs Holky -> Jak se liší děti s nadváhou od těch, co nadváhu nemají

In [11]:
# Graph 4 (lollipop version): mean of every factor for overweight vs
# non-overweight children in the normalised 2018 frame, and their difference.
ow_means = df_2018_norm.loc[df_2018_norm["OVERWEIGHT"] == 1, list_columns].mean()
non_means = df_2018_norm.loc[df_2018_norm["OVERWEIGHT"] == 0, list_columns].mean()

diff = ow_means - non_means

# One row per factor: signed difference (missing → 0.0), its magnitude, and which
# group has the higher mean. Sorted by magnitude, largest first.
df_diff = (
    diff.rename_axis("FACTOR")
    .reset_index(name="DIFFERENCE")
    .fillna({"DIFFERENCE": 0.0})
    .assign(ABS_DIFF=lambda frame: frame["DIFFERENCE"].abs())
    .sort_values("ABS_DIFF", ascending=False)
    .assign(
        SIDE=lambda frame: np.where(
            frame["DIFFERENCE"] > 0, "Overweight", "Non-overweight"
        )
    )
)

color_ow = dict([("Overweight", "orangered"), ("Non-overweight", "seagreen")])

fig4 = px.scatter(
    df_diff,
    x="DIFFERENCE",
    y="FACTOR",
    size="ABS_DIFF",
    color="SIDE",
    color_discrete_map=color_ow,
    category_orders={"FACTOR": list(df_diff["FACTOR"])},
    title="Difference in behaviour (Overweight − Non-overweight, 2018, normalized 0–1)",
)

# grey "sticks" from 0 to each point
for factor, delta in zip(df_diff["FACTOR"], df_diff["DIFFERENCE"]):
    fig4.add_shape(
        type="line",
        x0=0,
        x1=delta,
        y0=factor,
        y1=factor,
        line=dict(color="lightgrey", width=2),
    )

fig4.update_traces(mode="markers", marker_line_width=1, marker_line_color="white")

fig4.update_layout(
    xaxis_title="Difference (OW − Non-OW, normalized 0–1; >0 = OW worse)",
    yaxis_title="Factor",
    legend_title="Higher risk in",
    xaxis=dict(zeroline=True, zerolinecolor="black", zerolinewidth=1.5),
    height=700,
    margin=dict(l=180, r=60),
)

fig4.show()


In [14]:
# ------------------------------------------------------------
# GRAPH 4 – OW vs Non-OW radar chart (2018, normalized 0–1)
# ------------------------------------------------------------

# The "no data" messages are printed: `st` is never imported in this notebook, so
# the previous st.info(...) calls would raise NameError exactly in the branches
# meant to handle missing data gracefully.
if df_2018_norm.empty:
    fig4 = None
    print("No data for year 2018 with selected filters (for OW vs Non-OW radar).")

else:
    # split the 2018 children into overweight and non-overweight
    df_2018_ow  = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 1]
    df_2018_non = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 0]

    if df_2018_ow.empty or df_2018_non.empty:
        fig4 = None
        print("Not enough data for either overweight or non-overweight group in 2018.")
    else:
        # mean factor values for OW / Non-OW
        ow_means  = df_2018_ow[list_columns].mean()
        non_means = df_2018_non[list_columns].mean()

        # OW − Non-OW difference, used to rank factors by size of the difference
        diff = ow_means - non_means
        df_diff = (
            diff.rename("DIFF")
                .reset_index()
                .rename(columns={"index": "FACTOR"})
        )
        df_diff["ABS_DIFF"] = df_diff["DIFF"].abs()
        df_diff = df_diff.sort_values("ABS_DIFF", ascending=False)

        # keep only the TOP_N factors so the radar stays readable
        TOP_N = 8  # can be changed to e.g. 6 / 10 depending on how the chart looks
        top_factors = df_diff["FACTOR"].head(TOP_N).tolist()

        # radar data – wide form
        radar_df = pd.DataFrame({
            "FACTOR": top_factors,
            "Overweight": ow_means[top_factors].values,
            "Non-overweight": non_means[top_factors].values,
        })

        # long form for plotly express
        radar_long = radar_df.melt(
            id_vars="FACTOR",
            value_vars=["Overweight", "Non-overweight"],
            var_name="GROUP",
            value_name="VALUE",
        )

        # dynamic radial-axis range, clamped to [0, 1]
        min_val = radar_long["VALUE"].min()
        max_val = radar_long["VALUE"].max()
        if max_val == min_val:
            # fallback when all values are identical: fixed ±0.1 window
            r_min, r_max = max(min_val - 0.1, 0), min(min_val + 0.1, 1)
        else:
            pad = 0.1 * (max_val - min_val)          # 10 % buffer
            r_min = max(min_val - pad, 0)
            r_max = min(max_val + pad, 1)

        fig4 = px.line_polar(
            radar_long,
            r="VALUE",
            theta="FACTOR",
            color="GROUP",
            line_close=True,
            color_discrete_map={
                "Overweight": "orangered",
                "Non-overweight": "seagreen",
            },
            title="Behaviour profile – Overweight vs Non-overweight (2018, normalized 0–1)",
        )

        fig4.update_traces(fill="toself", opacity=0.55)

        fig4.update_layout(
            polar=dict(
                radialaxis=dict(
                    range=[r_min, r_max],  # data-driven range instead of a fixed [0, 1]
                    showticklabels=True,
                    tickfont=dict(size=10),
                ),
                angularaxis=dict(
                    rotation=90,
                    direction="clockwise",
                ),
            ),
            legend_title="Group",
            margin=dict(l=40, r=40, t=80, b=40),
        )
        fig4.show()


In [None]:
# ------------------------------------------------------------
# GRAPH 4 – top differences OW vs Non-OW (2018, normalized 0–1)
# factors taken from list_columns, ordered by absolute difference
# ------------------------------------------------------------

# An empty df_2018_norm also yields two empty subsets, so the single guard below
# covers the "no 2018 data at all" case as well.
df_ow_2018  = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 1]
df_non_2018 = df_2018_norm[df_2018_norm["OVERWEIGHT"] == 0]

if df_ow_2018.empty or df_non_2018.empty:
    fig4 = None
    # printed rather than st.info(...): `st` is never imported in this notebook,
    # so the call would raise NameError instead of reporting the missing data
    print("Not enough data for both overweight and non-overweight groups in 2018.")
else:
    # mean value of every factor in list_columns
    ow_means  = df_ow_2018[list_columns].mean()
    non_means = df_non_2018[list_columns].mean()

    # OW − Non-OW difference
    diff = ow_means - non_means

    df_diff = (
        diff.rename("DIFFERENCE")
            .reset_index()
            .rename(columns={"index": "FACTOR"})
    )

    df_diff["DIFFERENCE"] = df_diff["DIFFERENCE"].fillna(0.0)
    df_diff["ABS_DIFF"] = df_diff["DIFFERENCE"].abs()

    # order by size of the difference (largest first)
    df_diff = df_diff.sort_values("ABS_DIFF", ascending=False)

    # optional cut to the top N factors
    TOP_N = 25   # can be lowered to e.g. 5 / 8 / 12
    df_diff = df_diff.head(TOP_N)

    # which group has the higher mean – used for colour only
    # (a difference of exactly 0 is labelled "Non-overweight higher")
    df_diff["SIDE"] = np.where(
        df_diff["DIFFERENCE"] > 0,
        "Overweight higher",
        "Non-overweight higher"
    )

    color_ow = {
        "Overweight higher": "orangered",
        "Non-overweight higher": "seagreen"
    }

    fig4 = px.bar(
        df_diff,
        x="DIFFERENCE",
        y="FACTOR",
        orientation="h",
        color="SIDE",
        color_discrete_map=color_ow,
        category_orders={"FACTOR": df_diff["FACTOR"].tolist()},
        title="Top behaviour differences (Overweight − Non-overweight, 2018, normalized 0–1)",
    )

    fig4.update_layout(
        xaxis_title="Difference (OW − Non-OW, normalized 0–1; >0 = OW worse)",
        yaxis_title="Factor",
        legend_title="Higher risk in",
        xaxis=dict(
            zeroline=True,
            zerolinecolor="black",
            zerolinewidth=1.5
        ),
        height=700,
        margin=dict(l=180, r=60),
    )

    fig4.show()