In [10]:
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.io as pio
# Display Plotly figures through the "vscode" renderer by default.
pio.renderers.default = "vscode"
# Use the "plotly" template as the default for Plotly Express figures.
px.defaults.template = "plotly"

In [2]:
# Read the raw dataset; "data.csv" is a relative path, so it is resolved
# against the current working directory.
df = pd.read_csv(filepath_or_buffer="data.csv")

In [None]:
# Collapse the two Belgian COUNTRY_NAME values, "Belgium (Flemish)" and
# "Belgium (French)", into the single label "Belgium".
df["COUNTRY_NAME"] = df["COUNTRY_NAME"].replace(
    dict.fromkeys(("Belgium (Flemish)", "Belgium (French)"), "Belgium")
)

In [21]:
# Mean OVERWEIGHT for every (YEAR, COUNTRY_NAME) pair; the grouping keys stay
# as regular columns because as_index=False.
df_filtr = (
    df.groupby(["YEAR", "COUNTRY_NAME"], as_index=False)["OVERWEIGHT"]
    .mean()
)

In [None]:
# Display the mean OVERWEIGHT per YEAR and COUNTRY_NAME.
df_filtr

Unnamed: 0,YEAR,COUNTRY_NAME,OVERWEIGHT
0,2002,Austria,0.173717
1,2002,Belgium,0.138671
2,2002,Canada,0.232289
3,2002,Croatia,0.168396
4,2002,Czech Republic,0.137074
...,...,...,...
188,2018,Sweden,0.185174
189,2018,Switzerland,0.157220
190,2018,Turkey,0.247632
191,2018,Ukraine,0.162013


In [23]:
# Line chart of mean OVERWEIGHT over YEAR, one line per country.
fig = px.line(
    data_frame=df_filtr,
    x="YEAR",
    y="OVERWEIGHT",
    color="COUNTRY_NAME",
    title="Overweight Czech republic and selected countries",
)

In [24]:
# Render the OVERWEIGHT line chart.
fig.show()

In [67]:
# Horizontal bar chart: most recent state (year 2018) of selected features by
# country; the features that are plotted are the ones named in list_columns.

# Keep only the rows recorded for the year 2018.
df_2018 = df.loc[df["YEAR"].eq(2018)]

# Features averaged per country for the 2018 horizontal bar chart.
list_columns = [
    "SWEETS",
    "TOOTH_BRUSHING",
    "BREAKFAST_WEEKDAYS",
    "SOFT_DRINKS",
    "PHYS_ACT_60",
    "NERVOUS",
]

# Column name -> maximum value of that column in the dataset.
dictionary = dict(
    # maximum 7
    FRUITS=7,
    SOFT_DRINKS=7,
    SWEETS=7,
    VEGETABLES=7,
    FRIEND_TALK=7,
    TIME_EXE=7,
    PHYS_ACT_60=7,
    DRUNK_30=7,
    # maximum 10
    LIFESAT=10,
    # maximum 6
    FAMILY_MEALS_TOGETHER=6,
    BREAKFAST_WEEKDAYS=6,
    # maximum 5
    TOOTH_BRUSHING=5,
    STUD_TOGETHER=5,
    BUL_OTHERS=5,
    BUL_BEEN=5,
    FIGHT_YEAR=5,
    INJURED_YEAR=5,
    HEADACHE=5,
    STOMACHACHE=5,
    FEEL_LOW=5,
    NERVOUS=5,
    SLEEP_DIF=5,
    DIZZY=5,
    THINK_BODY=5,
    TALK_MOTHER=5,
    TALK_FATHER=5,
    # maximum 4
    HEALTH=4,
    LIKE_SCHOOL=4,
    SCHOOL_PRESSURE=4,
    COMPUTER_NO=4,
)



In [68]:
# Per-country mean of every column in list_columns, for the 2018 rows only.
df_2018_grouped = (
    df_2018.groupby("COUNTRY_NAME", as_index=False)[list_columns]
    .mean()
)

In [80]:
# Divide each per-country mean by the column's entry in `dictionary`.
# The means are recomputed from df_2018 here instead of rescaling
# df_2018_grouped in place, so re-running this cell gives the same result
# rather than dividing the already-scaled values a second time.
country_means_2018 = df_2018.groupby("COUNTRY_NAME", as_index=False)[list_columns].mean()
df_2018_grouped = country_means_2018.assign(
    **{
        feature: country_means_2018[feature] / dictionary[feature]
        for feature in list_columns
    }
)

In [81]:
# Display the per-country 2018 table after each column was divided by its entry in `dictionary`.
df_2018_grouped

Unnamed: 0,COUNTRY_NAME,SWEETS,TOOTH_BRUSHING,BREAKFAST_WEEKDAYS,SOFT_DRINKS,PHYS_ACT_60,NERVOUS
0,Albania,0.669041,0.310811,0.669292,0.562829,0.585974,0.719974
1,Armenia,0.796363,0.342229,0.731868,0.556185,0.645237,0.776649
2,Austria,0.601739,0.258493,0.62415,0.501091,0.616625,0.754639
3,Azerbaijan,0.596333,0.327226,0.727715,0.423074,0.459029,0.833246
4,Belgium,0.601461,0.284651,0.812349,0.550087,0.586307,0.71119
5,Bulgaria,0.655544,0.306377,0.754566,0.554963,0.599386,0.625083
6,Canada,0.554882,0.272879,0.766248,0.391481,0.689599,0.669852
7,Croatia,0.601583,0.288301,0.72855,0.506231,0.628139,0.698272
8,Czech Republic,0.578252,0.260815,0.717334,0.482438,0.600115,0.659688
9,Denmark,0.50935,0.245826,0.841194,0.450038,0.516683,0.772838


In [82]:
# Reshape the per-country table to long form: one row per (country, feature)
# pair, with the feature name in FEATURE and its number in VALUE.
df_country_long = df_2018_grouped.melt(
    id_vars=["COUNTRY_NAME"],
    value_vars=list_columns,
    var_name="FEATURE",
    value_name="VALUE",
)

# Combined "<feature> - <country>" label; kept on the frame, not used by the chart below.
df_country_long["Y_LABEL"] = df_country_long["FEATURE"] + " - " + df_country_long["COUNTRY_NAME"]

# Horizontal grouped bar chart: features on the y axis, one bar per country
# inside each feature group.
fig_horizontal = px.bar(
    df_country_long,
    x="VALUE",
    y="FEATURE",
    color="COUNTRY_NAME",
    barmode="group",
    orientation="h",
    # Plain string (no placeholders were interpolated); "Owerweight" typo fixed
    # and wording aligned with the vertical chart's title.
    title="Overweight: Czech Republic vs other country in 2018",
    # Readable axis/legend titles instead of the raw column names.
    labels={
        "VALUE": "Mean / scale maximum",
        "FEATURE": "Feature",
        "COUNTRY_NAME": "Country",
    },
)

In [83]:
# Render the horizontal grouped bar chart.
fig_horizontal.show()

In [72]:
# 2018 subset for the vertical chart, which uses a different selection of features.
df_2018_ver = df.loc[df["YEAR"].eq(2018)]

# Features shown in the vertical chart.
list_columns_2 = [
    "FRUITS",
    "VEGETABLES",
    "TALK_FATHER",
    "TALK_MOTHER",
    "FRIEND_TALK",
    "LIFESAT",
    "SLEEP_DIF",
    "TIME_EXE",
    "FIGHT_YEAR",
    "HEADACHE",
    "BUL_BEEN",
]


In [73]:
# Per-country mean of every column in list_columns_2, for the 2018 rows only.
df_2018_ver_grouped = (
    df_2018_ver.groupby("COUNTRY_NAME", as_index=False)[list_columns_2]
    .mean()
)


In [77]:
# Divide each per-country mean by the column's entry in `dictionary`.
# The means are recomputed from df_2018_ver here instead of rescaling
# df_2018_ver_grouped in place, so re-running this cell gives the same result
# rather than dividing the already-scaled values a second time.
country_means_2018_ver = df_2018_ver.groupby("COUNTRY_NAME", as_index=False)[list_columns_2].mean()
df_2018_ver_grouped = country_means_2018_ver.assign(
    **{
        feature: country_means_2018_ver[feature] / dictionary[feature]
        for feature in list_columns_2
    }
)

In [78]:
# Long-form copy of the per-country table: one row per (country, feature)
# pair, with the feature name in FEATURE and its number in VALUE.
df_country_long = pd.melt(
    df_2018_ver_grouped,
    id_vars=["COUNTRY_NAME"],
    value_vars=list_columns_2,
    var_name="FEATURE",
    value_name="VALUE",
)

# Vertical grouped bar chart: features on the x axis, one bar per country
# inside each feature group.
fig_ver = px.bar(
    data_frame=df_country_long,
    x="FEATURE",
    y="VALUE",
    color="COUNTRY_NAME",
    barmode="group",
    title="Overweight: Czech Republic vs other country in 2018",
)

In [79]:
# Render the vertical grouped bar chart.
fig_ver.show()