In [None]:
import os
import sys

In [5]:
import pandas as pd
# Load the combined UFM workbook; the first row of the sheet holds the headers.
df_raw = pd.read_excel('../Data/DATA_UFM_combined.xlsx', header=0)

# Columns carried forward into the analysis; everything else in the
# workbook is ignored from here on.
cols = [
    'udbud_id',
    'titel',
    'educational_category',
    'displaydocclass',
    'hovedinsttx',
    'instregiontx',
    'instkommunetx',
    'arbejdstid_timer',
    'arbmedstud_likert',
    'ensom_likert',
    'maanedloen_10aar',
    'maanedloen_nyudd',
    'tidsforbrug_p50',
    'stress_daglig_likert'
]

data = df_raw[cols]

# udbud_id == 999999 marks the education on national level, so those rows
# are split off into their own frame instead of being mixed with the rest.
is_national = data['udbud_id'].eq(999999)
data_whole_edu = data[is_national]

# Remaining rows, without the id column; missing values are still present.
data_na = data[~is_national].drop(columns=['udbud_id']).copy()

# Same rows restricted to complete cases (no missing value in any column).
data = data_na.dropna()

In [14]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool

# Render Bokeh output inline in the notebook.
output_notebook()

# Scatter of daily stress against loneliness, one point per row of
# data_whole_edu (the rows with udbud_id == 999999, i.e. national level).
p = figure(
    width=700,
    height=500,
    title="Stress vs Loneliness by Educational Program",
    x_axis_label="Daily Stress (Likert)",
    y_axis_label="Loneliness (Likert)"
)

# Use scatter() rather than circle(): calling circle() with a screen-space
# `size` is deprecated since Bokeh 3.4. scatter() draws circle markers by
# default, so the figure looks the same.
p.scatter(
    x='stress_daglig_likert',
    y='ensom_likert',
    size=8,
    alpha=0.5,
    source=data_whole_edu
)

# Hover tool: program title plus both scores rendered with two decimals.
hover = HoverTool(tooltips=[
    ("Program", "@titel"),
    ("Stress", "@stress_daglig_likert{0.00}"),
    ("Loneliness", "@ensom_likert{0.00}")
])
p.add_tools(hover)

show(p)



In [16]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import LinearColorMapper, ColorBar
from bokeh.transform import transform
import pandas as pd

# Render Bokeh output inline in the notebook.
output_notebook()

# Cut both national-level Likert scores into 10 equal-width intervals.
stress_bins = pd.cut(data_whole_edu['stress_daglig_likert'], bins=10)
ensom_bins = pd.cut(data_whole_edu['ensom_likert'], bins=10)

# Count programs in each (stress bin, loneliness bin) combination.
# observed=False is passed explicitly: it keeps empty combinations as
# zero-count cells (the full 10 x 10 grid) and avoids the pandas warning
# about the default for categorical groupers changing.
heatmap_counts = (
    data_whole_edu
    .groupby([stress_bins, ensom_bins], observed=False)
    .size()
    .reset_index(name='count')
)

# One row per grid cell: centre and extent of the cell plus its count.
# The interval columns are categorical, so the derived values are cast to
# plain floats before they are handed to Bokeh.
stress_intervals = heatmap_counts['stress_daglig_likert']
ensom_intervals = heatmap_counts['ensom_likert']
heatmap_data = pd.DataFrame({
    'stress_mid': stress_intervals.apply(lambda iv: iv.mid).astype(float),
    'ensom_mid': ensom_intervals.apply(lambda iv: iv.mid).astype(float),
    'stress_width': stress_intervals.apply(lambda iv: iv.length).astype(float),
    'ensom_height': ensom_intervals.apply(lambda iv: iv.length).astype(float),
    'count': heatmap_counts['count']
})

# Create figure
p = figure(
    width=700,
    height=500,
    title="Heatmap: Stress vs Loneliness (National Level)",
    x_axis_label="Daily Stress (Likert)",
    y_axis_label="Loneliness (Likert)"
)

# Linear colour scale spanning the smallest to the largest cell count.
mapper = LinearColorMapper(
    palette="Viridis256",
    low=heatmap_data['count'].min(),
    high=heatmap_data['count'].max()
)

# Draw one tile per cell. Tile size comes from the bin's own width/height
# instead of a fixed 0.3, so tiles cover their bins exactly without gaps
# or overlap whatever the range of the data is.
p.rect(
    x='stress_mid',
    y='ensom_mid',
    width='stress_width',
    height='ensom_height',
    source=heatmap_data,
    fill_color=transform('count', mapper),
    line_color=None
)

# Colour bar explaining the count scale.
color_bar = ColorBar(color_mapper=mapper, label_standoff=12, location=(0,0), title='Count')
p.add_layout(color_bar, 'right')

show(p)

  heatmap_counts = data_whole_edu.groupby([stress_bins, ensom_bins]).size().reset_index(name='count')
