In [3]:
import os
import sys

In [4]:
import pandas as pd
# Load the combined UFM workbook; the first sheet row supplies the column names
df_raw = pd.read_excel('../Data/DATA_UFM_combined.xlsx', header=0)

# Columns carried through the rest of the notebook (order is preserved in `data`)
cols = [
    # identifier + descriptive columns
    'udbud_id', 'titel', 'educational_category', 'displaydocclass',
    'hovedinsttx', 'instregiontx', 'instkommunetx',
    # measures
    'arbejdstid_timer', 'arbmedstud_likert', 'ensom_likert',
    'maanedloen_10aar', 'maanedloen_nyudd', 'tidsforbrug_p50',
    'stress_daglig_likert',
]

data = df_raw[cols]

# udbud_id == 999999 flags the rows describing an education on national level;
# they are split off into their own frame and excluded from `data`
is_national = data['udbud_id'].eq(999999)
data_whole_edu = data[is_national]

# Per-offering rows without the id column, missing values still present
data_na = data[~is_national].drop(columns=['udbud_id'])

# Same rows, restricted to those with no missing value in any kept column
data = data_na.dropna()

In [5]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool

output_notebook()

# Scatter of daily stress against loneliness, one marker per row of
# `data_whole_edu` (the rows with udbud_id == 999999 split off in the loading cell)
p = figure(
    width=700,
    height=500,
    title="Stress vs Loneliness by Educational Program",
    x_axis_label="Daily Stress (Likert)",
    y_axis_label="Loneliness (Likert)"
)

# figure.circle(size=...) is deprecated since Bokeh 3.4; scatter() draws the
# same screen-sized circle markers (its default marker is "circle")
p.scatter(
    x='stress_daglig_likert',
    y='ensom_likert',
    size=8,
    alpha=0.5,
    source=data_whole_edu
)

# Hover shows the program title and both Likert values with two decimals
hover = HoverTool(tooltips=[
    ("Program", "@titel"),
    ("Stress", "@stress_daglig_likert{0.00}"),
    ("Loneliness", "@ensom_likert{0.00}")
])
p.add_tools(hover)

show(p)



In [7]:
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource, DataTable, TableColumn, HTMLTemplateFormatter

output_notebook()

# Numeric measures that are averaged per bin and drawn as bars
num_cols = [
    "arbejdstid_timer",
    "maanedloen_nyudd",
    "maanedloen_10aar",
    "tidsforbrug_p50",
    "stress_daglig_likert",
    "ensom_likert",
    "arbmedstud_likert",
]
# Categorical columns that are candidates for `bin_col`
cat_cols = [
    "titel",
    "educational_category",
    "displaydocclass",
    "hovedinsttx",
    "instregiontx",
    "instkommunetx",
]

# ---------- choose your bin key here ----------
bin_col = "educational_category"  # <- ability to change group type here
# ---------------------------------------------

# 1) Aggregate numeric columns per bin (mean) and add a row count.
#    The groupby is built once and reused; agg() and size() return the groups
#    in the same order, so the counts can be attached positionally.
agg_spec = {c: "mean" for c in num_cols}
grouped = data.groupby(bin_col, dropna=False)
df_binned = (
    grouped.agg(agg_spec)
           .assign(n_programs=grouped.size().to_numpy())
           .reset_index()
)

# 2) Min-max scale every numeric column to 0..100 for the bar widths.
#    A column whose bins all share one mean has range 0; the range is replaced
#    by 1 so that column becomes 0.0 everywhere instead of dividing by zero.
mins = df_binned[num_cols].min()
span = (df_binned[num_cols].max() - mins).replace(0, 1)
df_binned = df_binned.join(
    ((df_binned[num_cols] - mins) / span * 100).add_suffix("_norm")
)

source = ColumnDataSource(df_binned)


def tpl(rgb):
    """Return the cell template for a gradient bar in colour ``rgb`` ("r,g,b").

    The bar's CSS width is the cell value followed by ``%``.
    """
    return f"""
<div style="height:14px;border-radius:2px;background:
linear-gradient(to right, rgba({rgb},0.25), rgba({rgb},0.95));
width:<%= value %>%"></div>"""


# Bar colour ("r,g,b") for each normalised column; dict order is column order
palette = {
    "arbejdstid_timer_norm":     "30,136,229",
    "maanedloen_nyudd_norm":     "240,98,146",
    "maanedloen_10aar_norm":     "229,57,53",
    "tidsforbrug_p50_norm":      "67,160,71",
    "stress_daglig_likert_norm": "245,124,0",
    "ensom_likert_norm":         "123,31,162",
    "arbmedstud_likert_norm":    "0,121,107",
}

# Two plain columns first: the bin label and the number of rows in the bin
columns = [
    TableColumn(field=bin_col, title=bin_col.replace("_"," ").title()),
    TableColumn(field="n_programs", title="Count in Bin"),
]

# Then one bar column per palette entry, titled after the un-normalised name
columns.extend(
    TableColumn(
        field=norm_col,
        title=norm_col.replace("_norm","").replace("_"," ").title(),
        formatter=HTMLTemplateFormatter(template=tpl(rgb)),
    )
    for norm_col, rgb in palette.items()
)

table = DataTable(
    source=source,
    columns=columns,
    width=1100,
    height=520,
    index_position=None,
    reorderable=False,
    sortable=True,
)

show(table)


In [8]:
# pip install "bokeh>=3.4"
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.layouts import column, row
from bokeh.models import (
    ColumnDataSource, Select, HoverTool, FixedTicker, CustomJS,
    CheckboxGroup, Legend
)
from bokeh.palettes import Category20

output_notebook()

# -----------------------------
# Columns (your schema)
# -----------------------------
num_cols = [
    "arbejdstid_timer",
    "maanedloen_nyudd",
    "maanedloen_10aar",
    "tidsforbrug_p50",
    "stress_daglig_likert",
    "ensom_likert",
    "arbmedstud_likert",
]
cat_col = "educational_category"

# Requires `data` (DataFrame) from the loading cell.
# Rows missing any numeric value are dropped; category and title are forced to
# str so they compare cleanly against the string values of the Select widgets.
df = data[num_cols + [cat_col, "titel"]].dropna(subset=num_cols).copy()
df[cat_col] = df[cat_col].astype(str)
df["titel"] = df["titel"].astype(str)

# Normalize numeric columns to [0,1] for plotting.
# A constant column has range 0; it is replaced by 1 to avoid dividing by zero.
mins = df[num_cols].min()
rng = (df[num_cols].max() - mins).replace(0, 1)
df_norm = (df[num_cols] - mins) / rng

# Keep originals for hover + category/title
for c in num_cols:
    df_norm[f"{c}_raw"] = df[c].values
df_norm[cat_col] = df[cat_col].values
df_norm["titel"] = df["titel"].values

# Build multi_line-friendly columns: every row gets the same x positions
# (one per numeric column) and its own list of normalized y values
x_positions = list(range(len(num_cols)))
df_norm["xs"] = [x_positions] * len(df_norm)
df_norm["ys"] = df_norm[num_cols].values.tolist()

# -----------------------------
# Colors per category
# -----------------------------
categories = sorted(df_norm[cat_col].unique().tolist())
palette = Category20[20] if len(categories) > 10 else Category20[20][:len(categories)]
# The modulo wraps around the palette when there are more than 20 categories
color_map = {cat: palette[i % len(palette)] for i, cat in enumerate(categories)}
df_norm["color"] = df_norm[cat_col].map(color_map)

# Initial view: the alphabetically first category
initial_cat = categories[0] if categories else ""
mask0 = (df_norm[cat_col] == initial_cat)
df_initial = df_norm.loc[mask0]

# The title dropdown built later in this cell starts on the alphabetically
# first title of the initial category. Seed the highlight source with the first
# row carrying that title so the thick line is visible from the start, not only
# after the first widget change.
highlight_title = df_initial["titel"].min() if len(df_initial) else None
df_highlight = df_initial.loc[df_initial["titel"] == highlight_title].head(1)

source_all = ColumnDataSource(df_norm)              # all rows
source = ColumnDataSource(df_initial)               # filtered by category / show-all
source_highlight = ColumnDataSource(df_highlight)   # single selected titel

# -----------------------------
# Figure
# -----------------------------
p = figure(
    title="Parallel Coordinates — by Educational Category",
    width=1000,
    height=520,
    y_range=(0, 1),
    tools="pan,wheel_zoom,reset,save",
)

# One grey vertical guide per numeric column, spanning the normalized 0..1 range
for xi in x_positions:
    p.segment(x0=xi, y0=0, x1=xi, y1=1, line_color="#9e9e9e", line_alpha=0.5)

# Put a tick on every guide and label it with the column name
p.xaxis.ticker = FixedTicker(ticks=x_positions)
p.xaxis.major_label_overrides = dict(enumerate(num_cols))
p.xaxis.major_label_orientation = 0.9

# Thin, translucent lines for the currently filtered rows
renderer = p.multi_line(
    xs="xs",
    ys="ys",
    source=source,
    line_color="color",
    line_width=1.5,
    line_alpha=0.25,
)

# Thick, nearly opaque line for the single selected titel
renderer_hi = p.multi_line(
    xs="xs",
    ys="ys",
    source=source_highlight,
    line_color="color",
    line_width=4,
    line_alpha=0.95,
)

# Hover on both renderers: category, title, then the raw (un-normalized) values
tooltips = [
    ("Category", f"@{cat_col}"),
    ("Title", "@titel"),
    *((c, f"@{c}_raw") for c in num_cols),
]
p.add_tools(HoverTool(renderers=[renderer, renderer_hi], tooltips=tooltips))

# -----------------------------
# Controls
# -----------------------------
# Dropdown choosing which category's rows are drawn
select_cat = Select(
    title="Education type (educational_category):",
    options=categories,
    value=initial_cat,
    width=380,
)

# Single checkbox; when ticked the category filter is bypassed
show_all = CheckboxGroup(labels=["Show all categories"], active=[])

# Dropdown choosing one titel to highlight. Its initial options are the
# distinct titles of the initially selected category, alphabetically sorted.
initial_titles = sorted(set(df_norm.loc[mask0, "titel"]))
default_title = initial_titles[0] if initial_titles else ""
select_title = Select(
    title="Specific education (titel):",
    options=initial_titles,
    value=default_title,
    width=500,
)

# -----------------------------
# JS callbacks
# -----------------------------
# Both callbacks run in the browser (CustomJS); no Python kernel is involved.
#
# callback_filter — fired when the category dropdown or the show-all checkbox
# changes. In order, it:
#   1. rebuilds `source` from `source_all`, keeping every row when checkbox
#      index 0 is active, otherwise only rows whose category equals the
#      dropdown value (compared as strings);
#   2. sets the title dropdown options to the sorted distinct titles of the
#      kept rows, and resets its value to the first option (or "") when the
#      current value is no longer among them;
#   3. rebuilds `source_hi` with the first kept row whose titel equals the
#      dropdown value (empty when there is no match).
# NOTE(review): step 2 assigns select_title.value, which presumably also fires
# callback_highlight; step 3 then repeats the same work — harmless but redundant.
callback_filter = CustomJS(
    args=dict(
        source=source, source_all=source_all, cat=select_cat, show_all=show_all,
        cat_col=cat_col, select_title=select_title, source_hi=source_highlight
    ),
    code="""
    const all = source_all.data;
    const out = {};
    for (const k in all) out[k] = [];

    const showAll = show_all.active.indexOf(0) >= 0;
    const n = all[cat_col].length;

    // Filter rows (either by selected category or all)
    for (let i = 0; i < n; i++) {
      if (showAll || String(all[cat_col][i]) === String(cat.value)) {
        for (const k in all) out[k].push(all[k][i]);
      }
    }
    source.data = out;
    source.change.emit();

    // Update title dropdown options based on the *filtered* set
    const titles = Array.from(new Set(out["titel"])).sort();
    select_title.options = titles;
    if (titles.length === 0) {
      select_title.value = "";
    } else if (titles.indexOf(select_title.value) === -1) {
      select_title.value = titles[0];   // reset to a valid option
    }

    // Also refresh the highlight (reuse the title callback logic inline)
    const out_hi = {};
    for (const k in all) out_hi[k] = [];
    const chosen = select_title.value;

    // find the first match in the *current filtered* data
    const m = out["titel"].length;
    for (let i = 0; i < m; i++) {
      if (String(out["titel"][i]) === String(chosen)) {
        for (const k in out) out_hi[k].push(out[k][i]);
        break;  // single line
      }
    }
    source_hi.data = out_hi;
    source_hi.change.emit();
    """
)

# callback_highlight — fired when the title dropdown value changes. It scans
# the currently filtered `source` and copies the first row whose titel equals
# the dropdown value into `source_hi`; with no match `source_hi` ends up with
# the same columns but no rows, so no highlighted line is drawn.
callback_highlight = CustomJS(
    args=dict(source=source, source_hi=source_highlight, select_title=select_title),
    code="""
    const from = source.data;
    const out = {};
    for (const k in from) out[k] = [];

    const chosen = select_title.value;
    const n = from["titel"].length;

    for (let i = 0; i < n; i++) {
      if (String(from["titel"][i]) === String(chosen)) {
        for (const k in from) out[k].push(from[k][i]);
        break; // single line
      }
    }
    source_hi.data = out;
    source_hi.change.emit();
    """
)

# Wire the widgets: category and show-all both re-filter; title only re-highlights
select_cat.js_on_change("value", callback_filter)
show_all.js_on_change("active", callback_filter)
select_title.js_on_change("value", callback_highlight)

# -----------------------------
# Legend (optional, clickable)
# -----------------------------
# The data lines are drawn by shared multi_line renderers, so the legend is
# built from one proxy line per category that only carries the category colour.
# The proxies sit at y = -1, outside the figure's fixed y_range of (0, 1), and
# at x = 0 (the first axis guide): the figure's x-range is auto-fitted to all
# renderers, so proxies at negative x would widen it and leave an empty band
# left of the first guide.
legend_items = []
for cat in categories:
    r = p.line(x=[0, 0], y=[-1, -1],
               line_color=color_map[cat], line_width=3, alpha=0.9)
    legend_items.append((cat, [r]))

# NOTE(review): click_policy="mute" toggles only the proxy renderers listed in
# each legend item; the category lines themselves are not among them — confirm
# whether clicking the legend is meant to affect the data lines.
legend = Legend(items=legend_items, click_policy="mute", label_text_font_size="8pt")
p.add_layout(legend, "right")

# -----------------------------
# Show
# -----------------------------
# Layout: category dropdown + show-all checkbox on one row, title dropdown, plot
show(column(row(select_cat, show_all), select_title, p))
