In [2]:
import os
import sys

In [None]:
import pandas as pd
# Load the combined UFM workbook; the first sheet row supplies the column names
df_raw = pd.read_excel('../Data/DATA_UFM_combined_V2.xlsx', header=0)

# Subset of columns used by the rest of the notebook
cols = [
    # identifiers and descriptive fields
    'udbud_id',
    'titel',
    'educational_category',
    'displaydocclass',
    'hovedinsttx',
    'instregiontx',
    'instkommunetx',
    # survey / outcome measures
    'arbejdstid_timer',
    'arbmedstud_likert',
    'ensom_likert',
    'maanedloen_10aar',
    'maanedloen_nyudd',
    'tidsforbrug_p50',
    'stress_daglig_likert',
    'ledighed_nyudd',
    'kvote_1_kvotient',
]

data = df_raw.loc[:, cols]

# Rows with udbud_id == 999999 describe the education on national level:
# keep them in their own frame (udbud_id column and missing values retained)
data_whole_edu = data.loc[data['udbud_id'].eq(999999)]

# Everything else is the working set; udbud_id is no longer needed there.
# data_na keeps rows with missing values, data only the complete rows.
data_na = data.loc[data['udbud_id'].ne(999999)].drop(columns=['udbud_id'])
data = data_na.dropna()

In [14]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool

# Route Bokeh output to the notebook
output_notebook()

# Scatter of daily stress against loneliness, one point per row of
# data_whole_edu (the udbud_id == 999999 rows split off in the loading cell).
p = figure(
    width=700,
    height=500,
    title="Stress vs Loneliness by Educational Program",
    x_axis_label="Daily Stress (Likert)",
    y_axis_label="Loneliness (Likert)"
)

# figure.circle(..., size=...) is deprecated since Bokeh 3.4; scatter() with
# the "circle" marker draws the same screen-sized circles.
p.scatter(
    x='stress_daglig_likert',
    y='ensom_likert',
    size=8,
    alpha=0.5,
    marker='circle',
    source=data_whole_edu
)

# Hover shows the program title plus both plotted values with two decimals
hover = HoverTool(tooltips=[
    ("Program", "@titel"),
    ("Stress", "@stress_daglig_likert{0.00}"),
    ("Loneliness", "@ensom_likert{0.00}")
])
p.add_tools(hover)

show(p)



In [15]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, MultiChoice, CustomJS
from bokeh.layouts import column
import pandas as pd

# Route Bokeh output to the notebook
output_notebook()

# Danish municipality coordinates, keyed by the names used in 'instkommunetx'.
# Each value is a (longitude, latitude) pair -- note the lon-first order.
municipality_coords = dict([
    ('København', (12.5683, 55.6761)),
    ('Aarhus', (10.2039, 56.1629)),
    ('Odense', (10.4023, 55.4038)),
    ('Aalborg', (9.9217, 57.0488)),
    ('Esbjerg', (8.4520, 55.4767)),
    ('Roskilde', (12.0803, 55.6415)),
    ('Kolding', (9.4721, 55.4904)),
])

def lat_lon_to_mercator(lon, lat):
    """Project a longitude/latitude pair (degrees) to Mercator x/y coordinates.

    Despite the function name, the arguments are taken in (lon, lat) order.

    Args:
        lon: Longitude in decimal degrees.
        lat: Latitude in decimal degrees.

    Returns:
        Tuple ``(x, y)`` scaled so that lon = 180 maps to x = 20037508.34.

    Raises:
        ValueError: If the tangent term is not positive (e.g. lat = -90),
            because its logarithm is undefined.
    """
    from math import log, pi, tan

    half_extent = 20037508.34  # x value that corresponds to 180 degrees

    easting = lon * half_extent / 180
    # Mercator latitude stretch, first expressed in degrees, then scaled
    # with the same factor as the longitude.
    stretched_lat = log(tan((90 + lat) * pi / 360)) / (pi / 180)
    northing = stretched_lat * half_extent / 180
    return easting, northing

# Prepare data with program information: keep only rows whose municipality
# has an entry in municipality_coords (unmapped names become NaN and are dropped).
data_with_coords = data.copy()
data_with_coords['coords'] = data_with_coords['instkommunetx'].map(municipality_coords)
data_with_coords = data_with_coords.dropna(subset=['coords'])

# Unpack the (lon, lat) tuples and project them column by column. Plain lists
# also work when no row survived the filter, unlike a two-column DataFrame
# assignment / row-wise apply(result_type='expand').
coord_pairs = data_with_coords['coords'].tolist()
mercator_pairs = [lat_lon_to_mercator(lon, lat) for lon, lat in coord_pairs]
data_with_coords['lon'] = [lon for lon, _ in coord_pairs]
data_with_coords['lat'] = [lat for _, lat in coord_pairs]
data_with_coords['x'] = [x for x, _ in mercator_pairs]
data_with_coords['y'] = [y for _, y in mercator_pairs]

# Create sources: source_all is the untouched reference copy, source_filtered
# feeds the glyph and is replaced by the callback. The tuple-valued 'coords'
# helper column is not referenced by the glyph or the hover, so it is left out.
plot_columns = data_with_coords.drop(columns=['coords'])
source_all = ColumnDataSource(plot_columns)
source_filtered = ColumnDataSource(plot_columns)

# Offer only programs that actually have a point on the map; titles taken
# from the unfiltered frame would include programs without coordinates.
program_list = sorted(data_with_coords['titel'].unique().tolist())

# Create MultiChoice widget (empty selection means "show everything")
program_selector = MultiChoice(
    title="Select Programs:",
    value=[],
    options=program_list,
    width=850
)

# Create figure with Mercator axes so the tile layer and points line up
p = figure(
    width=900,
    height=700,
    title="Program Distribution Across Denmark",
    x_axis_type="mercator",
    y_axis_type="mercator",
    tools="pan,wheel_zoom,reset"
)

p.add_tile("CartoDB Positron")

# figure.circle(..., size=...) is deprecated since Bokeh 3.4; scatter() with
# the "circle" marker draws the same screen-sized circles.
p.scatter(
    x='x',
    y='y',
    size=10,
    alpha=0.6,
    color='red',
    marker='circle',
    source=source_filtered
)

# Add hover
hover = HoverTool(tooltips=[
    ("Program", "@titel"),
    ("Municipality", "@instkommunetx"),
    ("Category", "@educational_category")
])
p.add_tools(hover)

# JavaScript callback for filtering. It builds a fresh data object and assigns
# it to source_filtered.data, which notifies the plot without a manual
# change.emit() and never leaves the source with columns of unequal length.
callback = CustomJS(args=dict(source_all=source_all, source_filtered=source_filtered), code="""
    const selected_programs = new Set(cb_obj.value);
    const all_data = source_all.data;
    const new_data = {};

    if (selected_programs.size === 0) {
        // If no programs selected, show all
        for (const key in all_data) {
            new_data[key] = all_data[key].slice();
        }
    } else {
        // Filter by selected programs
        for (const key in all_data) {
            new_data[key] = [];
        }
        const titles = all_data['titel'];
        for (let i = 0; i < titles.length; i++) {
            if (selected_programs.has(titles[i])) {
                for (const key in all_data) {
                    new_data[key].push(all_data[key][i]);
                }
            }
        }
    }

    source_filtered.data = new_data;
""")

program_selector.js_on_change('value', callback)

# Show layout
show(column(program_selector, p))



In [29]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool
import pandas as pd

# Route Bokeh output to the notebook
output_notebook()

# Clean the data: coerce kvote_1_kvotient to numeric (values that cannot be
# parsed become NaN) and drop rows lacking either plotted value.
data_clean = data.copy()
data_clean['kvote_1_kvotient'] = pd.to_numeric(data_clean['kvote_1_kvotient'], errors='coerce')
data_clean = data_clean.dropna(subset=['kvote_1_kvotient', 'maanedloen_nyudd'])

# Create figure
p = figure(
    width=700,
    height=500,
    title="Grades and salary outcomes by Educational Program",
    x_axis_label="Grade average of education",
    y_axis_label="Monthly salary newly graduated"
)

# figure.circle(..., size=...) is deprecated since Bokeh 3.4; scatter() with
# the "circle" marker draws the same screen-sized circles.
p.scatter(
    x='kvote_1_kvotient',
    y='maanedloen_nyudd',
    size=8,
    alpha=0.5,
    marker='circle',
    source=data_clean
)

# Hover shows the program title plus both plotted values with two decimals
hover = HoverTool(tooltips=[
    ("Program", "@titel"),
    ("Grade average", "@kvote_1_kvotient{0.00}"),
    ("Salary", "@maanedloen_nyudd{0.00}")
])
p.add_tools(hover)

show(p)

