In [None]:
import pandas as pd
import numpy as np
import re
import hvplot.pandas
import panel as pn
import holoviews as hv
from datetime import datetime

# Initialise Panel for this notebook before any widget or layout is created
pn.extension()
# NOTE(review): presumably these two settings silence callback console output and
# pop-up notifications in the notebook — confirm against the Panel config documentation
pn.config.console_output = 'disable'
pn.config.notifications = False

In [None]:
# Read the raw data; 'year' is parsed into datetimes because later cells use its `.dt` accessor
df = pd.read_csv(
  'temp.csv',
  parse_dates=['year'],
)

In [None]:
# Create various selection groups
## `unique()` keeps first-appearance order, so these lists follow the row order of the CSV
universities = list(df['university'].unique())
## Sorted (and NaT-free) so that years[0] / years[-1] really are the earliest / latest
## dates — the year slider uses them as its bounds and must not depend on CSV row order
years = sorted(df['year'].dropna().dt.date.unique())
degrees = list(df['degree'].unique())
advanced = list(df['advanced'].unique())

# Hex colour palette
COLOURS = ['#264653', '#e9c46a', '#e76f51', '#2a9d8f', '#f4a261']

In [16]:
# Preferred selections when the dashboard first loads
DEFAULT_UNIVERSITY = 'National University of Singapore'
DEFAULT_DEGREES = ['Accountancy', 'Business Administration', 'Social Science', 'Art']

# Dropdown for the university
## Single-select: exactly one university is active at a time
uni_select = pn.widgets.Select(
  name='Select Universities:',
  # Fall back to the first option when the preferred default is absent from the data,
  # so the widget never starts on a value that matches no rows
  value=DEFAULT_UNIVERSITY if DEFAULT_UNIVERSITY in universities else universities[0],
  options=universities,
)

## Allow users to select the range of years they want to see
## NOTE(review): the plotting cells only read the upper bound of this range
## (year_slider[1]) — confirm the lower handle is meant to be ignored
year_slider = pn.widgets.DateRangeSlider(
  name='Graduate Date Range',
  start=years[0],
  end=years[-1],
  value=(years[0], years[-1]),
  step=365,  # presumably a step in days, i.e. roughly one year per tick — confirm against the Panel docs
  # format='%Y'
)

## Allow users to select multiple degrees at the same time
degree_multi_select = pn.widgets.MultiChoice(
  name='Degrees to Compare (limit 5)',
  options=degrees,
  # Keep only the defaults that actually exist in the data
  value=[degree for degree in DEFAULT_DEGREES if degree in degrees],
  max_items=5
)

# Preview the controls on their own
pn.Column(
  uni_select,
  pn.Row(year_slider, degree_multi_select)
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'4de567f0-99d4-43db-974c-f6a32b5dc1f4': {'version…

In [None]:
@pn.depends(uni_select, year_slider, degree_multi_select)
def bar_graphs_plot(uni_select, year_slider, degree_multi_select):
  """Horizontal bar chart of `employment_rate_overall` for the selected degrees.

  Rows of the global `df` are kept when they match the chosen university,
  fall in the year at the upper end of the slider range, belong to one of
  the chosen degrees and have `advanced == 0`.

  Args:
    uni_select: current value of the university dropdown.
    year_slider: current (start, end) value of the date range slider; only
      the end is read.
    degree_multi_select: currently selected degrees.

  Returns:
    The hvPlot horizontal bar chart, with rows sorted by employment rate.
  """
  frame = df.copy()
  frame['year'] = frame['year'].dt.strftime("%Y")

  # One boolean mask per criterion, combined below
  wanted_year = str(year_slider[1].year)
  is_university = frame['university'] == uni_select
  is_year = frame['year'] == wanted_year
  is_degree = frame['degree'].isin(degree_multi_select)
  is_basic = frame['advanced'] == 0  # advanced further studies are excluded

  ranked = frame[is_university & is_year & is_degree & is_basic] \
    .sort_values('employment_rate_overall')

  return ranked.hvplot.barh(
    x='degree',
    y='employment_rate_overall',
    color='#39b9e8',
    xlabel='Degree',
    ylabel='Full-time Employment Percentage (%)',
    title=f'Full-time Employment Percentage in \n{uni_select} ({year_slider[1].year})'
  )

# Controls stacked above the reactive bar chart
layout = pn.Column(
  pn.Row(uni_select),
  pn.Row(year_slider, degree_multi_select),
  bar_graphs_plot,
)

# Render the dashboard section
layout

In [None]:
@pn.depends(uni_select, year_slider, degree_multi_select)
def error_bars_plot(uni_select, year_slider, degree_multi_select):
  """Median gross monthly salary per degree with 25th/75th percentile error bars.

  Rows of the global `df` are kept when they match the chosen university,
  fall in the year at the upper end of the slider range, belong to one of
  the chosen degrees and have `advanced == 0`.

  Args:
    uni_select: current value of the university dropdown.
    year_slider: current (start, end) value of the date range slider; only
      the end is read.
    degree_multi_select: currently selected degrees.

  Returns:
    A HoloViews overlay of the error bars and the median markers.
  """
  selected_year = year_slider[1].year

  # Filter first so only the matching rows are copied, not the whole frame
  mask = (
    (df['university'] == uni_select) &
    (df['year'].dt.year == selected_year) &
    (df['degree'].isin(degree_multi_select)) &
    (df['advanced'] == 0) # automatically filter out advanced further studies
  )

  # `assign` returns a new frame, so the derived columns are never written onto
  # a filtered slice (which is what triggers pandas' SettingWithCopyWarning)
  df2 = df[mask].assign(
    year=lambda d: d['year'].dt.strftime("%Y"),
    # Distance from the median down to the 25th percentile / up to the 75th percentile
    lower_error=lambda d: d['gross_monthly_median'] - d['gross_mthly_25_percentile'],
    upper_error=lambda d: d['gross_mthly_75_percentile'] - d['gross_monthly_median'],
  )

  # Create the scatter plot point for median
  median = df2.hvplot.scatter(
    x='degree',
    y='gross_monthly_median',
    color='#39b9e8',
    size=100,
    marker='s',
    ylabel='Gross Monthly Salary',
    xlabel='Degree',
    # Show the year only: the data is filtered by year, not by exact date
    title=f'Errors for Gross Monthly Salary in {selected_year}',
    legend=False
  )

  # Create the error bars using the calculated errors
  errors = df2.hvplot.errorbars(
    x='degree',
    y='gross_monthly_median',  # Median as central value
    yerr1='lower_error',
    yerr2="upper_error",
    color='black',
    line_width=2,
    legend=False
  )

  # Error bars first so the median markers are drawn on top of them
  plot2 = errors * median

  return plot2

# Create layout
layout = pn.Column(
  pn.Row(uni_select),
  pn.Row(year_slider, degree_multi_select),
  error_bars_plot
)

# Display
layout

In [24]:
# Render the degree picker on its own (debugging aid)
degree_multi_select

BokehModel(combine_events=True, render_bundle={'docs_json': {'a8bad5e8-fe07-4e1b-b38b-b1d19e5b2154': {'version…

In [34]:
# Inspect the degrees currently selected in the picker (debugging aid)
degree_multi_select.value

['Business and Computing', 'Laws', 'Science', 'Business Administration']

In [37]:
# Scratch data prep driven by the widgets' current values
df4 = df.copy()
df4['year'] = df4['year'].dt.strftime("%Y")

# Keep rows matching the current university, end-of-range year and degrees
df4 = df4.loc[
  df4['university'].eq(uni_select.value)
  & df4['year'].eq(str(year_slider.value[1].year))
  & df4['degree'].isin(degree_multi_select.value)
]

# Keep only the degrees with exactly two non-null `advanced` entries
adv_filter = df4.groupby('degree').count().reset_index()
adv_filter = adv_filter.loc[adv_filter['advanced'] == 2, 'degree'].tolist()
df4 = df4[df4['degree'].isin(adv_filter)]

# One row per degree: `advanced` level 0 becomes 'start', level 1 becomes 'end'
df4 = (
  df4
  .pivot(index='degree', values='basic_monthly_mean', columns='advanced')
  .reset_index()
  .rename(columns={0: 'start', 1: 'end'})
)

In [38]:
# Show the pivoted start/end table built in the previous cell
df4

advanced,degree,start,end
0,Business Administration,3770.0,4272.0


In [None]:
@pn.depends(uni_select, year_slider, degree_multi_select)
def dumbbell_plot(uni_select, year_slider, degree_multi_select):
  """Dumbbell chart of `basic_monthly_mean` for the two `advanced` levels of each degree.

  Rows of the global `df` are kept when they match the chosen university,
  fall in the year at the upper end of the slider range and belong to one of
  the chosen degrees. Only degrees that have exactly one row per `advanced`
  level (two rows, two distinct levels) are plotted.

  Args:
    uni_select: current value of the university dropdown.
    year_slider: current (start, end) value of the date range slider; only
      the end is read.
    degree_multi_select: currently selected degrees.

  Returns:
    A HoloViews overlay (connecting segments plus both point sets), or a
    Markdown pane with a short message when no selected degree has both
    levels.
  """
  selected_year = year_slider[1].year

  # Filter the shared frame directly; boolean indexing already returns a new object
  selection = df[
    (df['university'] == uni_select) &
    (df['year'].dt.year == selected_year) &
    (df['degree'].isin(degree_multi_select))
  ]

  # Keep degrees with exactly two rows AND two distinct `advanced` levels.
  # Checking the distinct count as well prevents `pivot` from raising on a
  # degree that has two rows with the same level.
  per_degree = selection.groupby('degree')['advanced']
  row_count = per_degree.count()
  level_count = per_degree.nunique()
  paired_degrees = row_count.index[(row_count == 2) & (level_count == 2)]
  selection = selection[selection['degree'].isin(paired_degrees)]

  # Nothing to compare: show a message instead of failing on missing columns
  if selection.empty:
    return pn.pane.Markdown(
      'No selected degree has both qualification levels for this university and year.'
    )

  # Pivot dataframe into the advanced and not advanced columns
  salaries = (
    selection
    .pivot(index='degree', values='basic_monthly_mean', columns='advanced')
    .reset_index()
    .rename(columns={0: 'start', 1: 'end'})
  )

  # Create the dumbbell plot
  ## Scatter plot points for advanced and not advanced values
  start_points = salaries.hvplot.scatter(
    x='start',
    y='degree',
    color="#39b9e8",
    size=100
  )
  end_points = salaries.hvplot.scatter(
    x='end',
    y='degree',
    color="#91dc4c",
    size=100
  )

  # Create line segments connecting the points: (x0, y0, x1, y1) per degree
  segments = hv.Segments([
    (start, degree, end, degree)
    for degree, start, end in zip(salaries['degree'], salaries['start'], salaries['end'])
  ]).opts(color="black", line_width=2)

  # Combine plots; salary values run along x and degrees along y
  dumbbell = (segments * start_points * end_points) \
    .opts(
      title=f"Difference in Mean Monthly Salary for Jobs \nwith Different Qualification Levels in {selected_year}",
      xlabel="Mean Monthly Salary",
      ylabel="Degree"
    )

  return dumbbell

# Backward-compatible alias: this cell used to define the callback under the name
# `error_bars_plot`, shadowing the error-bars callback of the same name. The alias
# keeps any cell that still uses the old name working; remove it once none does.
error_bars_plot = dumbbell_plot

# Create layout
layout = pn.Column(
  pn.Row(uni_select),
  pn.Row(year_slider, degree_multi_select),
  dumbbell_plot
)

# Display
layout

BokehModel(combine_events=True, render_bundle={'docs_json': {'dcec274b-7fc5-409f-b041-7b70483bce27': {'version…

UnknownReferenceError: can't resolve reference '2ad1140e-144f-489f-bce8-c15b7ee51940'

UnknownReferenceError: can't resolve reference '2ad1140e-144f-489f-bce8-c15b7ee51940'

UnknownReferenceError: can't resolve reference '2ad1140e-144f-489f-bce8-c15b7ee51940'

UnknownReferenceError: can't resolve reference '2ad1140e-144f-489f-bce8-c15b7ee51940'