In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import altair as alt
import json
import requests
import geopandas
import panel as pn

In [None]:
# Load the dataset and expose its "nationality" column under the name "name".
df = pd.read_csv("final_data.csv").rename(columns={"nationality": "name"})

# Preview the first rows as the cell output.
df.head()

In [None]:
def code_mapping(df,lst_of_columns):
    """
    Map the labels of the first two listed columns to integer codes.

    Both columns share one label space, so a label that occurs in either
    column receives the same code in both.

    :param df: Dataframe holding the label columns; it is not modified
    :param lst_of_columns: list of column names; only the first two entries
        are encoded, any further entries are left untouched
    :return: (copy of the Dataframe with the two columns replaced by integer
        codes, list of labels such that labels[code] is the original label)
    """
    src_col, targ_col = lst_of_columns[0], lst_of_columns[1]

    # distinct labels in first-appearance order: dict.fromkeys de-duplicates
    # like set() would, but yields the same order (and codes) on every run
    labels = list(dict.fromkeys(list(df[src_col]) + list(df[targ_col])))

    # label -> code mapping; a label's code is its position in `labels`
    lc_map = {label: code for code, label in enumerate(labels)}

    # encode both columns from the untouched input before writing anything
    # back, so an already-encoded column is never looked up a second time
    encoded = {col: df[col].map(lc_map) for col in (src_col, targ_col)}
    df = df.copy()
    for col, codes in encoded.items():
        df[col] = codes

    return df,labels



# TASK 2 and 4
def grouping_df(df,lst_of_cols,threshold =20):
    """
    Count how many rows fall into each combination of the given columns.

    :param df: DataFrame
    :param lst_of_cols: list of column names to group the data by
    :param threshold: accepted but not applied here; no rows are filtered out
    :return: DataFrame with one row per observed combination of lst_of_cols
        plus a 'count' column holding the number of rows in that group
    """
    # size of every group, indexed by the grouping columns
    group_sizes = df.groupby(by=lst_of_cols).size()

    # name the sizes first so that flattening the index yields a 'count' column
    counts = group_sizes.rename('count').reset_index()

    return counts

def _stack_adjacent_pairs(df, lst_of_columns, threshold):
    """Aggregate every adjacent column pair and stack the results as src/targ/values rows."""
    parts_of_stack = []
    for left_col, right_col in zip(lst_of_columns, lst_of_columns[1:]):
        part_of_stack_df = grouping_df(df, [left_col, right_col], threshold=threshold)
        # common column names so the per-level frames can be stacked row-wise
        part_of_stack_df.columns = ['src', 'targ', 'values']
        parts_of_stack.append(part_of_stack_df)
    # ignore_index gives the stacked frame one clean 0..n-1 index instead of
    # repeating the index of every part
    return pd.concat(parts_of_stack, axis=0, ignore_index=True)


def make_sankey(df, lst_of_columns, vals=None,grouping = "no",stacked = "no",threshold = 20,show = True,**kwargs):
    """
    Build (and by default display) a sankey diagram from columns of a dataframe.

    :param df: Dataframe
    :param lst_of_columns: list of column names that we want to create our sankey with.
        Two names give a single source -> target level; with stacked="yes" and more
        than two names, every adjacent pair of columns becomes one level
    :param vals: name of the column holding the link values. When omitted every link
        gets the value 1, except after grouping, where the group count is used
    :param grouping: "yes" to aggregate a two-column dataframe with grouping_df first.
        default value is no
    :param stacked: "yes" to build a multi level diagram from more than two columns.
        default value is no
    :param threshold: passed on to grouping_df
    :param show: whether to call fig.show() before returning. default value is True
    :param kwargs: optional node styling; 'pad', 'thickness' and 'color' are passed to
        the sankey node, any other keyword is ignored
    :return: the plotly Figure containing the sankey diagram
    """
    # doing required grouping if necessary
    if grouping == "yes" and len(lst_of_columns) == 2:
        df = grouping_df(df,lst_of_columns,threshold=threshold)
        # the aggregated rows carry their size in 'count'; use it as the link value
        # unless the caller named a column, otherwise the counts would be thrown away
        if not vals:
            vals = 'count'

    # stacking if required to build multi level sankey diagrams
    if stacked == "yes" and len(lst_of_columns)>2:
        df = _stack_adjacent_pairs(df, lst_of_columns, threshold)
        # from here on the stacked frame is addressed by its generic column names
        lst_of_columns = ['src', 'targ', 'values']
        vals = "values"

    if vals:
        values = df[vals]
    else:
        values = [1] * len(df)

    # mapping labels to integers to draw sankey diagrams
    df,labels = code_mapping(df,lst_of_columns)

    # defining required dictionaries to make sankey
    link = {'source':df[lst_of_columns[0]],'target':df[lst_of_columns[1]],'value':values}
    node = {'label':labels}
    # apply only the node styling options that were actually supplied
    node.update({key: kwargs[key] for key in ('pad', 'thickness', 'color') if key in kwargs})

    # creating sankey diagram
    sk = go.Sankey(link = link, node = node)
    fig = go.Figure(sk)
    if show:
        fig.show()

    return fig


def get_data(df,selected_modes,selected_courses,selected_outputs):
    """
    Keep only the rows that match the current selections.

    :param df: Dataframe with 'application_mode', 'course' and 'output' columns
    :param selected_modes: values of 'application_mode' to keep
    :param selected_courses: values of 'course' to keep
    :param selected_outputs: values of 'output' to keep
    :return: the rows of df whose three columns are all among the selected values
    """
    mode_ok = df['application_mode'].isin(selected_modes)
    course_ok = df['course'].isin(selected_courses)
    output_ok = df['output'].isin(selected_outputs)

    # a row survives only if it passes all three selections
    return df.loc[mode_ok & course_ok & output_ok]
    
def create_sankey(df,application_modes,courses,outputs,lst_of_cols):
    """
    Draw the stacked sankey diagram for the currently selected rows.

    :param df: Dataframe
    :param application_modes: selected values of the 'application_mode' column
    :param courses: selected values of the 'course' column
    :param outputs: selected values of the 'output' column
    :param lst_of_cols: list of column names that form the levels of the diagram
    :return: the figure returned by make_sankey
    """
    # NOTE(review): make_sankey calls fig.show() itself, so every call of this
    # function also displays the figure in addition to returning it.
    selection = get_data(df,application_modes,courses,outputs)
    return make_sankey(selection,lst_of_cols,stacked="yes")
    
    

In [None]:
# Implementation
# Distinct values of each filter column; they become the checkbox options.
application_modes = df['application_mode'].unique()
courses = df['course'].unique()
outputs = df['output'].unique()
# Columns that form the levels of the sankey diagram, in order.
lst_of_cols = ['application_mode', 'course', 'output']


def _checkbox_group(name, value, options):
    """Build a vertical checkbox group over `options` with `value` pre-selected."""
    return pn.widgets.CheckBoxGroup(name=name, value=value, options=list(options), inline=False)


def _selector_card(title, widget):
    """Wrap one checkbox group in an expanded card with the given title."""
    return pn.Card(pn.Column(widget), title=title, width=300, collapsed=False)


checkbox_group1 = _checkbox_group(
    'Mode Checkbox Group', ["Transfer","International student (bachelor)"], application_modes)
checkbox_group2 = _checkbox_group(
    'Course Checkbox Group', ["Tourism","Nursing"], courses)
checkbox_group3 = _checkbox_group(
    'Output Checkbox Group', ["Graduate","Dropout"], outputs)


# Ensure at least one value is selected
def enforce_selection(event, checkbox_group):
    """Put back the first previously selected value when a group is emptied."""
    if event.new:
        return
    checkbox_group.value = [event.old[0]]


# One watcher per group; the default argument pins each lambda to its own group.
for _group in (checkbox_group1, checkbox_group2, checkbox_group3):
    _group.param.watch(lambda event, group=_group: enforce_selection(event, group), 'value')

# Re-evaluated by panel whenever one of the three selections changes.
plot = pn.bind(create_sankey,df,checkbox_group1,checkbox_group2,checkbox_group3,lst_of_cols)
# Not referenced below; the cards are built with width=300.
card_width = 320

# The three selector cards, side by side.
checkbox_layout = pn.Row(
    _selector_card("Choose Mode", checkbox_group1),
    _selector_card("Choose Course", checkbox_group2),
    _selector_card("Choose Output", checkbox_group3),
)

# Main layout: a single "Dashboard" tab with the selector cards and the plot in one row
dashboard_tab = ("Dashboard", pn.Row(checkbox_layout,plot))
layout = pn.template.FastListTemplate(
    title="Graduation Pathways",
    theme_toggle=False,
    main=[pn.Tabs(dashboard_tab, active=0)],
    header_background='#a93226',
).servable()

layout.show()