# Big Data Dashboarding
---

In this notebook we are going to put our visualizations together into a dashboard.

We will also introduce a new tool from the HoloViz suite:

<img src="images/panel_logo.png" width="100">

Panel is a high-level app and dashboarding solution for Python.

## Let's reconnect to our Dask Cluster and load our dataset

In [None]:
import param
import panel as pn

# Initialise Panel's notebook extension before any Panel object is displayed.
pn.extension()

In [None]:
import dask_gateway
import dask.dataframe as dd
import pandas as pd

import hvplot.dask
import hvplot.pandas
# Shared Dask Gateway client; the cells below use it to list, connect to and
# create clusters.
gateway = dask_gateway.Gateway()

In [None]:
class LaunchDaskCluster(param.Parameterized):
    """Panel stage that connects to, or launches, a Dask Gateway cluster.

    The widgets below are class attributes, so every instance of this class
    shares the same slider, button and status text.
    """

    # (minimum, maximum) number of workers passed to ``cluster.adapt``.
    workers = pn.widgets.IntRangeSlider(name='Number of Workers', start=1, end=10, value=(1, 5), step=1)

    launch_dask = pn.widgets.Button(name='Launch Dask')
    dask_status = pn.widgets.StaticText(name='Dask Status', value='Not Connected')
    # Populated by ``dask_launcher``; both stay ``None`` until it has run.
    cluster = None
    client = None
    # Set to True once the cluster reports at least one worker.
    ready = param.Boolean(default=False, precedence=-1)

    def __init__(self, **params):
        super().__init__(**params)
        # Run the launcher whenever the button is clicked.
        self.launch_dask.on_click(self.dask_launcher)

    @param.output(param.String)
    def output(self):
        """Return the name of the connected cluster.

        Raises:
            AttributeError: if no cluster has been connected yet
                (``self.cluster`` is still ``None``).
        """
        return self.cluster.name

    def dask_launcher(self, event):
        """Button callback: attach to a running cluster or start a new one.

        Progress is reported through ``dask_status``. On success ``cluster``
        and ``client`` are set and ``ready`` becomes True. On failure the
        error is shown in ``dask_status`` and re-raised.

        Args:
            event: the click event supplied by the button (unused).
        """
        self.ready = False
        try:
            running_clusters = gateway.list_clusters()
            if running_clusters:
                self.dask_status.value = "Found existing dask cluster, connecting and rescaling"
                self.cluster = gateway.connect(running_clusters[0].name)
            else:
                self.dask_status.value = "Launching new dask cluster"
                self.cluster = gateway.new_cluster(conda_environment="pycon2023/pycon2023-tutorial", profile="Medium Worker")

            # Same adaptive range whether the cluster is new or reused.
            min_workers, max_workers = self.workers.value
            self.cluster.adapt(min_workers, max_workers)

            self.client = self.cluster.get_client()
            self.dask_status.value = "Waiting for at least 1 worker"
            self.client.wait_for_workers(1)
        except Exception as exc:
            # Surface the failure in the UI instead of leaving a stale
            # "Launching..." message, then let the error propagate.
            self.dask_status.value = f"Failed to start Dask: {exc}"
            raise

        self.dask_status.value = f"Cluster Ready - {self.client.dashboard_link}"
        self.ready = True

    def panel(self):
        """Return the stage layout: worker slider, launch button, status text."""
        return pn.Column(
            self.workers,
            self.launch_dask,
            self.dask_status,
        )

# Create the launcher stage and show its layout (the last expression of the
# cell is what the notebook renders).
stage1 = LaunchDaskCluster()
stage1.panel()

In [None]:
class Dashboard(param.Parameterized):
    """Interactive flight-delay dashboard backed by a Dask dataframe.

    The selection widgets are class attributes, so every instance shares
    them. ``panel()`` binds the plot to the widgets so that changing any of
    them recomputes the aggregation and redraws the plot.
    """

    years = pn.widgets.IntRangeSlider(name='Years', start=2003, end=2022, value=(2018, 2022), step=1)
    method = pn.widgets.RadioButtonGroup(name='Method', options=['min', 'mean', 'max'])
    field = pn.widgets.RadioButtonGroup(name='Delay Type', options=['DEP_DELAY', 'ARR_DELAY'])
    groupby = pn.widgets.RadioButtonGroup(name='GroupBy', options=['YEAR', 'MONTH', 'DAY_OF_MONTH', 'OP_CARRIER'], value='MONTH')
    # Name of the Dask Gateway cluster whose dashboard is embedded.
    cluster_name = param.String()

    url = "gcs://quansight-datasets/airline-ontime-performance/sorted/full_dataset.parquet"
    columns = [
        'YEAR', 'MONTH', 'DAY_OF_MONTH', 'DAY_OF_WEEK', 'FL_DATE', 'OP_CARRIER',
        'TAIL_NUM', 'OP_CARRIER_FL_NUM', 'ORIGIN', 'DEST', 'CRS_DEP_TIME',
        'DEP_TIME', 'DEP_DELAY', 'ARR_TIME', 'ARR_DELAY', 'CANCELLED',
        'CANCELLATION_CODE', 'DIVERTED', 'AIR_TIME', 'FLIGHTS', 'DISTANCE',
        'CARRIER_DELAY', 'WEATHER_DELAY', 'NAS_DELAY', 'SECURITY_DELAY',
        'LATE_AIRCRAFT_DELAY', 'DIV_ARR_DELAY'
    ]
    # ``dd.read_parquet`` is called once, when the class body is executed.
    flights = dd.read_parquet(url, columns=columns)
    # Result of the latest ``recompute``; ``None`` until it has run.
    grouped_data = None

    @param.depends('cluster_name')
    def dask_dashboard(self):
        """Return a pane embedding the Dask dashboard of ``cluster_name``.

        Connects to the named cluster and stores the resulting ``cluster``
        and ``client`` on the instance. When ``cluster_name`` is empty no
        connection is attempted and a placeholder pane is returned.
        """
        if not self.cluster_name:
            return pn.pane.HTML("No Dask cluster selected")
        self.cluster = gateway.connect(self.cluster_name)
        self.client = self.cluster.get_client()
        return pn.pane.HTML(f"""
        <iframe width="800" height="800" src="{self.client.dashboard_link}"
        frameborder="0" scrolling="no" marginheight="0" marginwidth="0"></iframe>
        """)

    def recompute(self):
        """Rebuild ``grouped_data`` from the current widget values.

        Filters ``flights`` to the selected year range (inclusive), groups by
        the selected column and aggregates the selected delay field with the
        selected method.
        """
        first_year, last_year = self.years.value
        flight_year = self.flights['YEAR']
        selected = self.flights[(flight_year >= first_year) & (flight_year <= last_year)]
        self.grouped_data = (
            selected
            .groupby(self.groupby.value)[self.field.value]
            # The aggregation name is passed positionally.
            .agg(self.method.value)
        )

    def plot_data(self):
        """Return an hvPlot of ``grouped_data``, or ``None`` if nothing is computed yet."""
        # Compare against None explicitly: the truth value of a Dask
        # collection is ambiguous.
        if self.grouped_data is None:
            return None
        return self.grouped_data.hvplot()

    def _refresh_plot(self, *_widget_values):
        """Recompute and plot; the arguments only signal that a widget changed."""
        self.recompute()
        return self.plot_data()

    def panel(self):
        """Return the dashboard layout: controls, plot, divider, Dask dashboard."""
        # The widgets are not Parameters of this class, so the plot is bound
        # to them directly rather than through ``param.depends``.
        plot = pn.bind(
            self._refresh_plot,
            self.years,
            self.groupby,
            self.field,
            self.method,
        )
        return pn.Column(
            self.years,
            pn.Row(self.groupby, self.field, self.method),
            plot,
            pn.layout.Divider(),
            self.dask_dashboard,
        )
    

In [None]:
# Use the cluster connected in stage 1 when there is one; otherwise fall back
# to the previously hard-coded cluster name.
stage2 = Dashboard(
    cluster_name=(
        stage1.cluster.name
        if stage1.cluster is not None
        else "dev.419198172076424b887157c779def1a3"
    )
)
stage2.panel()

In [None]:
# Embed the Dask dashboard of the stage-1 client in an iframe.
# NOTE: stage1.client is None until the launch callback has run, so this cell
# raises AttributeError if executed before then.
pn.pane.HTML(f"""
        <iframe width="800" height="800" src="{stage1.client.dashboard_link}"
        frameborder="0" scrolling="no" marginheight="0" marginwidth="0"></iframe>
        """)

In [None]:
# Ask the gateway for its list of clusters (displayed as the cell output).
gateway.list_clusters()