# Explanatory Data Visualization
**Presentation Visualizations and Dashboards**

Note: the goal is to show the ease of integrating RAPIDS and GPUs into production dashboard presentations.

## Overview and Requirements
Super short version of the intro notebook, plus a restatement of the requirements

## Import

In [None]:
import cudf
import plotly.graph_objects as go
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd
import cugraph
import cuxfilter
from pathlib import Path

# Relative directory that holds the input data files
DATA_DIR = Path("../data")
# Name of the trips parquet file inside DATA_DIR
FILENAME = Path("modified_trips.parquet")

In [None]:
# Read the trips parquet file into a cuDF DataFrame
trips = cudf.read_parquet(DATA_DIR.joinpath(FILENAME))

In [None]:
# Flag every trip as day (0) or night (1); night is any hour after 19 or before 8
night_index = trips.query('hour>19 or hour<8').index
trips['time_of_day'] = 0  # start with every trip marked as day
trips.loc[night_index, 'time_of_day'] = 1  # then overwrite the night trips

In [None]:
# Display labels for the day_type and time_of_day codes.
# The extra '' key is the "all" entry offered alongside the real codes.
day_type_map = {
    0: 'weekday',
    1: 'weekend',
    '': 'all',
}
time_of_day_map = {
    0: 'day(8am-8pm)',
    1: 'night(8pm-8am)',
    '': 'all',
}

## Summary of interesting analytics findings


### Mock Sketch

<img src="../images/notebook_04_dashboard_sketch.jpg" />

### CuXfilter Mockup

In [None]:
# Wrap the trips table in a cuxfilter DataFrame; the dashboard is created from this object
cux_df = cuxfilter.DataFrame.from_dataframe(trips)

In [None]:
# Charts for the mockup: a trip scatter drawn over map tiles, a per-week bar
# chart, and two multi-select widgets (day type and hour).
charts = [
    cuxfilter.charts.scatter(
        title='All Trips',
        x='x',
        y='y',
        point_size=3,
        pixel_spread='spread',
        pixel_shade_type='linear',
        tile_provider='CARTODBPOSITRON',
    ),
    cuxfilter.charts.bar(
        'all_time_week',
        title='Rides per week',
    ),
    cuxfilter.charts.multi_select(
        'day_type',
        label_map=day_type_map,
    ),
    cuxfilter.charts.multi_select('hour'),
]

# Assemble the dashboard object from the charts above
d = cux_df.dashboard(
    charts,
    theme=cuxfilter.themes.rapids,
    layout=cuxfilter.layouts.feature_and_base,
)

<img src="../images/notebook_04_dashboard_1.png" />

### Plotly Dashboard with Real-time Page Rank Compute

In [None]:
# Station table; it is joined on its station_id column below
stations = cudf.read_csv(DATA_DIR / "stations.csv")

# One display name per station id. Deduplicating on the id alone (instead of on
# the id/name pair) guarantees a single row per station, so the merge below
# cannot multiply station rows when the name text varies between trips.
station_names = trips[['from_station_id', 'from_station_name']].drop_duplicates(subset=['from_station_id'])
station_names.columns = ['station_id', 'station_name']

# Trips per station = trips starting there + trips ending there.
departures = trips.groupby('from_station_id').size()
arrivals = trips.groupby('to_station_id').size()
# fill_value=0 keeps stations that appear on only one side of a trip; a plain
# `+` aligns on the index and would leave their total as null.
total_trips = departures.add(arrivals, fill_value=0).reset_index()
# reset_index() output is renamed positionally: (station id, trip count)
total_trips.columns = ['station_id', 'total_trips']

# Attach the trip totals and the display names to the station table
stations = stations.merge(total_trips, on='station_id')
stations = stations.merge(station_names, on='station_id')

In [None]:
# Preview the first rows of the merged stations table
stations.head()

#### Function to compute PageRank in real time

In [None]:
def calculate_page_rank(data):
    """
    Run cuGraph PageRank on the station graph described by `data`.

    Args:
        data: edge list with 'from_station_id' and 'to_station_id' columns
    Returns:
        The PageRank result joined to the module-level `stations` table
        (PageRank 'vertex' matched to 'station_id'), with the 'vertex'
        column dropped.
    """
    graph = cugraph.Graph()
    graph.from_cudf_edgelist(
        data,
        source='from_station_id',
        destination='to_station_id',
    )
    ranks = cugraph.pagerank(graph)
    ranked_stations = ranks.merge(stations, left_on='vertex', right_on='station_id')
    return ranked_stations.drop(columns=['vertex'])

#### Function to generate plots for the dashboard using plotly express

In [None]:
def get_pagerank_plot(data):
    """
    Build the station map for the trips in `data`.

    PageRank is recomputed for `data` via calculate_page_rank; each station is
    then drawn as a map marker coloured by 'pagerank' and sized by
    'total_trips', with the station name in the hover box.

    Args:
        data: trips table passed straight to calculate_page_rank
    Returns:
        The Plotly figure.
    """
    ranked = calculate_page_rank(data)
    fig = px.scatter_mapbox(
        ranked.to_pandas(),
        lat="lat",
        lon="lon",
        size="total_trips",
        color="pagerank",
        hover_data=["station_name"],
        color_continuous_scale=px.colors.cyclical.Edge_r,
        mapbox_style="carto-positron",
        size_max=15,
        zoom=10,
    )
    # Constant uirevision: intended to keep the user's zoom/pan when the figure
    # is re-rendered by a callback (see the Plotly layout reference)
    fig.layout.uirevision = True
    return fig

def get_week_bar_chart(data):
    """
    Build the trips-per-week bar chart for the trips in `data`.

    Args:
        data: trips table with an 'all_time_week' column
    Returns:
        A Plotly bar figure with one bar per week, configured for horizontal
        drag-selection.
    """
    weekly_counts = data.groupby('all_time_week').size().reset_index()
    # Positional rename: (group key, group size)
    weekly_counts.columns = ['week', 'trips']

    fig = px.bar(
        weekly_counts.to_pandas(),
        x="week",
        y='trips',
        template=dict(layout={'selectdirection': 'h',}),
    )
    # Dragging on the chart selects bars; that selection feeds the dashboard callback
    fig.layout.dragmode = 'select'
    fig.layout.uirevision = True
    return fig

#### Describe the app layout and interaction callbacks

In [None]:
# Third-party stylesheet loaded by the app
external_stylesheets = ['https://raw.githubusercontent.com/plotly/dash-sample-apps/master/apps/dash-oil-and-gas/assets/s1.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# Page structure: a title, a left-hand control panel (trip counter plus the day
# and time dropdowns), then the PageRank map and the weekly bar chart floated
# to the right. The component ids ('number', 'day', 'time', 'pagerank_plot',
# 'all_time_week_bar') are what the callbacks refer to.
app.layout = html.Div([
    html.Div([
        html.H1(["Plotly Dashboard - Chicago Bike Trips (2013-2017)"])
    ]),
    # --- control panel -------------------------------------------------------
    html.Div([
        html.H3(["Total Number of Trips"]),
        dcc.Loading(
            dcc.Graph(
                id='number',
                # Initial value: the size of the unfiltered trips table
                figure=go.Figure(go.Indicator(
                    mode="number",
                    value=trips.shape[0]
                )),
                # Dash style dicts use camelCased CSS property names
                style={'marginBottom': '50px', 'height': '250px'},
            ),
            color='#b0bec5',
        ),
        html.H3(["Day of Week"]),
        dcc.Dropdown(
            id='day', clearable=False,
            # '' is the "all" entry of day_type_map and is the default
            value='', options=[
                {'label': label, 'value': value}
                for value, label in day_type_map.items()
            ]),
        html.H3(["Time of Day"]),
        dcc.Dropdown(
            id='time', clearable=False,
            # '' is the "all" entry of time_of_day_map and is the default
            value='', options=[
                {'label': label, 'value': value}
                for value, label in time_of_day_map.items()
            ])
    ], style={'width': '25%', 'height': '100%', 'display': 'inline-block', 'borderBottom': 'thin lightgrey solid',
              'backgroundColor': 'rgb(250, 250, 250)',
              'padding': '10px 5px', 'float': 'left'}),
    # --- PageRank map --------------------------------------------------------
    html.Div([
        html.H3(["Page Rank on Stations(size-total trips)"]),
        dcc.Graph(id='pagerank_plot'),
    ], style={'width': '70%', 'height': '50%', 'display': 'inline-block', 'float': 'right'}),
    # --- weekly bar chart (also acts as a week-range selector) ---------------
    html.Div([
        html.H3(["Trips per week(all time)"]),
        dcc.Graph(id='all_time_week_bar'),
    ], style={'width': '70%', 'height': '40%', 'display': 'inline-block', 'float': 'right'}),
], style=dict(color='rgb(67 86 114)'))

In [None]:
def bar_selection_to_query(selection, column):
    """
    Compute pandas query expression string for selection callback data

    Args:
        selection: selectedData dictionary from Dash callback on a bar trace
        column: Name of the column that the selected bar chart is based on
    Returns:
        String containing a query expression compatible with DataFrame.query. This
        expression will filter the input DataFrame to contain only those rows whose
        `column` value lies between the smallest and the largest selected bar
        label, both bounds inclusive.
    Raises:
        ValueError: if the selection contains no points
    """
    labels = [p['label'] for p in selection['points']]
    if not labels:
        raise ValueError("selection contains no points")
    # The labels are the bar values themselves, not histogram bin indices, so
    # the upper bound is already inclusive: adding 1 to it would also pull in
    # the bar right after the selection.
    lower = min(labels)
    upper = max(labels)
    return f"{lower} <= {column} and {column} <= {upper}"

# Define callback to update graph
@app.callback(
    [
        Output('pagerank_plot', 'figure'),
        Output('all_time_week_bar', 'figure'),
        Output('number', 'figure')
    ],
    [
        Input("day", "value"), Input("time", "value"),
        Input("all_time_week_bar", "selectedData")
    ]
)
def update_figure(day, time, selected_weeks):
    """
    Rebuild the three dashboard figures from the current filter state.

    Args:
        day: value of the 'day' dropdown; "" means no day_type filter
        time: value of the 'time' dropdown; "" means no time_of_day filter
        selected_weeks: selectedData of the weekly bar chart, or None when
            nothing is selected
    Returns:
        Tuple (PageRank map figure, weekly bar figure, trip-count indicator
        figure), in the same order as the callback Outputs.
    """
    filters = []
    if day != "":
        filters.append(f"day_type == {day}")
    if time != "":
        filters.append(f"time_of_day == {time}")

    data = trips.query(' and '.join(filters)) if filters else trips

    # The bar chart is built before the week selection is applied, so it only
    # reflects the dropdown filters and never filters itself.
    week_bar_chart = get_week_bar_chart(data)

    # selectedData may carry an empty 'points' list (e.g. a box drawn over no
    # bars); treat that the same as having no selection.
    if selected_weeks and selected_weeks.get('points'):
        filters.append(bar_selection_to_query(selected_weeks, 'all_time_week'))
        data = trips.query(' and '.join(filters))

    pagerank_plot = get_pagerank_plot(data)

    # Counter showing how many trips survive all active filters
    number = go.Figure(go.Indicator(
        mode="number",
        value=data.shape[0]
    ))

    return pagerank_plot, week_bar_chart, number


### Run the plotly dash app within jupyter environment using jupyter_dash

In [None]:
# In JupyterLab you can run app.run_server(mode="jupyterlab") instead.
# For a separate browser tab, run app.run_server().
app.run_server(mode="inline")