In [1]:
%load_ext autoreload
%autoreload 2

# import modules and load data required for this presentation

import os
from pathlib import Path

from ipywidgets import interact, fixed, HBox, Output
from IPython.display import display
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

from melbviz.pedestrian import PedestrianDataset
from melbviz.config import DATA_PATH, COUNTS_CSV_PATH, SENSOR_CSV_PATH


# Load the dataset once from the counts CSV (the sensor CSV path is passed
# alongside); later cells reuse this module-level `data` object.
data = PedestrianDataset.load(COUNTS_CSV_PATH, sensor_csv_path=SENSOR_CSV_PATH)

<center>
    <h1>Interactive Data Visualisation with Python</h1>
    <br><br><br>
    <img src="img/title_viz.png" width="500"/>
    <br>
    <img src="img/python_logo.svg" width="300"/>
</center>

In [2]:
%%html

<!-- Styles for the "Who are we?" slide: the .people container is a flex row
     with its cards spread apart, centred text, and a drop shadow on each photo. -->
<style>
.people {
    display: flex;
    justify-content: space-between;
    text-align: center;
}
.people div {
    margin: 2em;
    
}
.people img {
    box-shadow: 1px 1px 5px 1px black;
    margin: 0 auto;
}
</style>

## Who are we?

<div class="people">
    <div>
        <img src="img/ned.png"/>
        <h4>Ned Letcher</h4>
    </div>
    <div>
        <img src="img/ellen.png"/>
        <h4>Ellen Macpherson</h4>
    </div>
    <div>
        <img src="img/harmeet.png"/>
        <h4>Harmeet Kaur Sokhi</h4>
    </div>
</div>

<center>
    <img src="img/thoughtworks.png"/>
</center>

In [35]:
# Leftover development aid removed: the trailing-`?` docstring lookup for
# px.scatter_mapbox opens the help pager and interrupts the slide flow.
# Run the lookup ad hoc in a scratch cell when the signature is needed.

## Where are we?

In [37]:
# Build the one-row frame inside this cell so the map renders on a fresh
# kernel (Restart & Run All) without relying on leftover kernel state.
where_df = pd.DataFrame(
    {
        "name": ["Melbourne"],
        "lat": [-37.8136],
        "lon": [144.9631],
        "size": [15],
    }
)

# Reference the columns by name rather than passing parallel one-element lists;
# the figure is the cell's last expression so it is displayed on the slide.
px.scatter_mapbox(
    where_df,
    lat="lat",
    lon="lon",
    hover_name="name",
    size="size",
    zoom=1,
)

## What is data visualisation?

* Graphic representation of data

* Visually encoded information

* Reveals patterns, trends, relationships

* Used to discover and communicate insights

<center>
    <h3>Examples of Visualisations</h3>
    <img src="img/plot_types.svg" width="800"/>
</center>

In [None]:
# Prepare the single sensor-month slice used by the "Why visualise data?" slides.
pedestrian_data = data.filter(
    year=2019,
    month="March",
    sensor="Southern Cross Station",
)

# Plain DataFrame view of that slice, shown as a table and as a plot below.
pedestrian_df = pedestrian_data.df

def plot_sensor_traffic(df):
    """Build the "sensor_traffic" figure for ``df`` at slide-friendly dimensions.

    The plotting callable is obtained from the module-level ``data`` object via
    ``get_plot_func("sensor_traffic")`` and invoked with a 1500x500 canvas.
    The layout is then updated with an 18pt font and a right margin of 170.

    Args:
        df: the frame handed straight to the plotting callable.

    Returns:
        The figure object created by the plotting callable.
    """
    make_figure = data.get_plot_func("sensor_traffic")
    figure = make_figure(df, width=1500, height=500)

    layout_overrides = {"font_size": 18, "margin_r": 170}
    figure.update_layout(**layout_overrides)
    return figure

## Why visualise data?

_Tabular representations of datasets are difficult to interpret_

In [None]:
# Show only the first 15 rows of the filtered frame as a raw table.
pedestrian_df.head(15)

## Why visualise data?

_Visual representations help patterns jump out_

In [None]:
# Same frame as the table on the previous slide, now drawn as a figure.
plot_sensor_traffic(pedestrian_df)

## Why visualise data?

_Summary statistics mislead_

* mean
* median
* standard deviation
* correlations between variables

In [None]:
def object_as_widget(obj):
    """Wrap ``obj`` in an ``Output`` widget so it can sit inside a widget layout.

    ``obj`` is rendered with ``display`` while the ``Output`` widget is the
    active context, so its rendering is captured by that widget.

    Args:
        obj: any object that ``display`` can render.

    Returns:
        The ``Output`` widget holding the rendering of ``obj``.
    """
    captured = Output()
    with captured:
        display(obj)
    return captured


def show_datasaurus(datasauraus_df, column):
    """Show a scatter plot next to summary statistics for one or all datasets.

    Args:
        datasauraus_df: frame with at least ``dataset``, ``x`` and ``y`` columns.
        column: a value of the ``dataset`` column to select, or ``"all"`` to
            plot every dataset in a faceted grid (5 facets per row).

    Returns:
        An ``HBox`` holding the figure (as a ``FigureWidget``) followed by the
        statistics table wrapped with ``object_as_widget``.
    """
    show_all = column == "all"
    if show_all:
        subset = datasauraus_df
    else:
        subset = datasauraus_df[datasauraus_df["dataset"] == column]

    # Mean, standard deviation and x/y correlation over the plotted rows.
    xs, ys = subset["x"], subset["y"]
    summary = {
        "x_mean": xs.mean(),
        "y_mean": ys.mean(),
        "x_std": xs.std(),
        "y_std": ys.std(),
        "corr": xs.corr(ys),
    }
    stats_df = pd.DataFrame(
        {"statistic": list(summary.keys()), "value": list(summary.values())}
    )

    if show_all:
        fig = px.scatter(subset, facet_col_wrap=5, facet_col="dataset", x="x", y="y")
        # Keep only the text after the last "=" in each facet annotation.
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    else:
        fig = px.scatter(subset, x="x", y="y")

    fig.update_layout(margin={"r": 10, "t": 40, "b": 10}, font_size=18)
    return HBox([go.FigureWidget(fig), object_as_widget(stats_df)])


def make_datasaurus(path="data/DatasaurusDozen.tsv"):
    """Load the Datasaurus TSV and hook ``show_datasaurus`` up to ``interact``.

    The selectable options are the unique values of the ``dataset`` column,
    in order of appearance, followed by ``"all"``.

    Args:
        path: location of the tab-separated Datasaurus file.

    Returns:
        Whatever ``interact`` returns for this call.
    """
    frame = pd.read_csv(path, sep="\t")
    choices = [*frame["dataset"].unique(), "all"]
    # The frame is wrapped in `fixed` so only `column` is driven by a control.
    return interact(show_datasaurus, datasauraus_df=fixed(frame), column=choices)

## The Datasaurus

https://www.autodesk.com/research/publications/same-stats-different-graphs

In [None]:
# Trailing semicolon suppresses the repr of the cell's return value.
make_datasaurus();

## Why visualise data?

_Human visual system is a powerful tool_

<center>
    <img src="img/horizon_plot.jpg" style="height:80vh"/>
    <small><a href="https://twitter.com/xangregg/status/883763762381152256">@xangregg</a></small>
</center>

## Python Data Viz Libraries

<center>
    <img src="img/python_viz_landscape.svg" style="height:90vh"/>
</center>

<center>
    <img src="img/python_viz_libs.svg" style="height:90vh"/>
</center>

## Why interactive visualisations?

* more ergonomic data analysis

* faster time to insights

* self-service insights (e.g. dashboards)

* agile prototypes

## Why _interactive_ visualisations?

In [None]:
# One hard-coded selection: seeing another year, month or sensor means
# editing the arguments and re-running the cell.
data.filter(year=2019, month="March", sensor="Southern Cross Station").plot("sensor_traffic")

## Why interactive visualisations?

In [None]:
@interact(year=reversed(data.years), month=data.months, sensor=data.sensors)
def plot(year, month, sensor):
    """Plot "sensor_traffic" for the year, month and sensor picked in the controls."""
    selection = data.filter(year=year, month=month, sensor=sensor)
    return selection.plot("sensor_traffic")