In [23]:
import pandas as pd
import numpy as np
import altair as alt

In [24]:
## Lift Altair's max-rows limit, since the charts below are built from a lot of rows.
alt.data_transformers.disable_max_rows() # Trade-off: can lead to the notebook becoming huge (filesize)
## Alternative (currently disabled): save the data as json so the notebook isn't huge.
# alt.data_transformers.enable('json')

DataTransformerRegistry.enable('default')

In [19]:
## Load data
# Both CSVs are read straight from the project's GitHub repository.
# NOTE(review): the URLs below embed `?token=...` access tokens. Hardcoded tokens
# leak credentials and presumably stop working once they expire -- confirm, and
# consider supplying the token from outside the notebook instead.
weather_data_url = 'https://raw.githubusercontent.com/cse512-21s/A3-Lost-transit-riders-in-the-post-pandemic-era-who-where-and-why/master/data/pronto/weather_cleaned.csv?token=ACRLTBDNQCPMUYDZ2VEPXPDAUKTEQ'
trips_data_url = 'https://raw.githubusercontent.com/cse512-21s/A3-Lost-transit-riders-in-the-post-pandemic-era-who-where-and-why/master/data/pronto/trips_cleaned_reduced.csv?token=ACRLTBCS3CJM6N3LO6JYC6TAUKVLE'

# Read the weather table first, then the trips table (same order as the URLs above).
weather, trips = (pd.read_csv(csv_url) for csv_url in (weather_data_url, trips_data_url))

In [20]:
## Select the columns for which we'll want to compute averages
# Columns listed here as non-numeric (by the original author's naming).
non_numeric_cols = [
    'Date', 'Events', 'year', 'month', 'season', 'dayofweek', 'workdayofweek',
]
# Boolean mask over the weather column names: True where the name matches 'Mean'.
is_mean_col = weather.columns.str.contains(pat='Mean')
mean_cols = weather.columns[is_mean_col]

In [21]:
# Expose 'start_weekday' under the name 'dayofweek' as well, so the trips dataset
# can be compared with the weather dataset on a shared field name.
trips = trips.assign(dayofweek=trips['start_weekday'])

In [22]:
#### Create cross-filters
# One small bar chart per selector variable. Clicking a bar in any of them
# filters the highlighted bars of all the others.
# NOTE(review): 'start_wokdayhour' looks like a misspelling, but it is used as a
# data field name, so it has to match the column in `trips` -- confirm before renaming.
selector_variables = ['year','season','dayofweek','start_wokdayhour']
brushes = []
highlighters = []
backgrounds = []

for field_name in selector_variables:
    # Multi-selection tied to the x encoding of this chart
    click = alt.selection(type='multi', encodings=['x'])

    # Bar chart counting the trips rows per value of the selector variable
    base_bars = (
        alt.Chart(trips)
        .mark_bar()
        .encode(x=alt.X('{}:O'.format(field_name)), y='count()')
        .properties(width=160, height=130)
    )

    brushes.append(click)
    # Colored copy of the bars, filtered by this chart's own selection
    highlighters.append(base_bars.transform_filter(click))
    # Gray copy of the bars; this layer is the one that carries the selection
    backgrounds.append(base_bars.encode(color=alt.value('#ddd')).add_selection(click))

# Make every selector plot listen to the brush of every other selector plot
# (its own brush was already applied when the highlighter was created).
all_idx = np.arange(len(selector_variables))
for own in all_idx:
    for other in all_idx:
        if other != own:
            highlighters[own] = highlighters[own].transform_filter(brushes[other])

# Each selector plot is the gray background with the colored highlighter on top
selectors = [
    alt.layer(gray_layer, colored_layer)
    for gray_layer, colored_layer in zip(backgrounds, highlighters)
]

In [25]:
#### Define the weather metrics plot
metrics = ['Temperature_F','Humidity','Visibility_Miles']


def _filter_by_weather_brushes(chart):
    """Return `chart` filtered by every selector brush except the hour one.

    The brush belonging to 'start_wokdayhour' is skipped; presumably the
    weather data has no hour field -- TODO confirm.
    """
    for variable, selection in zip(selector_variables, brushes):
        if variable != 'start_wokdayhour':
            chart = chart.transform_filter(selection)
    return chart


charts = []
for metric in metrics:
    # Translucent bar spanning mean(Min_<metric>) to mean(Max_<metric>)
    range_bar = _filter_by_weather_brushes(
        alt.Chart(weather).mark_bar(opacity=0.4).encode(
            alt.Y("mean(Min_{}):Q".format(metric), title='{}'.format(metric)),
            alt.Y2("mean(Max_{}):Q".format(metric))
        )
    )

    # Red rule drawn at mean(Mean_<metric>)
    mean_rule = _filter_by_weather_brushes(
        alt.Chart(weather).mark_rule(color='red').encode(
            y = alt.Y("mean(Mean_{}):Q".format(metric))
        )
    )

    # Layer the rule on top of the bar
    charts.append(range_bar + mean_rule)

In the interactive charts below, CLICK a bar to filter the data to a particular year/season/day/hour. Use SHIFT+CLICK to select multiple options. Each selection is applied as a filter to all the other bar charts, so the user can see how the data changes with each filtering step.

All the filters (except the hour filter) are also applied to the weather charts on the right, which show the average temperature, humidity, and visibility for the selected time range. In the final visualization, these filters can be used to update other, more complicated charts such as maps or chord diagrams.

In [26]:
# Left panel: the four selector charts arranged as a 2x2 grid
selector_top_row = alt.hconcat(selectors[0], selectors[1])
selector_bottom_row = alt.hconcat(selectors[2], selectors[3])
selector_charts = alt.vconcat(selector_top_row, selector_bottom_row)

# Right panel: the weather metric charts side by side
metrics_charts = alt.hconcat(*charts)

# Last expression of the cell, so the notebook displays the combined view
alt.hconcat(selector_charts, metrics_charts)