# Visualizing Aggregated Analytics

We used Spark (running on an EMR cluster) to compute the average velocity,
average group size, and other aggregated analytics.

In [1]:
import pandas as pd
import numpy as np
import os

from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource, HoverTool

In [2]:
# Location of the aggregated analytics CSV. The default is the original local
# path; set the AGG_ANALYTICS_CSV environment variable to load a different file
# without editing the notebook.
DATA_PATH = os.environ.get('AGG_ANALYTICS_CSV', '/Users/wfu/data/full-20mins.csv')
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0.1,Unnamed: 0,window,avg_num_people,avg_group_size,avg_velocity,x_centers,y_centers
0,0,"Row(start=datetime.datetime(2019, 4, 10, 0, 20...",0.004167,1.0,0.014,"[0.449, 0.709, 0.691, 0.718, 0.718, 0.455, 0.4...","[0.335, 0.339, 0.362, 0.459, 0.466, 0.731, 0.7..."
1,1,"Row(start=datetime.datetime(2019, 4, 10, 0, 40...",0.044167,1.0,0.018674,"[0.694, 0.691, 0.693, 0.7, 0.688, 0.688, 0.685...","[0.334, 0.337, 0.331, 0.33, 0.334, 0.329, 0.33..."
2,2,"Row(start=datetime.datetime(2019, 4, 10, 1, 0)...",0.044583,1.0,0.02413,"[0.853, 0.846, 0.837, 0.83, 0.823, 0.815, 0.80...","[0.53, 0.532, 0.534, 0.539, 0.542, 0.546, 0.55..."


Let's first plot the average velocity.
Currently the window is stored as a string.
We want to convert it to a datetime value, namely
the time at which the window starts.

In [4]:
# Inspect the raw text of the first row's window value.
df['window'][0]

'Row(start=datetime.datetime(2019, 4, 10, 0, 20), end=datetime.datetime(2019, 4, 10, 0, 40))'

Let's strip the leading `Row(` and the trailing parenthesis, then remove the `start=` and `end=` labels.

In [5]:
# Drop the surrounding "Row(" ... ")" from the first window string.
window_body = df.window[0][4:-1]
# Remove the field labels so only the two datetime expressions remain.
unlabelled = window_body.replace('start=', '').replace('end=', '')
# Split into the start/end halves and trim the module prefix from each.
start, end = (piece.replace('datetime.', '') for piece in unlabelled.split(', datetime.'))
print(start, end, sep='\n')

datetime(2019, 4, 10, 0, 20)
datetime(2019, 4, 10, 0, 40)


In [6]:
from datetime import datetime
def filter_window_row(row):
    """Return the window's start time parsed from a Spark ``Row`` repr string.

    Args:
        row: Text such as ``'Row(start=datetime.datetime(2019, 4, 10, 0, 20),
            end=datetime.datetime(2019, 4, 10, 0, 40))'``.

    Returns:
        A ``datetime`` built from the integer arguments of the ``start=`` field.

    Raises:
        ValueError: If the ``start=datetime.datetime(...)`` field is missing or
            its arguments are not plain integers.
    """
    # The arguments are converted with int() rather than handed to eval(), so
    # text read from the CSV is never executed as code.
    marker = 'start=datetime.datetime('
    args_begin = row.index(marker) + len(marker)
    args_end = row.index(')', args_begin)
    return datetime(*(int(arg) for arg in row[args_begin:args_end].split(',')))

In [7]:
# The bar chart is plotted against a column called 'index', so name the index.
df.index.name = 'index'
# Start of each window as a datetime, parsed from the Row(...) text.
df['start'] = df['window'].apply(filter_window_row)
# Text label for each window start (shown in the hover tooltip).
# NOTE(review): the format pairs 24-hour %H with %p (e.g. "00:20:00 AM") — confirm this is intended.
df['start_string'] = df['start'].apply(lambda ts: ts.strftime("%b %d %Y %H:%M:%S %p"))

In [8]:
# Preview the first three rows, now including the derived start columns.
df.head(3)

Unnamed: 0_level_0,Unnamed: 0,window,avg_num_people,avg_group_size,avg_velocity,x_centers,y_centers,start,start_string
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,0,"Row(start=datetime.datetime(2019, 4, 10, 0, 20...",0.004167,1.0,0.014,"[0.449, 0.709, 0.691, 0.718, 0.718, 0.455, 0.4...","[0.335, 0.339, 0.362, 0.459, 0.466, 0.731, 0.7...",2019-04-10 00:20:00,Apr 10 2019 00:20:00 AM
1,1,"Row(start=datetime.datetime(2019, 4, 10, 0, 40...",0.044167,1.0,0.018674,"[0.694, 0.691, 0.693, 0.7, 0.688, 0.688, 0.685...","[0.334, 0.337, 0.331, 0.33, 0.334, 0.329, 0.33...",2019-04-10 00:40:00,Apr 10 2019 00:40:00 AM
2,2,"Row(start=datetime.datetime(2019, 4, 10, 1, 0)...",0.044583,1.0,0.02413,"[0.853, 0.846, 0.837, 0.83, 0.823, 0.815, 0.80...","[0.53, 0.532, 0.534, 0.539, 0.542, 0.546, 0.55...",2019-04-10 01:00:00,Apr 10 2019 01:00:00 AM


The frame now has a `start` column holding datetime values and a `start_string` column holding their formatted string form.

In [9]:
# Plot dimensions in pixels (height, width).
# NOTE(review): the figure cell below passes literal 500/900 instead of these — confirm which is intended.
HEIGHT, WIDTH = 360, 640

In [10]:
# Bokeh Library
from bokeh.io import output_file, output_notebook
from bokeh.models.widgets import Tabs, Panel
from bokeh.plotting import show

# Output to file
# output_notebook()

In [11]:
# Hover tooltip: the window's start label plus the bar's value.
hover = HoverTool(tooltips=[
    ('Timestamp', '@start_string'),
    ('Average Number of People', '@avg_num_people'),
])

# Column data backing the bars; 'index' is the frame's named index.
bar_source = ColumnDataSource(df)

# One red bar per time window.
num_people_graph = figure(
    title="Number of People in Science Center Plaza",
    plot_height=500,
    plot_width=900,
    # y_range=(0, 20),
    match_aspect=True,
    tools=[hover, "pan,reset,wheel_zoom"],
)
num_people_graph.vbar(
    x='index',
    top='avg_num_people',
    width=0.9,
    color='red',
    source=bar_source,
)

num_people_graph.xaxis.axis_label = "Time Window (Hover for Timestamp)"
num_people_graph.yaxis.axis_label = "Average Number of People in Camera"

# Not enabled yet: equivalent bar charts for 'avg_velocity' (blue, y_range 0-1,
# tab title 'Average Velocity') and 'avg_group_size' (green, y_range 0-4,
# tab title 'Average Group Size'), which would join panel1 in the Tabs list.

# Wrap the figure in a single-tab layout.
panel1 = Panel(child=num_people_graph, title='Average Number of People')
tabs = Tabs(tabs=[panel1])

In [12]:
# Display the tabbed layout built in the previous cell.
show(tabs)