In [327]:
%matplotlib inline
from app.helper import (load_cities, load_topics)
import os
import pandas as pd
# `output_notebook` is otherwise only imported in a later cell, so on a freshly
# restarted kernel this call would raise NameError. Import it here so the cell
# is self-contained.
from bokeh.io import output_notebook

output_notebook()

In [243]:
# All inputs are read from <current working directory>/results/descriptive.
base_path = os.path.abspath(os.path.curdir)
descriptive_path = os.path.join(base_path, 'results', 'descriptive')

topics = load_topics()
years = [*range(2012, 2018)]  # 2012 through 2017 inclusive

# Read the pickled yearly table, keep the 'count' column selection, then
# narrow it to the columns labelled with the chosen years.
# NOTE(review): the double column selection suggests (measure, year)
# MultiIndex columns -- confirm against the pickle.
file = os.path.join(descriptive_path, 'occurrance', 'yearly')
df = (
    pd.read_pickle(file)
    .loc[:, 'count']
    .loc[:, years]
)

In [244]:
# Add the selected years together per row, pivot the second-to-last index
# level into columns, treat missing combinations as 0, and keep only the rows
# labelled with the loaded topics.
city_topics_sum_df = (
    df.sum(axis=1)
    .unstack(level=-2)
    .fillna(value=0)
    .loc[topics]
)

In [246]:
# Each cell becomes count * grand_total / (column_total * row_total), i.e. the
# observed count divided by the count expected if rows and columns were
# independent. Despite the `_pct` name this is a ratio, not a percentage.
city_topics_pct = pd.DataFrame(
    data=(
        city_topics_sum_df.values
        * city_topics_sum_df.sum().sum()
        / city_topics_sum_df.sum(axis=0).values[None, :]  # column totals, shape (1, n_cols)
        / city_topics_sum_df.sum(axis=1).values[:, None]  # row totals, shape (n_rows, 1)
    ),
    index=city_topics_sum_df.index,
    columns=city_topics_sum_df.columns,
)

In [247]:
# Express every cell as a percentage of its column total, so each column with
# a non-zero total adds up to 100.
city_topics = pd.DataFrame(
    data=(
        city_topics_sum_df.values
        * 100
        / city_topics_sum_df.sum(axis=0).values[None, :]  # column totals, shape (1, n_cols)
    ),
    index=city_topics_sum_df.index,
    columns=city_topics_sum_df.columns,
)

In [366]:
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral10
from bokeh.plotting import figure
from bokeh.layouts import row
from bokeh.transform import factor_cmap

def popular_top5(city):
    """Build a horizontal bar chart of the five highest-valued topics for a city.

    Reads the module-level ``city_topics_pct`` table, takes the column labelled
    ``city`` and keeps the five largest non-missing values; the index labels of
    that column are used as the bar categories.

    Args:
        city: Column label in ``city_topics_pct``; also used as the plot title.

    Returns:
        A Bokeh figure with one horizontal bar per selected topic, with the
        categories listed in ascending order of value.
    """
    # Drop NaN first: sort_values() places NaN last by default, so without this
    # the tail slice would select missing values instead of the real maxima.
    city_top5 = city_topics_pct.loc[:, city].dropna().sort_values()[-5:]
    # Local name chosen so the module-level `topics` is not shadowed.
    top_topics = list(city_top5.index)
    source = ColumnDataSource(data=dict(topics=top_topics, pct=city_top5.values))

    p = figure(y_range=top_topics, plot_height=200, plot_width=250,
               toolbar_location=None, title=city)
    p.hbar(y='topics', right='pct', height=0.8, source=source)

    p.xgrid.grid_line_color = None
    # The bars are drawn without a legend label, so the figure has no legend;
    # the former `p.legend.orientation` / `p.legend.location` assignments had
    # nothing to act on and were removed.
    return p

# One top-5 chart per city, laid out side by side in a single row.
cities = ['Boston', 'Chicago', 'New York', 'San Francisco']
plots = [popular_top5(city) for city in cities]

output_notebook()
show(row(plots))

In [293]:
# Path of the 'boston_social' file under the descriptive results directory.
# NOTE(review): nothing in the cells shown after this one reads `file` again.
file = os.path.join(
    descriptive_path,
    'occurrance',
    'boston_social',
)

In [368]:
from bokeh.transform import jitter


# Weekday labels, Monday first; used as the categorical y-axis of the
# time-of-day scatter plot.
DAYS = 'Mon Tue Wed Thu Fri Sat Sun'.split()

def load_city_topic(city, topic):
    """Load the pickled object stored for one city/topic pair.

    The file is looked up at
    ``<cwd>/results/descriptive/occurrance/city_topic/<city>_<topic>``.

    Args:
        city: City part of the file name.
        topic: Topic part of the file name.

    Returns:
        Whatever ``pd.read_pickle`` yields for that file.
    """
    pickle_path = os.path.join(
        os.path.abspath(os.path.curdir),
        'results', 'descriptive', 'occurrance', 'city_topic',
        f'{city}_{topic}',
    )
    return pd.read_pickle(pickle_path)

def event_occurrance(city, topic, years):
    """Scatter one city/topic's events by time of day (x) and weekday (y).

    Args:
        city: City name, forwarded to ``load_city_topic``.
        topic: Topic name, forwarded to ``load_city_topic``.
        years: Iterable of years to keep; rows whose ``year`` is not among
            them are dropped. The span of these years is shown in the title.

    Returns:
        A Bokeh figure with one jittered, semi-transparent circle per event.
        Only the 6-24 hour window is visible on the x-axis.
    """
    # Materialise once so any iterable works for both the filter and the title.
    years = list(years)

    events = load_city_topic(city, topic)
    events = events.loc[events.year.isin(years), ['local_time', 'weekday']]

    # Fractional hour of the day, e.g. 18:30 -> 18.5.
    hour_of_day = events['local_time'].dt.hour + events['local_time'].dt.minute / 60
    # NOTE(review): the +0.5 presumably centres a numeric weekday (0 = Monday,
    # matching the order of DAYS) on its band of the categorical y-axis --
    # confirm the encoding of the `weekday` column.
    source = ColumnDataSource(dict(day=events['weekday'] + 0.5, time=hour_of_day))

    # Derive the period shown in the title from `years` instead of hard-coding
    # "2015—2017", so the title stays truthful for other selections.
    # NOTE(review): "(US/EST)" is still fixed text; confirm it is right for
    # cities outside the Eastern time zone.
    title = "Events Occurance by Time of Day (US/EST)"
    if years:
        first_year, last_year = min(years), max(years)
        if first_year == last_year:
            title = f"{title} {first_year}"
        else:
            title = f"{title} {first_year}—{last_year}"

    p = figure(plot_width=800, plot_height=300, y_range=DAYS, x_range=[6, 24],
               title=title)
    p.circle(x=jitter('time', width=1),
             y=jitter('day', width=0.6, range=p.y_range),
             source=source, alpha=0.1)
    p.ygrid.grid_line_color = None
    return p

# Render the Boston "social" events for 2015, 2016 and 2017.
show(
    event_occurrance(
        city="Boston",
        topic="social",
        years=[2015, 2016, 2017],
    )
)