In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import bokeh
from bokeh.plotting import figure, gridplot, output_file, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, CustomJS, BoxZoomTool, BoxSelectTool, ResetTool
output_notebook(resources=bokeh.resources.INLINE)
%matplotlib inline

# Data story example

In [None]:
# Load the raw MBTA vehicle-location export and give its columns short,
# explicit names (the assignment requires the file to have exactly 8 columns).
column_names = [
    u'message_id',
    u'service_date_id',
    u'vehicle',
    u'timestamp_gmt',
    u'latitude',
    u'longitude',
    u'earliness_in_minutes',
    u'odometer_in_miles',
]
d = pd.read_csv("data/mbta-locations.csv")
d.columns = column_names
d.shape

In [None]:
# Peek at the first five rows to sanity-check the renamed columns.
d.head(n=5)

In [None]:
# Sorted array of the distinct vehicle ids present in the data.
vehicles = np.unique(d["vehicle"])
# Show how many there are, plus the first hundred ids.
(len(vehicles), vehicles[:100])

In [None]:
# Every reported position; latitude is on the x axis and longitude on the y axis.
plt.scatter(x=d["latitude"], y=d["longitude"], s=1, alpha=0.2)

In [None]:
# Keep only rows whose latitude is above 40 degrees.
# NOTE(review): presumably this removes bogus GPS fixes seen in the previous
# plot -- confirm against the data.
latitude_ok = d["latitude"] > 40
d = d[latitude_ok]
d.shape

In [None]:
# Same scatter as before, now on the latitude-filtered data.
plt.scatter(x=d["latitude"], y=d["longitude"], s=1, alpha=0.2)

In [None]:
# Show the positions reported by vehicle 322 (red) together with every
# position in the dataset (gray).
v = d[d["vehicle"] == 322]
# Function-call form of print: valid on Python 3 and prints the same text on
# Python 2 (the original `print v.shape` statement is a SyntaxError on 3).
print(v.shape)
plt.figure(figsize=(15, 10))
plt.scatter(v["latitude"], v["longitude"], s=10, color="red", alpha=0.5)
plt.scatter(d["latitude"], d["longitude"], s=1, color="gray", alpha=0.2)

In [None]:
# Isolate the points with longitude below -71.4 and plot them on their own.
d_lowlo = d[d["longitude"] < -71.4]
# Function-call form of print: valid on Python 3 and prints the same text on
# Python 2 (the original print statement is a SyntaxError on 3).
print(d_lowlo.shape)
plt.scatter(d_lowlo["latitude"], d_lowlo["longitude"], s=10, alpha=0.5)

In [None]:
# Distinct vehicle ids that appear in the low-longitude subset.
lowlo_vehicle_ids = d_lowlo["vehicle"]
np.unique(lowlo_vehicle_ids)

Plot the routes of some of the vehicles listed above.

- weird single point at 42.15, -71.5

In [None]:
# Plot the positions reported by a single vehicle (id 545).
v = d[d["vehicle"] == 545]
# Function-call form of print: valid on Python 3 and prints the same text on
# Python 2 (the original print statement is a SyntaxError on 3).
print(v.shape)
plt.scatter(v["latitude"], v["longitude"], s=2, alpha=0.5)

### Undersample to show some points

- try with 10k and 5k points

In [None]:
# Pick a random subset of row positions for the down-sampled plots.
# A dedicated, seeded RandomState makes the sample identical on every
# Restart & Run All and leaves NumPy's global random state untouched.
SAMPLE_SIZE = 5000  # number of points to keep; 10000 is the other size to try
SAMPLE_SEED = 0
# If the frame has fewer rows than SAMPLE_SIZE the slice simply keeps them all.
p = np.random.RandomState(SAMPLE_SEED).permutation(len(d))[:SAMPLE_SIZE]

In [None]:
# Static matplotlib view of the random sample only.
lat_sample = d["latitude"].iloc[p]
lon_sample = d["longitude"].iloc[p]
plt.figure(figsize=(15, 10))
plt.scatter(lat_sample, lon_sample, s=1, alpha=0.2)

In [None]:
# Interactive Bokeh view of the same random sample.
lat_sample = d["latitude"].iloc[p]
lon_sample = d["longitude"].iloc[p]

s1 = figure(title="all mbta lines", width=650, plot_height=450)
s1.circle(lat_sample, lon_sample, size=1, color="navy", alpha=0.5)
show(s1)

In [None]:
# Materialise the sampled rows (all columns) once, selected by position.
di = d.iloc[p, :]


In [None]:
# Split the random sample by the sign of `earliness_in_minutes` and draw the
# two groups in different colors, with a hover tool that shows the value.
#
# The boolean masks are built from the sampled frame itself. The previous form,
# d.iloc[p][d[...] >= 0], indexed the subset with a mask computed on the full
# frame, which makes pandas re-align the mask to the subset (emitting a
# "Boolean Series key will be reindexed" warning) and slices d.iloc[p] twice.
di = d.iloc[p]
dp = di[di["earliness_in_minutes"] >= 0]
dn = di[di["earliness_in_minutes"] < 0]
# Rows whose earliness is missing satisfy neither comparison and so appear in
# neither group (same as before).

# One data source per group; `em` carries the value shown in the tooltip.
source_p = ColumnDataSource(
    data=dict(
        x=dp["latitude"],
        y=dp["longitude"],
        em=dp["earliness_in_minutes"],
    )
)

source_n = ColumnDataSource(
    data=dict(
        x=dn["latitude"],
        y=dn["longitude"],
        em=dn["earliness_in_minutes"],
    )
)

# "@em" reads the `em` column of whichever source the hovered glyph uses.
h = HoverTool(
    tooltips=[
        ("index", "$index"),
        ("(lat, lon)", "($x, $y)"),
        ("earliness", "@em"),
    ]
)

s1 = figure(width=750, plot_height=450, title="pos and neg earliness",
            tools=[h, ResetTool(), BoxZoomTool()])
# navy: earliness >= 0, red: earliness < 0
s1.circle('x', 'y', source=source_p, size=3, color="navy", alpha=.1)
s1.circle('x', 'y', source=source_n, size=3, color="red", alpha=.1)

show(s1)

In [None]:
import bokeh

# Sampled rows, and the Bokeh data source that the `update` callback below
# rewrites whenever the earliness threshold changes.
di = d.iloc[p]
source = ColumnDataSource(
    data=dict(
        x=di["latitude"],
        y=di["longitude"],
    )
)
def update(earliness_min):
    """Restrict the plotted sample to rows at or above an earliness threshold.

    Keeps the rows of the module-level sample `di` whose
    `earliness_in_minutes` is >= `earliness_min`, prints how many remain,
    writes their coordinates into the module-level `source`, and pushes the
    change to the notebook.

    Parameters
    ----------
    earliness_min : number
        Lower bound (inclusive) on `earliness_in_minutes`.
    """
    dii = di[di["earliness_in_minutes"] >= earliness_min]
    # str.format + print(...) yields the same text as the old Python 2 print
    # statement and is also valid Python 3.
    print("min {} nb of datapoints {}".format(earliness_min, len(dii)))
    # Replace both columns in a single assignment so the source never holds
    # an `x` and a `y` of different lengths between the two updates.
    source.data = dict(x=dii["latitude"], y=dii["longitude"])
    bokeh.io.push_notebook()
    
# Figure bound to `source`; it is re-rendered when `update` pushes new data.
s1 = figure(title="filter by earliness", width=750, plot_height=450)
s1.circle('x', 'y', source=source, color="navy", size=1, alpha=.1)

show(s1)

In [None]:
# NOTE(review): IPython.html.widgets is the legacy import location; newer
# installs provide `interact` from the separate ipywidgets package -- confirm
# which one the target environment has.
from IPython.html.widgets import interact

# Widget bounds span the earliness values of the full (unsampled) data.
earliness_lo = np.min(d["earliness_in_minutes"])
earliness_hi = np.max(d["earliness_in_minutes"])
interact(update, earliness_min=(earliness_lo, earliness_hi))