In [1]:
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import bqplot as bq
import folium
import branca
import ipywidgets
from ipywidgets import interact
import vincent
import json
#bokeh
from bokeh.plotting import figure, output_file 
from bokeh.io import show,output_notebook
from bokeh.palettes import Plasma
from bokeh.models import ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.models import FuncTickFormatter
from folium import plugins
from folium.plugins import HeatMap

In [2]:
data = pd.read_csv("201712-citibike-tripdata_cleaned.csv")

## Trips density of each station based on arrival and departure by hour.

In [3]:
start_stations = dict(zip(data["start station name"], data["start station id"]))
start_stations["None"] = None
end_stations = dict(zip(data["end station name"], data["end station id"]))
end_stations["None"]=None
hour = dict(zip(data["hour"]+1,data["hour"]))
map_options = {"Mapbox Bright":"Mapbox Bright","Mapbox Control Room":"Mapbox Control Room","OpenStreetMap":"OpenStreetMap","Mapbox Bright":"Mapbox Bright","Stamen Toner":"Stamen Toner","Stamen Terrain":"Stamen Terrain","CartoDBDark":"CartoDB dark_matter"}

In [4]:
def trip_by_hour(hour):
    loc1 = data.groupby("start station id").first()
    location = loc1.loc[:,["start station longitude","start station latitude","start station name"]]
    hour = data[data["hour"]==hour]
    departure=hour.groupby("start station id").count().iloc[:,[0]]
    departure.columns=["Counts of Departure"]
    #print(type(departure))
    arrival= hour.groupby("end station id").count().iloc[:,[0]]
    arrival.columns=["Counts of Arrival"]
    #print(type(arrival))
    #joining arrival and departure
    trip = departure.join(location).join(arrival)
    return plot_station_counts(trip)
    #print(departure)

In [5]:
def plot_station_counts(trip_counts):
    # generate a new map
    folium_map = folium.Map(location=[40.738, -73.98],
                            tiles="Stamen Terrain",
                            zoom_start=13,
                            width='50%')

    # for each row in the data, add a cicle marker
    for index, row in trip_counts.iterrows():
        # calculate net departures
        net_departures = (row["Counts of Departure"]-row["Counts of Arrival"])
        
        # generate the popup message that is shown on click.
        popup_text = "{}<br> total departures: {}<br> total arrivals: {}<br> net departures: {}"
        popup_text = popup_text.format(row["start station name"],
                          row["Counts of Arrival"],
                          row["Counts of Departure"],
                          net_departures)
        
        # radius of circles
        radius = net_departures/20
        
        # choose the color of the marker
        if net_departures>0:
            #color="#FFCE00" # orange
             color="#007849" # green
            #color="#E37222" # tangerine
        else:
             color="#0375B4" # blue
            # color="#FFCE00" # yellow            
            #color="#0A8A9F" # teal
        
        # add marker to the map
        folium.CircleMarker(location=(row["start station latitude"],
                                      row["start station longitude"]),
                            radius=radius,
                            color=color,
                            popup=popup_text,
                            fill=True).add_to(folium_map)
        #data['points'] = list(zip(data["start station latitude"], data["start station longitude"]))
    #trip_counts['points'] = list(zip(trip_counts["start station latitude"], trip_counts["start station longitude"]))
    #for i in trip_counts["points"]:
    #    folium.Marker(i).add_to(folium_map)
    #folium.PolyLine(trip_counts["points"], color="red", weight=2.5, opacity=0.2).add_to(folium_map)
    return folium_map

In [6]:
interact(trip_by_hour,hour = hour)


<function __main__.trip_by_hour>

## Interactive map using start and end stations with hours and tiles option

In [7]:
def plot_station_by_hour(trip_counts,tiles):
    # generate a new map
    folium_map = folium.Map(location=[40.738, -73.98],
                            tiles="CartoDB dark matter",
                            zoom_start=13,
                            width='50%')

    # for each row in the data, add a cicle marker
    for index, row in trip_counts.iterrows():
        # calculate net departures
        net_departures = (row["Counts of Departure"]-row["Counts of Arrival"])
        
        # generate the popup message that is shown on click.
        popup_text = "{}<br> total departures: {}<br> total arrivals: {}<br> net departures: {}"
        popup_text = popup_text.format(row["start station name"],
                          row["Counts of Arrival"],
                          row["Counts of Departure"],
                          net_departures)
        
        # radius of circles
        radius = net_departures/20
        
        # choose the color of the marker
        if net_departures>0:
            #color="#FFCE00" # orange
             color="#007849" # green
            #color="#E37222" # tangerine
        else:
             color="#0375B4" # blue
            # color="#FFCE00" # yellow            
            #color="#0A8A9F" # teal
        
        # add marker to the map
        #folium.CircleMarker(location=(row["start station latitude"],
        #                              row["start station longitude"]),
        #                    radius=radius,
        #                    color=color,
        #                    popup=popup_text,
        #                    fill=True).add_to(folium_map)
        #data['points'] = list(zip(data["start station latitude"], data["start station longitude"]))
    trip_counts['points'] = list(zip(trip_counts["start station latitude"], trip_counts["start station longitude"]))
    for i in trip_counts["points"]:
        folium.Marker(i).add_to(folium_map)
    folium.PolyLine(trip_counts["points"], color="red", weight=2.5, opacity=0.2).add_to(folium_map)
    return folium_map

In [8]:
from bokeh.models import HoverTool

def route(start_station,end_station,fig):
    val = data[(data["start station id"]==start_station) & (data["end station id"]==end_station)]
    bike = val.groupby(["hour"]).count().iloc[:,[11]].reset_index()
    #output_file("line.html")
    #f = branca.element.Figure()
    bike["hour"] = bike["hour"]+1
    scatter_points = {
        'x': bike["hour"],
        'y': bike["bikeid"],
        }
    scatter_chart = vincent.Scatter(scatter_points,
                       iter_idx='x',
                       width=450,
                      height=250)
    scatter_json = scatter_chart.to_json()
    scatter_dict = json.loads(scatter_json)
    #popup = folium.Popup(max_width=650)
    v = folium.Vega(scatter_dict, height='50%', width='25%',left='50%',position='absolute')
    fig.add_child(v)
    return fig

In [9]:
def route_other(start_station,fig):
    val = data[data["start station id"]==start_station]
    bike = val.groupby(["hour"]).count().iloc[:,[11]].reset_index()
    #print(bike)
    bike["hour"] = bike["hour"]+1
    #print(bike)
    #output_file("line.html")
    #f = branca.element.Figure()
    #print(bike)
    scatter_points = {
        'x': bike["hour"],
        'y': bike["bikeid"],
        }
    scatter_chart = vincent.Scatter(scatter_points,
                       iter_idx='x',
                       width=450,
                      height=250)
    scatter_json = scatter_chart.to_json()
    scatter_dict = json.loads(scatter_json)
    #popup = folium.Popup(max_width=650)
    v = folium.Vega(scatter_dict, height='50%', width='25%',left='50%',position='absolute')
    fig.add_child(v)
    return fig

In [10]:
def plot_station(trip_counts,tiles,val):
    # generate a new map
    folium_map = folium.Map(location=[40.738, -73.98],
                            tiles=tiles,
                            zoom_start=10,control_scale=True,left='0%',
                            width='50%',position='absolute',height='50%')
    #print(trip_counts)
    points=[]
    #popup = folium.Popup()
    #bike = val.groupby(["hour"]).count().iloc[:,[0]].reset_index()
    for index, row in trip_counts.iterrows():
        
        #net_departures = (row["Counts of Departure"]-row["Counts of Arrival"])
        #scatter_points = {
        #'x': bike["hour"],
        #'y': bike["bikeid"],
        #'title' : row["start station name"]
        #}
        #scatter_chart = vincent.Scatter(scatter_points,
        #                        iter_idx='x',
        #                        width=600,
        #                        height=300)
        #scatter_json = scatter_chart.to_json()
        #scatter_dict = json.loads(scatter_json)
        #popup = folium.Popup(max_width=650)
        #folium.Vega(scatter_dict, height=350, width=650).add_to(popup)
        
        popup_text = "{}"
        popup_text_end = "{}"
        popup_text = popup_text.format(row["start station name"])
                          #row["Counts of Arrival"],
                          #row["Counts of Departure"],
                          #net_departures)
        
        popup_text_end = popup_text_end.format(row["end station name"])
                          #row["Counts of Arrival"],
                          #row["Counts of Departure"],
                          #net_departures)
        folium.Marker(location=(row["start station latitude"],
                                      row["start station longitude"]),icon=folium.Icon(color='blue',icon='bicycle',prefix='fa'),
                            popup=popup_text).add_to(folium_map)
        folium.Marker(location=(row["end station latitude"],
                                      row["end station longitude"]),icon=folium.Icon(color='green',icon='bicycle',prefix='fa'),
                            popup=popup_text_end).add_to(folium_map)
    #print(trip_counts)    
    points=list(zip(trip_counts["start station latitude"], trip_counts["start station longitude"]))
    #a=len(points)
    val = list(zip(trip_counts["end station latitude"], trip_counts["end station longitude"]))
    try:
           for i in val:
                points.append(i)
    except:
            print("No Route Found!")
    if(len(points)==0):
        print("No Route Found!")
    #for i in points:
    #    folium.Marker(i).add_to(folium_map)
    #points= list(set(points))
    folium.PolyLine(points, color="red", weight=2.5, opacity=1).add_to(folium_map)
    #print("Number of Bike Trips Recorded: " , a)
    return folium_map

In [18]:
!conda remove nbpresent

def f(start_station,end_station,hour,tiles):
    fig = branca.element.Figure()
    if(start_station==None and end_station==None):
        #hour_val = val[val["hour"]==hour]
        val=trip_by_hour(hour)
        #print(val.head())
        return plot_station_by_hour(val,tiles)
    elif(end_station==None):
        val = data[(data["start station id"]==start_station)]
        val = val.loc[:,["bikeid","start station name" , "end station name", "start station longitude","start station latitude","hour","end station latitude","end station longitude","start station id","end station id"]]
        hour_val = val[val["hour"]==hour]
        route_other(start_station,fig)
        #print(hour_val)
        #return(plot_station(hour_val))
    elif(start_station==None):
        val = data[(data["end station id"]==end_station)]
        val = val.loc[:,["bikeid","start station longitude","start station name" , "end station name","start station latitude","hour","end station latitude","end station longitude","start station id","end station id"]]
        hour_val = val[val["hour"]==hour]
        route_other(end_station,fig)
        #return(plot_station(hour_val))
    else:
        val = data[(data["start station id"]==start_station) & (data["end station id"]==end_station)]
        val = val.loc[:,["bikeid","start station longitude","start station name" , "end station name","start station latitude","hour","end station latitude","end station longitude","start station id","end station id"]]
        hour_val = val[val["hour"]==hour]
        route(start_station,end_station,fig)
    return fig.add_child(plot_station(hour_val,tiles,val))
    

interact(f,start_station=start_stations,end_station=end_stations,hour=hour,tiles=map_options)


Solving environment: ...working... failed



PackagesNotFoundError: The following packages are missing from the target environment:
  - nbpresent




<function __main__.f>

## Heat Map using Speed and Day of the Week

In [12]:
data["speed"] = data["speed"].replace(0.000000,5.793497871920003)
data["speed"] = data["speed"].round()
data["speed"] = [int(x) for x in data["speed"]]
data["speed"].value_counts().reset_index()

Unnamed: 0,index,speed
0,6,229745
1,5,192427
2,7,160004
3,4,101657
4,8,83249
5,3,39423
6,9,34270
7,2,18454
8,1,11616
9,10,11497


In [13]:
speed = {"1": 1,"2": 2,"3": 3, "4": 4,"5": 5,"6": 6,"7": 7,"8": 8,"9": 9,"10":10, "11": 11,"12": 12,"13": 13,"14": 14,"15": 15,"16": 16,"41": 41}

In [14]:
data["day"].max()

31

In [15]:
data["dayOfWeek"].value_counts()

4    163082
1    148028
0    138115
2    136786
3    123874
6     95731
5     84351
Name: dayOfWeek, dtype: int64

In [16]:
dayofweek = {"Monday":0,"Tuesday":1,"Wednesday":2,"Thursday":3,"Friday":4,"Saturday":5,"Sunday":6}

In [17]:


def f(Speed,Days_of_Week):
    map_hooray = folium.Map(location=[40.738, -73.98],
                    zoom_start = 13) 

    # Ensure you're handing it floats
    data['start station latitude'] = data['start station latitude'].astype(float)
    data['start station longitude'] = data['start station longitude'].astype(float)

    # Filter the DF for rows, then columns, then remove NaNs
    heat_df = data[data['dayOfWeek']==Days_of_Week] # Reducing data size so it runs faster
    #heat_df = heat_df[data['speed']==Speed]
    heat_df = heat_df[heat_df['speed']==Speed] # Reducing data size so it runs faster
    #print(heat_df)
    heat_df = heat_df[['start station latitude', 'start station longitude']]
    heat_df = heat_df.dropna(axis=0, subset=['start station latitude','start station longitude'])
    #print(heat_df)
    # List comprehension to make out list of lists
    heat_data = [[row['start station latitude'],row['start station longitude']] for index, row in heat_df.iterrows()]
    #print(heat_data)
    # Plot it on the map
    if(len(heat_data)==0):
        print("No speed found!")
    HeatMap(heat_data).add_to(map_hooray)

# Display the map
    return map_hooray
interact(f,Speed=speed,Days_of_Week=dayofweek)

<function __main__.f>