# Geographical demand patterns for Boston 2015 / Bluebikes

Import all important libraries

In [1]:
import numpy as np
import pandas as pd
from datetime import date, time, datetime, timedelta 
from dateutil import parser
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap
import math
import random
import timeit
import io
from PIL import Image

import warnings
warnings.filterwarnings("ignore")

## Prepare data

Read in the original Bluebikes dataset, which also contains the coordinates of the stations.
Note: please download the full csv file `full_bluebike.csv` provided via OneDrive to run this cell!
https://uzk-my.sharepoint.com/:f:/g/personal/konstantin_dreesen_uzk_onmicrosoft_com/Elc01kvmXnJGt0d6ICsqsHQBe1nmgE5d6ht4syPua4FF6A?e=mKu4sd

In [2]:
# Timestamp layout used in the Bluebikes export.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Load the original Bluebikes export; it is the source of the station coordinates.
# In order to get "full_bluebike.csv" we provide OneDrive access for download:
# https://uzk-my.sharepoint.com/:f:/g/personal/konstantin_dreesen_uzk_onmicrosoft_com/Elc01kvmXnJGt0d6ICsqsHQBe1nmgE5d6ht4syPua4FF6A?e=83WUQq
test_Boston = pd.read_csv("Boston/full_bluebike.csv")

# Parse the timestamps explicitly instead of passing `date_parser` to read_csv:
# that argument is deprecated since pandas 2.0 and a strptime lambda is applied
# row by row, which is slow on a file of this size.
for timestamp_column in ("starttime", "stoptime"):
    test_Boston[timestamp_column] = pd.to_datetime(
        test_Boston[timestamp_column], format=TIMESTAMP_FORMAT
    )

# Drop the unnecessary column whose name is the complete, quoted header line.
test_Boston = test_Boston.drop(columns=['tripduration,"starttime","stoptime","start station id","start station name","start station latitude","start station longitude","end station id","end station name","end station latitude","end station longitude","bikeid","usertype","birth year","gender"'])

FileNotFoundError: [Errno 2] No such file or directory: 'Boston/full_bluebike.csv'

read in prepared dataset

In [None]:
# Load the prepared 2015 dataset and parse its timestamps with an explicit format.
# `date_parser` is deprecated since pandas 2.0 and a strptime lambda is applied
# row by row; pd.to_datetime with a fixed format is vectorised.
df_boston_full = pd.read_csv("Boston/boston_2015.csv")
for timestamp_column in ("start_time", "end_time"):
    df_boston_full[timestamp_column] = pd.to_datetime(
        df_boston_full[timestamp_column], format="%Y-%m-%d %H:%M:%S"
    )

make sure datasets have the same number of rides

In [None]:
# Number of distinct start timestamps in the original dataset
unique_start_times = test_Boston["starttime"].unique()
len(unique_start_times)

In [None]:
# Check that both datasets contain the same distinct start timestamps in the same order.
# The previous version displayed len() of the element-wise comparison, which is just the
# number of timestamps and says nothing about whether they match.
original_start_times = np.asarray(test_Boston["starttime"].unique())
prepared_start_times = np.asarray(df_boston_full["start_time"].unique())
start_times_match = np.array_equal(original_start_times, prepared_start_times)
start_times_match

Check whether every rented bike was returned

In [None]:
# Original dataset: rides whose end station id is the "\N" placeholder
missing_end_original = test_Boston["end station id"] == "\\N"
test_Boston[missing_end_original]

In [None]:
# Prepared dataset: rides whose end station id is the "\N" placeholder
missing_end_prepared = df_boston_full["end_station_id"] == "\\N"
df_boston_full[missing_end_prepared]

Delete all rides with a missing end station

In [None]:
# Remove the rides that carry the "\N" placeholder instead of an end station.
# Filtering on the marker, rather than dropping the hard-coded position 21566,
# keeps this cell idempotent: a positional drop removes a different, valid ride
# every time the cell is re-run. Index labels are preserved in both frames.
MISSING_MARKER = "\\N"
test_Boston = test_Boston[test_Boston["end station id"] != MISSING_MARKER]
df_boston_full = df_boston_full[df_boston_full["end_station_id"] != MISSING_MARKER]


In [None]:
# Print the column dtypes and non-null counts of the prepared dataset
df_boston_full.info()

parse each column which was affected by empty data to the right data type

In [None]:
# Cast both end-station coordinate columns of the original dataset to float
for coordinate_column in ('end station latitude', 'end station longitude'):
    test_Boston[coordinate_column] = test_Boston[coordinate_column].astype(float)


In [None]:
# Cast the end station ids of the prepared dataset to integers
end_station_ids = df_boston_full['end_station_id']
df_boston_full['end_station_id'] = end_station_ids.astype(int)

In [None]:
# Sanity check: both datasets must still contain the same number of rides
rides_original = len(test_Boston)
rides_prepared = len(df_boston_full)
rides_original == rides_prepared

insert coordinates from the original dataset into the prepared dataset

In [None]:
# Copy the station coordinates from the original dataset into the prepared one.
# Column assignment aligns both frames on their index labels.
coordinate_columns = {
    "start_latitude": "start station latitude",
    "start_longitude": "start station longitude",
    "end_latitude": "end station latitude",
    "end_longitude": "end station longitude",
}
for target_column, source_column in coordinate_columns.items():
    df_boston_full[target_column] = test_Boston[source_column]

In [None]:
# Combine latitude and longitude (rounded to 4 decimals) into one (lat, lon) tuple per ride
start_lat = df_boston_full["start_latitude"].round(4)
start_lon = df_boston_full["start_longitude"].round(4)
df_boston_full["Coordinates_start"] = list(zip(start_lat, start_lon))

end_lat = df_boston_full["end_latitude"].round(4)
end_lon = df_boston_full["end_longitude"].round(4)
df_boston_full["Coordinates_end"] = list(zip(end_lat, end_lon))

In [None]:
# Preview the first three rides, including the newly added coordinate columns
df_boston_full.head(3)

Creating new columns for weekdays  

In [None]:
def get_weekday(ts):
    """Return the weekday of the timestamp `ts` as an integer (0 = Monday ... 6 = Sunday)."""
    return ts.weekday()


# 0=Monday 6=Sunday
# Vectorised accessor; yields the same values as applying get_weekday row by row.
df_boston_full["Weekday"] = df_boston_full["start_time"].dt.weekday

## Show the data on a map

#### create new map of Boston

New map using the 'Stamen Toner' tiles (for better readability), centred on Boston

In [None]:
# Base map centred on Boston
boston_map = folium.Map(
    location=(42.361145, -71.057083),
    tiles='Stamen Toner',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)

#### point each station on the map

Display all stations on the map to have a better overview in which areas the stations are mainly located

In [None]:
# One marker per distinct start position, labelled with the station name recorded on the
# same ride. Taking .unique() of names and positions separately and pairing them by index
# mislabels markers whenever the two columns differ in number or order of distinct values.
start_stations = df_boston_full.drop_duplicates(subset="Coordinates_start")
positions = list(start_stations["Coordinates_start"])
names = list(start_stations["start_station_name"])
for position, name in zip(positions, names):
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color='crimson', fill_color='crimson').add_to(boston_map)

In [None]:
# One marker per distinct end position, labelled with the station name recorded on the
# same ride. Taking .unique() of names and positions separately and pairing them by index
# mislabels markers whenever the two columns differ in number or order of distinct values.
end_stations = df_boston_full.drop_duplicates(subset="Coordinates_end")
positions = list(end_stations["Coordinates_end"])
names = list(end_stations["end_station_name"])
for position, name in zip(positions, names):
    folium.CircleMarker(radius=2, location=position, popup=name,
                        color='blue', fill_color='blue').add_to(boston_map)

In [None]:
# Uncomment to export the map as a standalone HTML file
#boston_map.save("boston_map.html")
# Render the station map inline
boston_map

#### create heatmaps
show a heatmap based on every ride in the dataset

In [None]:
# Heat layer built from the start position of every ride
all_rides_heat = HeatMap(df_boston_full["Coordinates_start"], radius=20)
boston_map.add_child(all_rides_heat)
boston_map

Compare it to heatmaps split into working days and weekends to evaluate possible differences during the week.
Show a heatmap based on every ride taken on working days:

In [None]:
# Heatmap of all rides started on working days (Weekday 0-4 = Monday-Friday)
boston_map1 = folium.Map(location=(42.361145, -71.057083),tiles='Stamen Toner',zoom_start=12, control_scale=True, max_zoom=20)
weekday_rides = df_boston_full[df_boston_full["Weekday"] < 5]
boston_map1.add_child(plugins.HeatMap(weekday_rides["Coordinates_start"], radius=20))

# Station markers: (name column, position column, marker radius, marker colour).
# Name and position are read from the same ride; pairing two independent .unique()
# lists by index can attach the wrong name to a marker.
station_layers = [
    ("start_station_name", "Coordinates_start", 5, "crimson"),
    ("end_station_name", "Coordinates_end", 2, "blue"),
]
for name_column, position_column, marker_radius, marker_colour in station_layers:
    stations = df_boston_full.drop_duplicates(subset=position_column)
    for position, name in zip(stations[position_column], stations[name_column]):
        folium.CircleMarker(radius=marker_radius, location=position, popup=name,
                            color=marker_colour, fill_color=marker_colour).add_to(boston_map1)

#boston_map1.save("boston_map1.html")
boston_map1

Show a heatmap based on every ride taken on the weekends:

In [None]:
# Heatmap of all rides started on the weekend
boston_map2 = folium.Map(location=(42.361145, -71.057083),tiles='Stamen Toner',zoom_start=12, control_scale=True, max_zoom=20)
# Weekday is 0=Monday ... 6=Sunday, so the weekend is 5 (Saturday) and 6 (Sunday).
# The previous filter `> 5` kept Sundays only.
weekend_rides = df_boston_full[df_boston_full["Weekday"] >= 5]

# Station markers: (name column, position column, marker radius, marker colour).
# Name and position are read from the same ride; pairing two independent .unique()
# lists by index can attach the wrong name to a marker.
station_layers = [
    ("start_station_name", "Coordinates_start", 5, "crimson"),
    ("end_station_name", "Coordinates_end", 2, "blue"),
]
for name_column, position_column, marker_radius, marker_colour in station_layers:
    stations = df_boston_full.drop_duplicates(subset=position_column)
    for position, name in zip(stations[position_column], stations[name_column]):
        folium.CircleMarker(radius=marker_radius, location=position, popup=name,
                            color=marker_colour, fill_color=marker_colour).add_to(boston_map2)

boston_map2.add_child(plugins.HeatMap(weekend_rides["Coordinates_start"], radius=20))
#boston_map2.save("boston_map2.html")
boston_map2

As can be seen, there is not much variance between the weekday and weekend data. In both cases most bikes are rented in the city center, above all around Boston South Station and Harvard University. The stations are also mainly concentrated in the city center, which increases the bike traffic in this part of Boston.

# Ten most frequently used stations in Boston

 count how often each station is approached

In [None]:
# Number of rides started at each station, plus the coordinates of the station's first ride.
# value_counts() counts all stations in one pass; the previous loop filtered the complete
# frame once per station and raised on a missing (NaN) station name.
station_counts = df_boston_full["start_station_name"].value_counts()
first_rides = (
    df_boston_full
    .dropna(subset=["start_station_name"])
    .drop_duplicates(subset="start_station_name")
    .reset_index(drop=True)
)
most_used = (
    first_rides[["start_station_name", "Coordinates_start"]]
    .rename(columns={"start_station_name": "Station_Name"})
    .assign(Count=lambda stations: stations["Station_Name"].map(station_counts))
    [["Station_Name", "Count", "Coordinates_start"]]
)
#sort df by count in descending order
most_used = most_used.sort_values(by="Count", ascending=False)


show the ten most often approached stations in a barplot

In [None]:
# nlargest does not depend on the current sort order of `most_used`, so this cell still
# selects the top ten after a later cell has re-sorted the frame.
most_used_reduced = most_used.nlargest(10, "Count")

#plot
f, ax = plt.subplots(1, 1, figsize=(20, 10))
barplot = sns.barplot(x="Station_Name", y="Count", data=most_used_reduced, ax=ax)
barplot.set(xlabel="Station Name", ylabel="Counts", title="Ten most frequently used start stations")
plt.xticks(rotation=90)
# Show the figure without the text repr (replaces the former `print()` workaround)
plt.show()

# Ten least frequently used stations in Boston

show the ten least often approached stations in a barplot

In [None]:
# Ten stations with the fewest started rides. nsmallest leaves `most_used` untouched;
# re-sorting it in place made the "ten most used" cell return the least used stations
# when that cell was re-run afterwards.
less_used_reduced = most_used.nsmallest(10, "Count")

#plot
f, ax = plt.subplots(1, 1, figsize=(20, 10))
barplot = sns.barplot(x="Station_Name", y="Count", data=less_used_reduced, ax=ax)
barplot.set(xlabel="Station Name", ylabel="Counts", title="Ten least frequently used start stations")
plt.xticks(rotation=90)
# Show the figure without the text repr (replaces the former `print()` workaround)
plt.show()

### Compare the positions of most and least frequently used stations

In [None]:
boston_map_frq = folium.Map(location=(42.361145, -71.057083),tiles='OpenStreetMap',zoom_start=12, control_scale=True, max_zoom=20)

# Blue = ten most used stations, red = ten least used stations.
# Stroke and fill now use the same colour; before, the clearly visible stroke colours were
# the opposite of the legend. Names and positions are read row by row, so a popup
# always belongs to its marker.
station_groups = [
    (most_used_reduced, "blue"),
    (less_used_reduced, "red"),
]
for stations, marker_colour in station_groups:
    positions = list(stations["Coordinates_start"])
    names = list(stations["Station_Name"])
    for position, name in zip(positions, names):
        folium.CircleMarker(radius=5, location=position, popup=name,
                            color=marker_colour, fill_color=marker_colour).add_to(boston_map_frq)

<font color='blue'>Blue</font> circles mark the ten most often, <font color='red'>red</font> circles the ten least often used stations

In [None]:
boston_map_frq

As can be seen, the most used stations are in the city center, as the heat map showed earlier. The least used stations are all located in outer Boston / the suburbs. 

### Exemplary: Which stations are (most) approached by the most frequently used station? 

create list of all rides which started at `South Station - 700 Atlantic Ave`.

In [None]:
# The most frequently used station is South Station - 700 Atlantic Ave.;
# keep only the rides that started there.
starts_at_south_station = df_boston_full["start_station_name"] == "South Station - 700 Atlantic Ave."
df_rides_most_used = df_boston_full[starts_at_south_station]

point approached stations on a map

In [None]:
# Round trips: rides whose start and end coordinates are identical.
# The filtered frame used to be computed and then discarded because `print()` was
# the last statement of the cell; keep it and show how many rides it contains.
round_trips = df_rides_most_used[df_rides_most_used["Coordinates_start"] == df_rides_most_used["Coordinates_end"]]
len(round_trips)

Create polylines between the most frequently used station and the stations reached from it

In [None]:
# Distinct end-station names and distinct end coordinates of the rides from South Station
driven_to = pd.unique(df_rides_most_used["end_station_name"])
driven_to_coordinates = pd.unique(df_rides_most_used["Coordinates_end"])

In [None]:
# For every station reached from South Station: number of rides, colour band, coordinates.
dictL = []
for x in driven_to:
    akt = df_rides_most_used[df_rides_most_used["end_station_name"] == x]
    rides = len(akt)
    if rides == 0:
        # A missing (NaN) station name never compares equal, so there is nothing to plot.
        continue

    # Contiguous bands. The former strict `<` / `>` pairs left counts of exactly
    # 200, 300, 500 and 900 without any colour.
    if rides < 200:
        colour = "midnightblue"
    elif rides < 300:
        colour = "lightskyblue"
    elif rides < 500:
        colour = "thistle"
    elif rides < 900:
        colour = "orangered"
    else:
        colour = "red"

    # Take the coordinates from a ride to this station instead of indexing the separate
    # list of unique coordinates, which need not line up with the list of unique names.
    coordinates = akt["Coordinates_end"].iloc[0]
    dictL.append({"end_station_name": x, "Count": rides, "Colour": colour, "Coordinates": coordinates})
df_colour = pd.DataFrame.from_dict(dictL)

In [None]:
# New map with a large red marker on the start station
boston_map4 = folium.Map(
    location=(42.361145, -71.057083),
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)
south_station_marker = folium.CircleMarker(
    radius=10,
    location=(42.3522, -71.0555),
    popup="South Station - 700 Atlantic Ave.",
    color='red',
    fill_color='red',
)
south_station_marker.add_to(boston_map4)

In [None]:
# Draw a line from South Station to every reached station plus a marker on that station,
# both in the station's colour band.
# Position, name and colour are read row by row so they always belong together;
# a separate .unique() per column can change the number and order of the values.
positions = list(df_colour["Coordinates"])
names = list(df_colour["end_station_name"])
col = list(df_colour["Colour"])
south_station = (42.3522, -71.0555)
for position, name, colour in zip(positions, names, col):
    # Fall back to grey when no colour band was assigned (empty string).
    colour = colour or "gray"
    # `color` is the folium option for the line colour; the former `strokeColor`
    # keyword is not a folium path option, so the lines were never coloured.
    # The former return leg back to South Station retraced the same segment.
    folium.PolyLine([south_station, position], color=colour, weight=5, opacity=0.1).add_to(boston_map4)
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color=colour, fill_color='crimson').add_to(boston_map4)

The color scale ranges from <font color='blue'>blue</font> (least approached stations) to <font color='red'>red</font> (most approached stations).

In [None]:
# Optional: uncomment to overlay a heatmap of the end positions
#boston_map4.add_child(plugins.HeatMap(df_rides_most_used["Coordinates_end"], radius=20))
# Render the map inline
boston_map4

 To reduce overplotting we only show the 50 most frequently reached stations

In [None]:
# Order the reached stations by ride count, largest first
df_colour = df_colour.sort_values("Count", ascending=False)

In [None]:
# Keep the first 50 rows of the sorted frame
df_colour_reduced = df_colour.iloc[:50]

In [None]:
# New map with a large red marker on the start station
boston_map5 = folium.Map(
    location=(42.361145, -71.057083),
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)
south_station_marker = folium.CircleMarker(
    radius=10,
    location=(42.3522, -71.0555),
    popup="South Station - 700 Atlantic Ave.",
    color='red',
    fill_color='red',
)
south_station_marker.add_to(boston_map5)

In [None]:
# Draw a line from South Station to each of the 50 most reached stations plus a marker.
# Position, name and colour are read row by row so they always belong together;
# a separate .unique() per column can change the number and order of the values.
positions_reduced = list(df_colour_reduced["Coordinates"])
names_reduced = list(df_colour_reduced["end_station_name"])
col_reduced = list(df_colour_reduced["Colour"])
south_station = (42.3522, -71.0555)
for position, name, colour in zip(positions_reduced, names_reduced, col_reduced):
    # Fall back to grey when no colour band was assigned (empty string).
    colour = colour or "gray"
    # The line gets the station's colour band as well, like the marker.
    # The former return leg back to South Station retraced the same segment.
    folium.PolyLine([south_station, position], color=colour, weight=5, opacity=.3).add_to(boston_map5)
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color=colour).add_to(boston_map5)
boston_map5

It's interesting that many people (between 200 and 300) keep their bike for the whole trip and return it to the same station (probably taking the train back).

### Exemplary: Which stations are (most) approached by the least frequently used station? 

In [None]:
# New map with a large red marker on the start station
boston_map6 = folium.Map(
    location=(42.361145, -71.057083),
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)
zoo_marker = folium.CircleMarker(
    radius=10,
    location=(42.3035, -71.0853),
    popup="Franklin Park Zoo",
    color='red',
    fill_color='red',
)
zoo_marker.add_to(boston_map6)

In [None]:
# Keep only the rides that started at Franklin Park Zoo
starts_at_zoo = df_boston_full["start_station_name"] == "Franklin Park Zoo"
df_rides_less_used = df_boston_full[starts_at_zoo]

In [None]:
# Distinct end-station names and distinct end coordinates of the rides from Franklin Park Zoo
driven_to_less = pd.unique(df_rides_less_used["end_station_name"])
driven_to_coordinates_less = pd.unique(df_rides_less_used["Coordinates_end"])

In [None]:
# For every station reached from Franklin Park Zoo: number of rides, colour band, coordinates.
dictLess = []
for x in driven_to_less:
    akt = df_rides_less_used[df_rides_less_used["end_station_name"] == x]
    rides = len(akt)
    if rides == 0:
        # A missing (NaN) station name never compares equal, so there is nothing to plot.
        continue

    # Contiguous bands. The former strict `<` / `>` pairs left counts of exactly
    # 5, 10, 15 and 20 without any colour.
    if rides < 5:
        colour = "midnightblue"
    elif rides < 10:
        colour = "deepskyblue"
    elif rides < 15:
        colour = "thistle"
    elif rides < 20:
        colour = "orangered"
    else:
        colour = "red"

    # Take the coordinates from a ride to this station instead of indexing the separate
    # list of unique coordinates, which need not line up with the list of unique names.
    coordinates = akt["Coordinates_end"].iloc[0]
    dictLess.append({"end_station_name": x, "Count": rides, "Colour": colour, "Coordinates": coordinates})
df_colour_less = pd.DataFrame.from_dict(dictLess)

In [None]:
# Draw a line from Franklin Park Zoo to every reached station plus a marker on that station,
# both in the station's colour band.
# Position, name and colour are read row by row from df_colour_less. The previous loop
# used `col[i]` and `names[i]`, which belong to the South Station analysis, so popups
# and line colours referred to the wrong stations.
positions_less = list(df_colour_less["Coordinates"])
names_less = list(df_colour_less["end_station_name"])
col_less = list(df_colour_less["Colour"])
franklin_park_zoo = (42.3035, -71.0853)
for position, name, colour in zip(positions_less, names_less, col_less):
    # Fall back to grey when no colour band was assigned (empty string).
    colour = colour or "gray"
    # `color` is the folium option for the line colour; the former `strokeColor`
    # keyword is not a folium path option, so the lines were never coloured.
    # The former return leg back to the zoo retraced the same segment.
    folium.PolyLine([franklin_park_zoo, position], color=colour, weight=5, opacity=0.5).add_to(boston_map6)
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color=colour, fill_color='crimson').add_to(boston_map6)

In [None]:
# Render the Franklin Park Zoo map inline
boston_map6