# Geo Patterns from Boston 2015 Bluebikes

First, we import all required libraries.

In [None]:
import numpy as np
import pandas as pd
from datetime import date, time, datetime, timedelta 
from dateutil import parser
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap
import math
import random
import timeit

import warnings
warnings.filterwarnings("ignore")

## Prepare data

read in original dataset from bluebikes

In [None]:
def mydateparser(x):
    """Turn a timestamp string such as "2015-01-01 00:21:44" into a datetime."""
    return datetime.strptime(x, "%Y-%m-%d %H:%M:%S")


# Load the raw export; both timestamp columns are parsed while reading.
csvBostonFull = pd.read_csv(
    "Boston/Fertig.csv",
    parse_dates=['starttime', "stoptime"],
    date_parser=mydateparser,
)

# Drop the unnecessary column whose label is the whole quoted header line as one string.
superfluous_column = 'tripduration,"starttime","stoptime","start station id","start station name","start station latitude","start station longitude","end station id","end station name","end station latitude","end station longitude","bikeid","usertype","birth year","gender"'
csvBostonFull = csvBostonFull.drop(columns=[superfluous_column])

read in prepared dataset

In [None]:
def mydateparser(x):
    """Turn a timestamp string such as "2015-01-01 00:21:44" into a datetime."""
    return datetime.strptime(x, "%Y-%m-%d %H:%M:%S")


# Load the prepared dataset; start and end timestamps are parsed while reading.
testboston = pd.read_csv(
    "Boston/boston_2015.csv",
    parse_dates=['start_time', "end_time"],
    date_parser=mydateparser,
)

make sure datasets have the same number of rides

In [None]:
# Number of distinct start timestamps in the raw dataset (missing values count as one value).
csvBostonFull["starttime"].nunique(dropna=False)

In [None]:
# Compare the distinct start timestamps of the raw and the prepared dataset.
# The earlier form displayed only the length of the element-wise comparison, which says
# nothing about equality and fails outright when the two arrays differ in length.
# np.array_equal copes with different lengths and yields a single True/False verdict.
original_starts = csvBostonFull["starttime"].unique()
prepared_starts = testboston["start_time"].unique()
a = np.array_equal(original_starts, prepared_starts)

# Displayed as (count in raw data, count in prepared data, identical?).
len(original_starts), len(prepared_starts), a

Check whether each rented bike has been returned

In [None]:
# original dataset: rows whose end station id holds the literal "\N" marker
missing_end_original = csvBostonFull['end station id'] == "\\N"
csvBostonFull[missing_end_original]

In [None]:
# prepared dataset: rows whose end station id holds the literal "\N" marker
missing_end_prepared = testboston['end_station_id'] == "\\N"
testboston[missing_end_prepared]

Delete the rides with missing end-station data

In [None]:
# Remove every ride whose end station id is the literal "\N" marker.
# Filtering on the value rather than on a hard-coded row position means the cell still
# removes the right rows if the input files change, and re-running it is harmless:
# a second run finds nothing left to remove instead of dropping another valid ride.
csvBostonFull = csvBostonFull[csvBostonFull['end station id'] != "\\N"]
testboston = testboston[testboston['end_station_id'] != "\\N"]


In [None]:
# Print column dtypes and non-null counts of the prepared dataset.
testboston.info()

Convert each column that was affected by the missing data to the correct type

In [None]:
# Cast both end-station coordinate columns of the raw dataset to float.
for coordinate_column in ('end station latitude', 'end station longitude'):
    csvBostonFull[coordinate_column] = csvBostonFull[coordinate_column].astype(float)


In [None]:
# Cast the end station id of the prepared dataset to int.
end_station_ids = testboston['end_station_id']
testboston['end_station_id'] = end_station_ids.astype(int)

In [None]:
# Sanity check: both datasets must contain the same number of rows.
len(csvBostonFull.index) == len(testboston.index)

insert coordinates from the original dataset into the prepared dataset

In [None]:
# Target column in the prepared dataset -> source column in the raw dataset.
# Assigning a Series aligns on the row index, so both frames must share the same index.
coordinate_columns = {
    "start_latitude": "start station latitude",
    "start_longitude": "start station longitude",
    "end_latitude": "end station latitude",
    "end_longitude": "end station longitude",
}
for target_column, source_column in coordinate_columns.items():
    testboston[target_column] = csvBostonFull[source_column]

In [None]:
# Print column dtypes and non-null counts, now including the coordinate columns.
testboston.info()

In [None]:
# Combine latitude and longitude (each rounded to 4 decimals) into one (lat, lon) tuple per ride.
start_lat = testboston["start_latitude"].round(4)
start_lon = testboston["start_longitude"].round(4)
testboston["Coordinates_start"] = list(zip(start_lat, start_lon))

end_lat = testboston["end_latitude"].round(4)
end_lon = testboston["end_longitude"].round(4)
testboston["Coordinates_end"] = list(zip(end_lat, end_lon))

In [None]:
# Show the first ten rows of the prepared dataset.
testboston.head(10)

Creating new columns for weekdays  

In [None]:
def get_weekday(ts):
    """Return the weekday index of a timestamp (0 = Monday ... 6 = Sunday)."""
    weekday_index = ts.weekday()
    return weekday_index
# Weekday of each ride's start time: 0=Monday ... 6=Sunday
start_times = testboston["start_time"]
testboston["Weekday"] = start_times.apply(get_weekday)

## Show data on map

#### create new map of Boston

new OpenStreetMap with focus on Boston

In [None]:
# Base map: OpenStreetMap tiles centred on (42.361145, -71.057083).
boston_map = folium.Map(
    location=(42.361145, -71.057083),
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)

#### point each station on the map

In [None]:
# One crimson marker per distinct start coordinate.
# Coordinate and station name are read from the same row. De-duplicating the two columns
# independently yields lists of different length or order whenever a station name occurs
# with more than one coordinate (or the reverse), which mislabels popups or raises IndexError.
start_stations = testboston[["Coordinates_start", "start_station_name"]].drop_duplicates(
    subset="Coordinates_start"
)
positions = list(start_stations["Coordinates_start"])
names = list(start_stations["start_station_name"])
for position, name in zip(positions, names):
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color='crimson', fill_color='crimson').add_to(boston_map)

In [None]:
# One blue marker per distinct end coordinate.
# Coordinate and station name are read from the same row. De-duplicating the two columns
# independently yields lists of different length or order whenever a station name occurs
# with more than one coordinate (or the reverse), which mislabels popups or raises IndexError.
end_stations = testboston[["Coordinates_end", "end_station_name"]].drop_duplicates(
    subset="Coordinates_end"
)
positions = list(end_stations["Coordinates_end"])
names = list(end_stations["end_station_name"])
for position, name in zip(positions, names):
    folium.CircleMarker(radius=2, location=position, popup=name,
                        color='blue', fill_color='blue').add_to(boston_map)

In [None]:
# Render the map with the station markers.
boston_map

#### create heatmaps
show a heatmap based on every ride in the dataset

In [None]:
# Heat layer built from the start coordinate of every ride.
# Each run of this cell adds another layer to the same map object.
all_rides_heat = plugins.HeatMap(testboston["Coordinates_start"], radius=20)
all_rides_heat.add_to(boston_map)
boston_map

show a heatmap based on every ride taken on working days 

In [None]:
boston_map1 = folium.Map(location=(42.361145, -71.057083), tiles='OpenStreetMap',
                         zoom_start=12, control_scale=True, max_zoom=20)

# Heat layer for rides that started Monday-Friday (Weekday 0-4).
df2 = testboston[testboston["Weekday"] < 5]
boston_map1.add_child(plugins.HeatMap(df2["Coordinates_start"], radius=20))

# Station markers: coordinate and name always come from the same row. De-duplicating the
# two columns independently can produce lists that differ in length or order, which
# mislabels popups or raises IndexError.
start_stations = testboston[["Coordinates_start", "start_station_name"]].drop_duplicates(
    subset="Coordinates_start"
)
for position, name in zip(start_stations["Coordinates_start"],
                          start_stations["start_station_name"]):
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color='crimson', fill_color='crimson').add_to(boston_map1)

end_stations = testboston[["Coordinates_end", "end_station_name"]].drop_duplicates(
    subset="Coordinates_end"
)
for position, name in zip(end_stations["Coordinates_end"],
                          end_stations["end_station_name"]):
    folium.CircleMarker(radius=2, location=position, popup=name,
                        color='blue', fill_color='blue').add_to(boston_map1)

boston_map1

show a heatmap based on every ride taken on the weekends

In [None]:
boston_map2 = folium.Map(location=(42.361145, -71.057083), tiles='OpenStreetMap',
                         zoom_start=12, control_scale=True, max_zoom=20)

# Weekend rides are Saturday (5) and Sunday (6). The comparison must be >= 5:
# "> 5" keeps only Sunday and silently drops every Saturday ride.
df3 = testboston[testboston["Weekday"] >= 5]
boston_map2.add_child(plugins.HeatMap(df3["Coordinates_start"], radius=20))
boston_map2

In [None]:
#folium.PolyLine(testboston["Coordinates_start"],color="blue", weight=5, opacity=0.8, ).add_to(boston_map)
#boston_map

# Ten most frequently used stations in Boston

Count how often each station is used as the start of a ride

In [None]:
# Rides per start station, together with the coordinate of the first ride seen for it.
# A single groupby pass replaces one full scan of the data frame per station.
# sort=False keeps the stations in order of first appearance before the final sort.
most_used = (
    testboston
    .groupby("start_station_name", sort=False)
    .agg(
        Count=("Coordinates_start", "size"),
        Coordinates_start=("Coordinates_start", "first"),
    )
    .rename_axis("Station_Name")
    .reset_index()
    # sort df by count in descending order
    .sort_values(by="Count", ascending=False)
)


Show the ten most frequently used stations in a bar plot

In [None]:
# Sort explicitly so the selection does not depend on the order most_used happens to be in
# when this cell runs (another cell re-sorts it ascending).
most_used_reduced = most_used.sort_values(by="Count", ascending=False).head(10)

# plot
f, ax = plt.subplots(1, 1, figsize=(20, 10))
barplot = sns.barplot(x="Station_Name", y="Count", data=most_used_reduced, ax=ax)
barplot.set(xlabel="Station Name", ylabel="Counts")
plt.xticks(rotation=90)
# plt.show() renders the figure without echoing the return value of the last call;
# it replaces the print() that was used only to suppress that echo.
plt.show()

# Ten least frequently used stations in Boston

Show the ten least frequently used stations in a bar plot

In [None]:
# Take the ten smallest counts from a sorted copy. most_used itself is left untouched,
# so cells that expect it in descending order still work when they are re-run.
less_used_reduced = most_used.sort_values(by="Count", ascending=True).head(10)

# plot
f, ax = plt.subplots(1, 1, figsize=(20, 10))
barplot = sns.barplot(x="Station_Name", y="Count", data=less_used_reduced, ax=ax)
barplot.set(xlabel="Station Name", ylabel="Counts")
plt.xticks(rotation=90)
# plt.show() renders the figure without echoing the return value of the last call;
# it replaces the print() that was used only to suppress that echo.
plt.show()

### Compare positions of most and least frequently used stations

In [None]:
boston_map_frq = folium.Map(location=(42.361145, -71.057083), tiles='OpenStreetMap',
                            zoom_start=12, control_scale=True, max_zoom=20)

# Ten most used stations: blue outline and blue fill.
# Outline and fill now share one colour per group; the crossed colours used before
# (red outline / blue fill and blue outline / crimson fill) made the groups hard to tell apart.
# Position and name are read row by row so every popup belongs to its own marker.
for position, name in zip(most_used_reduced["Coordinates_start"],
                          most_used_reduced["Station_Name"]):
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color='blue', fill_color='blue').add_to(boston_map_frq)

# Ten least used stations: crimson outline and crimson fill.
for position, name in zip(less_used_reduced["Coordinates_start"],
                          less_used_reduced["Station_Name"]):
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color='crimson', fill_color='crimson').add_to(boston_map_frq)

Blue circles mark the ten most frequently used stations, red circles the ten least frequently used stations

In [None]:
# Render the comparison map.
boston_map_frq

### Exemplary: Which stations are (most) approached by the most frequently used station? 

Create a list of all rides that started at South Station - 700 Atlantic Ave.

In [None]:
# most frequently used station is South Station - 700 Atlantic Ave.
# Keep only the rides whose start station is South Station - 700 Atlantic Ave.
from_south_station = testboston["start_station_name"] == "South Station - 700 Atlantic Ave."
df_rides = testboston[from_south_station]

point approached stations on a map

In [None]:
# Rides whose start and end coordinates are identical, i.e. that ended where they started.
same_start_and_end = df_rides["Coordinates_start"] == df_rides["Coordinates_end"]
df_rides[same_start_and_end]

Create polylines between the most frequently used station and the stations reached from it

In [None]:
# Distinct destination names and distinct destination coordinates, each in order of first
# appearance. The two arrays are de-duplicated independently of each other.
driven_to = pd.unique(df_rides["end_station_name"])
driven_to_coordinates = pd.unique(df_rides["Coordinates_end"])

In [None]:
def _colour_for_count(count):
    """Return the traffic-band colour for a number of rides.

    The bands are half-open, so every count gets a colour:
    below 200 blue, 200-299 green, 300-499 yellow, 500-899 orange, 900 and above red.
    With strict comparisons on both sides, counts of exactly 200, 300, 500 or 900
    would match no band and end up with an empty colour.
    """
    if count < 200:
        return "blue"
    if count < 300:
        return "green"
    if count < 500:
        return "yellow"
    if count < 900:
        return "orange"
    return "red"


# One record per destination station, in order of first appearance (sort=False).
# The coordinate is taken from the station's own rides. Pairing names with a separately
# de-duplicated coordinate array by position breaks as soon as both differ in length or order.
dictL = [
    {
        "end_station_name": station,
        "Count": len(rides),
        "Colour": _colour_for_count(len(rides)),
        "Coordinates": rides["Coordinates_end"].iloc[0],
    }
    for station, rides in df_rides.groupby("end_station_name", sort=False)
]
df_colour = pd.DataFrame(dictL)

In [None]:
# Display the per-destination table (name, ride count, colour, coordinates).
df_colour

In [None]:
# Fresh map carrying a single large red marker for South Station.
boston_map4 = folium.Map(
    location=(42.361145, -71.057083),
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)
south_station_marker = folium.CircleMarker(
    radius=10,
    location=(42.3522, -71.0555),
    popup="South Station - 700 Atlantic Ave.",
    color='red',
    fill_color='red',
)
south_station_marker.add_to(boston_map4)
boston_map4

In [None]:
# Every line starts at the position used for the South Station marker.
south_station = (42.3522, -71.0555)

# Read position, name and colour from the same row. Indexing the colour column with a
# counter over the de-duplicated coordinates mismatches them when coordinates repeat
# or when df_colour has been re-sorted.
for position, name, colour in zip(df_colour["Coordinates"],
                                  df_colour["end_station_name"],
                                  df_colour["Colour"]):
    # An empty colour string would not be a usable colour value; fall back to gray.
    band_colour = colour or "gray"
    # The stroke colour option of a folium path is "color"; "strokeColor" is not a
    # recognised option, so the lines were never drawn in the band colour.
    folium.PolyLine([south_station, position], color=band_colour,
                    weight=5, opacity=0.1).add_to(boston_map4)
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color=band_colour, fill_color='crimson').add_to(boston_map4)

In [None]:
# Disabled: optional heat layer built from the destination coordinates of df_rides.
#boston_map4.add_child(plugins.HeatMap(df_rides["Coordinates_end"], radius=20))
# Render the map with everything added to it so far.
boston_map4

### To reduce visual clutter we only show the 50 most travelled stations

In [None]:
# Order the destinations by ride count, busiest first. This rebinds df_colour to the sorted frame.
df_colour=df_colour.sort_values(by="Count", ascending=False)
df_colour

In [None]:
# Keep the first 50 rows of df_colour in its current order.
df_colour_reduced = df_colour.iloc[:50]
df_colour_reduced

In [None]:
# Fresh map carrying a single large red marker for South Station.
boston_map5 = folium.Map(
    location=(42.361145, -71.057083),
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)
south_station_marker = folium.CircleMarker(
    radius=10,
    location=(42.3522, -71.0555),
    popup="South Station - 700 Atlantic Ave.",
    color='red',
    fill_color='red',
)
south_station_marker.add_to(boston_map5)
boston_map5

In [None]:
# Every line starts at the position used for the South Station marker.
south_station = (42.3522, -71.0555)

# Read position, name and colour from the same row. Pairing a de-duplicated coordinate
# list with the full name and colour lists by counter mismatches them when two
# destinations share the same coordinates.
positions_reduced = list(df_colour_reduced["Coordinates"])
names_reduced = list(df_colour_reduced["end_station_name"])
col_reduced = list(df_colour_reduced["Colour"])
for position, name, colour in zip(positions_reduced, names_reduced, col_reduced):
    # Line to the destination and back to South Station, drawn in the default line colour.
    folium.PolyLine([south_station, position, south_station],
                    weight=5, opacity=.3).add_to(boston_map5)
    folium.CircleMarker(radius=5, location=position, popup=name,
                        color=colour).add_to(boston_map5)

boston_map5