# Imports 

In [4]:
# needed for data processing  
import pandas as pd


# needed for visualization
import folium
import plotly.express as px

**Task: Geographical Demand Patterns:** Which stations are particularly popular and which are not? Provide
a rationale as to why you observe these patterns.

In [5]:
# Load the preprocessed data frame that all later cells read from `df`
# (presumably one row per trip, given the start/end station and start_time columns used below).
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
df = pd.read_pickle('PreprocessedDataframe.pk')

## Popularity of Stations

First, we look at which start stations and which end stations are particularly popular. For this purpose, two dataframes are created in which the total number of bookings per station is summed up.

In [6]:
# Number of departures per start station; a station is identified by its name and coordinates.
df_start_station_popularity = (
    df[['start_station_id', 'start_station_name', 'start_latitude', 'start_longitude']]
    .groupby(['start_station_name', 'start_latitude', 'start_longitude'])["start_station_id"]
    .count()
    .reset_index(name="count_depatures")
    .dropna(axis=0)
)
# Display the five stations with the most departures.
df_start_station_popularity.sort_values(by='count_depatures', ascending=False).head()

Unnamed: 0,start_station_name,start_latitude,start_longitude,count_depatures
121,MIT at Mass Ave / Amherst St,42.3581,-71.093198,35638
159,South Station - 700 Atlantic Ave,42.352175,-71.055547,31507
45,Central Square at Mass Ave / Essex St,42.36507,-71.1031,24106
87,Harvard Square at Mass Ave/ Dunster,42.373268,-71.118579,23159
119,MIT Stata Center at Vassar St / Main St,42.361962,-71.092053,21175


In [7]:
# Number of arrivals per end station; a station is identified by its name and coordinates.
df_end_station_popularity = (
    df[['end_station_id', 'end_station_name', 'end_latitude', 'end_longitude']]
    .groupby(['end_station_name', 'end_latitude', 'end_longitude'])["end_station_id"]
    .count()
    .reset_index(name="count_arrivals")
    .dropna(axis=0)
)
# Display the five stations with the most arrivals.
df_end_station_popularity.sort_values(by='count_arrivals', ascending=False).head()

Unnamed: 0,end_station_name,end_latitude,end_longitude,count_arrivals
121,MIT at Mass Ave / Amherst St,42.3581,-71.093198,36099
159,South Station - 700 Atlantic Ave,42.352175,-71.055547,31800
119,MIT Stata Center at Vassar St / Main St,42.361962,-71.092053,29628
87,Harvard Square at Mass Ave/ Dunster,42.373268,-71.118579,24335
45,Central Square at Mass Ave / Essex St,42.36507,-71.1031,23858


## Departures & Arrival Heatmaps [1][2]

Heatmap of total departures per Station

In [8]:
# Bubble map of departures per start station: marker size and colour both encode the count.
departure_colour_scale = [[0, 'rgba(255, 180, 180, 0.85)'], [1, 'rgba(255,0,0, 0.85)']]
fig = px.scatter_mapbox(
    df_start_station_popularity,
    lat="start_latitude",
    lon="start_longitude",
    hover_name="start_station_name",
    hover_data=["count_depatures"],
    color="count_depatures",
    size="count_depatures",
    color_continuous_scale=departure_colour_scale,
    center={"lat": 42.360505392455, "lon": -71.06550202408724},
    zoom=11.5,
    height=400,
)
# OpenStreetMap tiles, with the figure margins removed so the map fills the output area.
fig = fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

We can observe that the stations with the most departures (the larger, more intensely red circles) are located close to the universities or the train station (South Station).

Heatmap of total arrivals

In [9]:
# Bubble map of arrivals per end station: marker size and colour both encode the count.
# The colour scale runs from light blue (few arrivals) to saturated blue (many arrivals),
# so that it is visually distinct from the red scale used for the departures map.
arrival_colour_scale = [[0, 'rgba(180, 180, 255, 0.85)'], [1, 'rgba(0,0,255, 0.85)']]
fig = px.scatter_mapbox(
    df_end_station_popularity,
    lat="end_latitude",
    lon="end_longitude",
    hover_name="end_station_name",
    hover_data=["count_arrivals"],
    color="count_arrivals",
    size="count_arrivals",
    color_continuous_scale=arrival_colour_scale,
    center=dict(lat=42.360505392455, lon=-71.06550202408724),
    zoom=11.5,
    height=400,
)
# OpenStreetMap tiles, with the figure margins removed so the map fills the output area.
fig = fig.update_layout(mapbox_style="open-street-map")
fig = fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

We can observe that the stations with the most arrivals (the larger, more intensely blue circles) are located close to the universities or the train station (South Station).

**Net Demand Patterns** 
Here we show that, over time, more cyclists depart from some stations than arrive at them, which can result in a shortage of bicycles at those stations. 

In [10]:
# Combine departure and arrival counts per station by matching the station names.
# The left join keeps every station that has departures; coordinates come from the arrivals frame.
departures_per_station = df_start_station_popularity[['start_station_name', 'count_depatures']]
net_demand = departures_per_station.merge(
    df_end_station_popularity,
    how='left',
    left_on="start_station_name",
    right_on="end_station_name",
)
# Net value = arrivals - departures: positive means more trips end at the station than start there.
net_demand['net_depature'] = net_demand['count_arrivals'].sub(net_demand['count_depatures'])

In [12]:
# Bubble map of the net value (arrivals - departures) per station.
# Marker size encodes the magnitude, colour encodes the sign: red = negative, blue = positive.
lowest_net = min(net_demand['net_depature'])
highest_net = max(net_demand['net_depature'])
# Relative position of the value 0 on the colour axis, so that white marks balanced stations.
zero_position = abs(lowest_net / (abs(lowest_net) + highest_net))
diverging_scale = [
    [0, 'rgba(255, 0, 0, 0.85)'],
    [zero_position, 'rgba(255, 255, 255, 0.85)'],
    [1, 'rgba(0,0,255, 0.85)'],
]
fig = px.scatter_mapbox(
    net_demand,
    lat="end_latitude",
    lon="end_longitude",
    hover_name="start_station_name",
    hover_data=["net_depature"],
    color="net_depature",
    size=net_demand["net_depature"].abs(),
    color_continuous_scale=diverging_scale,
    center={"lat": 42.360505392455, "lon": -71.06550202408724},
    zoom=11.5,
    height=400,
)
# OpenStreetMap tiles, with the figure margins removed so the map fills the output area.
fig = fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

We can observe that more trips end at the MIT Stata Center station than start there, whereas at stations like *Broadway at Fayette Str* or Beacon Str, more trips start than end. At the latter stations, we can assume that a shortage of bicycles builds up over time. 
At the other stations, departures and arrivals are more or less balanced. 

## Weekly demand patterns[3]


Another demand shift may occur within the week, since e.g. there are no commuters on the weekends. This is implemented by means of an animated graphic. 
For this purpose a weekday column is created and the data is grouped on a daily basis. 

In [13]:
# Copy of the trip data with an added `weekday` column (0 = Monday ... 6 = Sunday).
# The vectorized `.dt` accessor replaces a row-wise Python lambda over every trip.
weekly = df.assign(weekday=pd.to_datetime(df['start_time']).dt.weekday)

# Count the trips per start station and weekday. `count()` yields the number of non-null
# values for every remaining column; each of these columns gets the suffix `_COUNT`.
# Sorting by weekday puts the rows in weekday order for the animated map.
weekly_group = (
    weekly
    .groupby(['start_latitude', 'start_longitude', 'start_station_id', 'weekday'])
    .count()
    .add_suffix('_COUNT')
    .reset_index()
    .sort_values(by='weekday')
)



In [14]:
# Animated bubble map with one frame per weekday; marker size is the number of trips
# that started at the station on that weekday.
fig = px.scatter_mapbox(
    weekly_group,
    lat="start_latitude",
    lon="start_longitude",
    hover_name="start_station_id",
    size="start_station_name_COUNT",
    animation_frame="weekday",
    zoom=11.8,
    height=600,
)
# OpenStreetMap tiles, with the figure margins removed so the map fills the output area.
fig = fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

Unfortunately the visualization only shows that the demand is lower on the weekend. There does not seem to be a particular larger/lower demand around certain stations. 

## Hourly demand patterns

Now we also look at how demand changes over the course of a day, hour by hour. 

In [15]:
# Copy of the trip data with an added `hour` column (hour of the day in which the trip started).
# The vectorized `.dt` accessor replaces a row-wise Python lambda over every trip.
hourly = df.assign(hour=pd.to_datetime(df['start_time']).dt.hour)

# Count the trips per start station and hour. `count()` yields the number of non-null
# values for every remaining column; each of these columns gets the suffix `_COUNT`.
# Sorting by hour puts the rows in chronological order for the animated map.
hourly_group = (
    hourly
    .groupby(['start_latitude', 'start_longitude', 'start_station_id', 'hour'])
    .count()
    .add_suffix('_COUNT')
    .reset_index()
    .sort_values(by='hour')
)


In [16]:
# Animated, interactive bubble map with one frame per hour of the day; marker size is the
# number of trips that started at the station during that hour.
fig = px.scatter_mapbox(
    hourly_group,
    lat="start_latitude",
    lon="start_longitude",
    hover_name="start_station_id",
    size="start_station_name_COUNT",
    animation_frame="hour",
    zoom=11.8,
    height=600,
)
# OpenStreetMap tiles, with the figure margins removed so the map fills the output area.
fig = fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

The greatest demand is between 7 am and evening. It can be observed that the demand for stations in the suburbs decreases during lunchtime, while the demand near the city center/universities continues. 

# References


[1] M. Kefei, "Meet Plotly Mapbox. Best Choice for Geographic Data Visualization", June 19, 2020. [Online]. Available: towards Data Science, https://towardsdatascience.com/meet-plotly-mapbox-best-choice-for-geographic-data-visualization-599b514bcd9a. [Accessed January 24, 2022].

[2] Unknown, "Scatter Plots on Mapbox in Python", 2021 [Online]. Available: Plotly, https://plotly.com/python/scattermapbox/ [Accessed January 24, 2022].

[3] U. B. Tariq, "How to animate Scatterplots on Mapbox using Plotly Express?", June 20, 2020. [Online]. Available: towards Data Science, https://towardsdatascience.com/how-to-animate-scatterplots-on-mapbox-using-plotly-express-3bb49fe6a5d. [Accessed January 24, 2022].

