# Analytics and Application - Descriptive Analysis
## Plotting Geodata

In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime as dt
from dateutil.parser import parse
import math

import folium
from folium import plugins
from folium.plugins import HeatMap
import folium.plugins as plugins
import statistics

##  Geodata Preparation

In [2]:
Bike_data = pd.read_csv("data/sf_2018.csv")
Geodata = pd.read_csv("data/geo_data_cleaned.csv")

In [3]:
Geodata

Unnamed: 0,station,lat,lng,result
0,"Mission Bay Kids Park, San Francisco",37.772108,-122.392746,[{'address_components': [{'long_name': 'Missio...
1,"Grand Ave at Webster St, San Francisco",37.811382,-122.265413,[{'address_components': [{'long_name': 'Grand ...
2,"MLK Jr Way at University Ave, San Francisco",37.871609,-122.273027,[{'address_components': [{'long_name': 'Univer...
3,"Broadway at 30th St, San Francisco",37.818598,-122.262269,[{'address_components': [{'long_name': 'San Fr...
4,"16th St Mission BART Station 2, San Francisco",37.765362,-122.419568,[{'address_components': [{'long_name': '16th S...
...,...,...,...,...
343,"Stanford Ave at Hollis St, San Francisco",37.838416,-122.288836,[{'address_components': [{'long_name': 'Stanfo...
344,"2nd St at S Park St, San Francisco",37.782473,-122.392848,[{'address_components': [{'long_name': '2nd St...
345,"10th Ave at E 15th St, San Francisco",37.793032,-122.248608,[{'address_components': [{'long_name': 'San Fr...
346,"Myrtle St at Polk St, San Francisco",37.785439,-122.419668,[{'address_components': [{'long_name': 'Polk S...


In [4]:
Bike_stations = Bike_data[['start_station_id','start_station_name']]

In [5]:
# drop duplicates
Bike_data = Bike_data.drop_duplicates()
Bike_stations = Bike_stations.drop_duplicates()

In [6]:
Bike_stations = Bike_stations.sort_values('start_station_id')

Bike_stations = Bike_stations.set_index('start_station_id')
Bike_stations = Bike_stations.dropna()
Bike_stations

Unnamed: 0_level_0,start_station_name
start_station_id,Unnamed: 1_level_1
3.0,Powell St BART Station (Market St at 4th St)
4.0,Cyril Magnin St at Ellis St
5.0,Powell St BART Station (Market St at 5th St)
6.0,The Embarcadero at Sansome St
7.0,Frank H Ogawa Plaza
...,...
375.0,Grove St at Masonic Ave
377.0,Fell St at Stanyan St
378.0,Empire St at 7th St
380.0,Masonic Ave at Turk St


In [7]:
# drop String part "San Franciso"
Geodata['station'] = Geodata['station'].map(lambda x: x.rstrip(', San Francisco'))
Geodata = Geodata.sort_values('station')
Bike_stations = Bike_stations.sort_values('start_station_name')

In [8]:
# Sort data after Station_name
Bike_stations

Unnamed: 0_level_0,start_station_name
start_station_id,Unnamed: 1_level_1
222.0,10th Ave at E 15th St
201.0,10th St at Fallon St
351.0,10th St at University Ave
88.0,11th St at Bryant St
77.0,11th St at Natoma St
...,...
359.0,Williams Ave at Apollo St
301.0,Willow St at Vine St
272.0,Woolsey St at Sacramento St
272.0,Woolsey St at Sacramento St1


In [9]:
# Add Station_id to Geodata
Geodata['station_id'] = Bike_stations.index

In [10]:
# Geodata with station_id
Geodata

Unnamed: 0,station,lat,lng,result,station_id
345,10th Ave at E 15th St,37.793032,-122.248608,[{'address_components': [{'long_name': 'San Fr...,222.0
196,10th St at Fallon St,37.772823,-122.412850,[{'address_components': [{'long_name': '10th S...,201.0
295,10th St at University Ave,37.772823,-122.412850,[{'address_components': [{'long_name': '10th S...,351.0
329,11th St at Bryant St,37.769435,-122.410878,[{'address_components': [{'long_name': '11th S...,88.0
264,11th St at Natoma St,37.773533,-122.416139,[{'address_components': [{'long_name': 'Natoma...,77.0
...,...,...,...,...,...
174,Williams Ave at Apollo St,37.730185,-122.398997,[{'address_components': [{'long_name': 'Willia...,359.0
11,Willow St at Vine St,37.318623,-121.883094,[{'address_components': [{'long_name': 'San Fr...,301.0
276,Woolsey St at Sacramento St,37.850627,-122.278320,[{'address_components': [{'long_name': 'Sacram...,272.0
136,Woolsey St at Sacramento St1,37.723993,-122.407455,[{'address_components': [{'long_name': 'Woolse...,272.0


In [11]:
Geodata = Geodata.sort_values('station_id')
Geodata = Geodata.set_index('station_id')

# Correcting wrong geodata-entry:
Geodata.at[210,'station'] = '45th St at Manila'
Geodata.at[210,'lat'] = 37.833304
Geodata.at[210,'lng'] = -122.256240
Geodata.drop_duplicates()
Geodata

Unnamed: 0_level_0,station,lat,lng,result
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3.0,Powell St BART Station (Market St at 4th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...
4.0,Cyril Magnin St at Ellis St,37.785398,-122.408699,[{'address_components': [{'long_name': 'Ellis ...
5.0,Powell St BART Station (Market St at 5th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...
6.0,The Embarcadero at Sansome St,37.805048,-122.403449,[{'address_components': [{'long_name': 'Sansom...
7.0,Frank H Ogawa Plaz,37.805531,-122.272074,[{'address_components': [{'long_name': 'Frank ...
...,...,...,...,...
375.0,Grove St at Masonic Ave,37.774873,-122.446371,[{'address_components': [{'long_name': 'Masoni...
377.0,Fell St at Stanyan St,37.771911,-122.454095,[{'address_components': [{'long_name': 'Fell S...
378.0,Empire St at 7th St,37.347688,-121.890546,[{'address_components': [{'long_name': '7th St...
380.0,Masonic Ave at Turk St,37.778631,-122.447108,[{'address_components': [{'long_name': 'Turk B...


In [12]:
# Transform date from String to datetime
Zeit_datetime_start = pd.to_datetime(Bike_data['start_time'])
Zeit_datetime_end = pd.to_datetime(Bike_data['end_time'])

In [13]:
Bike_data

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
0,2018-01-31 22:52:36,2018-02-01 19:47:20,120.0,285.0,Mission Dolores Park,Webster St at O'Farrell St,2765,Subscriber
1,2018-01-31 16:13:35,2018-02-01 15:57:18,15.0,15.0,San Francisco Ferry Building (Harry Bridges Pl...,San Francisco Ferry Building (Harry Bridges Pl...,2815,Customer
2,2018-01-31 14:23:56,2018-02-01 10:16:53,304.0,296.0,Jackson St at 5th St,5th St at Virginia St,3039,Customer
3,2018-01-31 14:53:24,2018-02-01 07:51:21,75.0,47.0,Market St at Franklin St,4th St at Harrison St,321,Customer
4,2018-01-31 19:52:25,2018-02-01 06:58:32,74.0,19.0,Laguna St at Hayes St,Post St at Kearny St,617,Subscriber
...,...,...,...,...,...,...,...,...
1863716,2018-12-01 00:11:55,2018-12-01 00:19:49,345.0,81.0,Hubbell St at 16th St,Berry St at 4th St,3035,Subscriber
1863717,2018-12-01 00:02:49,2018-12-01 00:16:50,10.0,58.0,Washington St at Kearny St,Market St at 10th St,2034,Subscriber
1863718,2018-12-01 00:05:28,2018-12-01 00:09:48,245.0,255.0,Downtown Berkeley BART,Virginia St at Shattuck Ave,2243,Subscriber
1863719,2018-12-01 00:03:07,2018-12-01 00:08:00,93.0,126.0,4th St at Mission Bay Blvd S,Esprit Park,545,Subscriber


In [14]:
bike_ids=Bike_data['bike_id'].nunique()

## Split by User_Type

In [15]:
# sort the dataframe
Bike_data.sort_values(by='user_type', axis=0, inplace=True)

# get a list of names
user_types=Bike_data['user_type'].unique().tolist()

# now we can perform a lookup on a 'view' of the dataframe
Subscriber = Bike_data.loc[Bike_data.user_type=='Subscriber']
Customer = Bike_data.loc[Bike_data.user_type=='Customer']

In [16]:
Subscriber

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
1291724,2018-09-18 14:37:53,2018-09-18 14:46:04,5.0,350.0,Powell St BART Station (Market St at 5th St),8th St at Brannan St,1362,Subscriber
1360285,2018-09-07 16:23:51,2018-09-07 16:30:01,98.0,119.0,Valencia St at 16th St,18th St at Noe St,2296,Subscriber
1015179,2018-07-02 02:39:18,2018-07-02 02:58:40,52.0,71.0,McAllister St at Baker St,Broderick St at Oak St,3921,Subscriber
1292044,2018-09-18 13:24:24,2018-09-18 13:28:48,44.0,42.0,Civic Center/UN Plaza BART Station (Market St ...,San Francisco City Hall (Polk St at Grove St),1202,Subscriber
1299885,2018-09-17 12:01:43,2018-09-17 12:04:11,81.0,93.0,Berry St at 4th St,4th St at Mission Bay Blvd S,138,Subscriber
...,...,...,...,...,...,...,...,...
669863,2018-06-24 12:32:13,2018-06-24 12:50:10,6.0,284.0,The Embarcadero at Sansome St,Yerba Buena Center for the Arts (Howard St at ...,2317,Subscriber
669862,2018-06-24 12:31:56,2018-06-24 12:50:15,10.0,74.0,Washington St at Kearny St,Laguna St at Hayes St,3869,Subscriber
669861,2018-06-24 12:45:17,2018-06-24 12:50:27,205.0,172.0,Shafter Ave at Cavour St,College Ave at Taft Ave,3380,Subscriber
676376,2018-06-22 21:36:46,2018-06-22 21:43:08,214.0,180.0,Market St at Brockhurst St,Telegraph Ave at 23rd St,1026,Subscriber


In [17]:
Customer

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
505846,2018-05-21 10:10:15,2018-05-21 10:35:20,6.0,43.0,The Embarcadero at Sansome St,San Francisco Public Library (Grove St at Hyde...,3080,Customer
1213935,2018-09-30 10:30:29,2018-09-30 10:46:31,205.0,205.0,Miles Ave at Cavour St,Miles Ave at Cavour St,1734,Customer
1586692,2018-10-02 16:57:56,2018-10-02 17:06:58,88.0,67.0,11th St at Bryant St,San Francisco Caltrain Station 2 (Townsend St...,3708,Customer
1213933,2018-09-30 10:38:24,2018-09-30 10:46:45,44.0,321.0,Civic Center/UN Plaza BART Station (Market St ...,5th St at Folsom,180,Customer
768116,2018-06-08 17:20:04,2018-06-08 17:36:53,167.0,249.0,College Ave at Harwood Ave,Russell St at College Ave,3505,Customer
...,...,...,...,...,...,...,...,...
881055,2018-07-22 18:34:34,2018-07-22 18:41:53,304.0,279.0,Jackson St at 5th St,Santa Clara St at 7th St,264,Customer
1027696,2018-08-30 15:21:21,2018-08-30 15:37:01,9.0,10.0,Broadway at Battery St,Washington St at Kearny St,2736,Customer
881029,2018-07-22 18:22:07,2018-07-22 18:47:40,23.0,6.0,The Embarcadero at Steuart St,The Embarcadero at Sansome St,3152,Customer
877960,2018-07-23 09:30:28,2018-07-23 09:47:24,92.0,22.0,Mission Bay Kids Park,Howard St at Beale St,803,Customer


In [18]:
Start_Ids=Bike_data.sort_values('start_station_id')['start_station_id'].unique().tolist()
End_Ids=Bike_data.sort_values('end_station_id')['end_station_id'].unique().tolist()

## Data for Plots

In [19]:
Bike_count = Bike_data.groupby('start_station_id').count()
Bike_count = Bike_count.sort_values(by = ('start_station_id') , ascending = True)
Bike_count

Unnamed: 0_level_0,start_time,end_time,end_station_id,start_station_name,end_station_name,bike_id,user_type
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3.0,31656,31656,31656,31656,31656,31656,31656
4.0,6058,6058,6058,6058,6058,6058,6058
5.0,25007,25007,25007,25007,25007,25007,25007
6.0,33390,33390,33390,33390,33390,33390,33390
7.0,8658,8658,8658,8658,8658,8658,8658
...,...,...,...,...,...,...,...
375.0,591,591,591,591,591,591,591
377.0,971,971,971,971,971,971,971
378.0,118,118,118,118,118,118,118
380.0,102,102,102,102,102,102,102


In [20]:
Bike_count['end_station_id']
weights = pd.concat([Geodata,Bike_count], axis=1)
weights

Unnamed: 0,station,lat,lng,result,start_time,end_time,end_station_id,start_station_name,end_station_name,bike_id,user_type
3.0,Powell St BART Station (Market St at 4th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...,31656,31656,31656,31656,31656,31656,31656
4.0,Cyril Magnin St at Ellis St,37.785398,-122.408699,[{'address_components': [{'long_name': 'Ellis ...,6058,6058,6058,6058,6058,6058,6058
5.0,Powell St BART Station (Market St at 5th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...,25007,25007,25007,25007,25007,25007,25007
6.0,The Embarcadero at Sansome St,37.805048,-122.403449,[{'address_components': [{'long_name': 'Sansom...,33390,33390,33390,33390,33390,33390,33390
7.0,Frank H Ogawa Plaz,37.805531,-122.272074,[{'address_components': [{'long_name': 'Frank ...,8658,8658,8658,8658,8658,8658,8658
...,...,...,...,...,...,...,...,...,...,...,...
375.0,Grove St at Masonic Ave,37.774873,-122.446371,[{'address_components': [{'long_name': 'Masoni...,591,591,591,591,591,591,591
377.0,Fell St at Stanyan St,37.771911,-122.454095,[{'address_components': [{'long_name': 'Fell S...,971,971,971,971,971,971,971
378.0,Empire St at 7th St,37.347688,-121.890546,[{'address_components': [{'long_name': '7th St...,118,118,118,118,118,118,118
380.0,Masonic Ave at Turk St,37.778631,-122.447108,[{'address_components': [{'long_name': 'Turk B...,102,102,102,102,102,102,102


In [21]:
lons = Geodata['lng']
lats = Geodata['lat']
print(lons)
print(lats)


station_id
3.0     -122.407998
4.0     -122.408699
5.0     -122.407998
6.0     -122.403449
7.0     -122.272074
            ...    
375.0   -122.446371
377.0   -122.454095
378.0   -121.890546
380.0   -122.447108
381.0   -122.425874
Name: lng, Length: 348, dtype: float64
station_id
3.0      37.784369
4.0      37.785398
5.0      37.784369
6.0      37.805048
7.0      37.805531
           ...    
375.0    37.774873
377.0    37.771911
378.0    37.347688
380.0    37.778631
381.0    37.758220
Name: lat, Length: 348, dtype: float64


## Heatmaps

In [22]:
meanLons = statistics.mean(lons)
meanLat = statistics.mean(lats)

In [23]:
# Drop unnecessary columns
Bike_start = Bike_data.groupby('start_station_id').describe()
Bike_start = Bike_start.drop( [('end_station_id','std'),('end_station_id','mean'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max'),'bike_id'], axis=1)
Bike_start = Bike_start.sort_values(by = ('end_station_id','count') , ascending = False)
Bike_start_Top = Bike_start[Bike_start["end_station_id","count"] > 33500]

In [24]:
for id in Bike_start_Top.index:
    print(id)

15.0
67.0
30.0
58.0
81.0


In [25]:
# Show Map with all stations
mapStations = folium.Map(location=[meanLat, meanLons], zoom_start = 12)
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapStations)
mapStations

In [26]:
# Heatmap with all demands
mapObj = folium.Map(location=[meanLat, meanLons], zoom_start = 12)
# create heatmap layer
#heatmap = HeatMap( list(zip(lats, lons, Start_Fahrten_Summe)),
heatmap = HeatMap( list(zip(lats, lons, weights['end_station_id'])),
                    min_opacity = 0.12,
                   radius=25, blur=15, 
                   max_zoom=1,
                 )
# add heatmap layer to base map

for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapObj)

# Mark the 5 Stations with highest Demand 
for id in Bike_start_Top.index:
        folium.CircleMarker([Geodata.loc[id]['lat'],Geodata.loc[id]['lng'] ], radius=10, color='limegreen').add_to(mapObj)
heatmap.add_to(mapObj)
mapObj

In [27]:
# Customer
Bike_count_Customer = Customer.groupby('start_station_id').count()
Bike_count_Customer = Bike_count_Customer.sort_values(by = ('start_station_id') , ascending = True)

weights_Customer = pd.concat([Geodata,Bike_count_Customer], axis=1)

In [28]:
# Heatmap with Customer demands
mapObj_Customer = folium.Map(location=[meanLat, meanLons], zoom_start = 12)
# create heatmap layer
heatmap_Customer = HeatMap( list(zip(lats, lons, weights_Customer['end_station_id'])),
                   min_opacity=0.12,
                   radius=20, blur=15, 
                   max_zoom=1)
# add heatmap layer to base map
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapObj_Customer)
heatmap_Customer.add_to(mapObj_Customer)
mapObj_Customer

In [29]:
# Subscriber
Bike_count_Subscriber = Subscriber.groupby('start_station_id').count()
Bike_count_Subscriber = Bike_count_Subscriber.sort_values(by = ('start_station_id') , ascending = True)

weights_Subscriber = pd.concat([Geodata,Bike_count_Subscriber], axis=1)

In [30]:
# Heatmap with Subscriber demand
mapObj_Subscriber = folium.Map(location=[meanLat, meanLons], zoom_start = 12)
# create heatmap layer
heatmap_Subscriber = HeatMap( list(zip(lats, lons, weights_Subscriber['end_station_id'])),
                   min_opacity=0.12,
                   radius=20, blur=15, 
                   max_zoom=1)
# add heatmap layer to base map
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapObj_Subscriber)
heatmap_Subscriber.add_to(mapObj_Subscriber)
mapObj_Subscriber

Seeing no noticeable difference between Subscriber and Customer (Atleast over a year)

## Heatmaps after Time

## Per Month

In [31]:
# drop unnecessary Columns
Bike_data_Zeit = Bike_data.drop(columns=['bike_id', 'user_type'])
Bike_data_Zeit

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name
505846,2018-05-21 10:10:15,2018-05-21 10:35:20,6.0,43.0,The Embarcadero at Sansome St,San Francisco Public Library (Grove St at Hyde...
1213935,2018-09-30 10:30:29,2018-09-30 10:46:31,205.0,205.0,Miles Ave at Cavour St,Miles Ave at Cavour St
1586692,2018-10-02 16:57:56,2018-10-02 17:06:58,88.0,67.0,11th St at Bryant St,San Francisco Caltrain Station 2 (Townsend St...
1213933,2018-09-30 10:38:24,2018-09-30 10:46:45,44.0,321.0,Civic Center/UN Plaza BART Station (Market St ...,5th St at Folsom
768116,2018-06-08 17:20:04,2018-06-08 17:36:53,167.0,249.0,College Ave at Harwood Ave,Russell St at College Ave
...,...,...,...,...,...,...
669863,2018-06-24 12:32:13,2018-06-24 12:50:10,6.0,284.0,The Embarcadero at Sansome St,Yerba Buena Center for the Arts (Howard St at ...
669862,2018-06-24 12:31:56,2018-06-24 12:50:15,10.0,74.0,Washington St at Kearny St,Laguna St at Hayes St
669861,2018-06-24 12:45:17,2018-06-24 12:50:27,205.0,172.0,Shafter Ave at Cavour St,College Ave at Taft Ave
676376,2018-06-22 21:36:46,2018-06-22 21:43:08,214.0,180.0,Market St at Brockhurst St,Telegraph Ave at 23rd St


In [32]:
Bike_data_Zeit['start_time']= pd.to_datetime(Bike_data_Zeit['start_time']) 

Bike_data_Monat = Bike_data_Zeit.groupby([Bike_data_Zeit['start_time'].dt.month, Bike_data_Zeit['start_station_id']]).describe()
Bike_data_Monat = Bike_data_Monat.drop(columns = [('end_station_id','mean'),('end_station_id','std'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max')])

In [33]:
result = {}
for i in range(1,13):
    result[i] = pd.concat([Geodata,Bike_data_Monat.loc[i]], axis=1)


In [34]:
maxValue = 0
for i in range(1,13):
    if (maxValue < max(result[i][('end_station_id','count')].tolist())):
        maxValue = max(result[i][('end_station_id','count')].tolist())
maxValue

4712.0

In [35]:
data = {}
betterData ={}
for i in range(1,13):
    #maxIt = max(result[i][('end_station_id','count')].tolist())
    #print(maxIt)
    x = np.array(result[i][('end_station_id','count')])
    x= np.nan_to_num(x)
    data[i] = list(zip(result[i]['lat'].tolist(),result[i]['lng'].tolist(),
                       (x/maxValue).tolist()))
    betterData[i] = [list(entry) for entry in data[i]] 
    #for entry in data[i]:
     #   betterData[i] = betterData[i].append(list(entry))

In [36]:
data_month = []
for i in range(1,13):
    data_month.append(betterData[i])

In [37]:
# Heatmap over months
map_month = folium.Map(location=[meanLat, meanLons], zoom_start = 12)

hm = plugins.HeatMapWithTime(data_month,
                             #max_opacity=1,
                             radius=25, 
                             auto_play=True,
                                blur=0.8
                             
                            )

hm.add_to(map_month)
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(map_month)
map_month

## Per Hour

In [38]:
Bike_data_Zeit['start_time']= pd.to_datetime(Bike_data_Zeit['start_time']) 

Bike_data_Hour = Bike_data_Zeit.groupby([Bike_data_Zeit['start_time'].dt.hour, Bike_data_Zeit['start_station_id']]).describe()
Bike_data_Hour = Bike_data_Hour.drop(columns = [('end_station_id','mean'),('end_station_id','std'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max')])

In [39]:
Bike_data_Hour

Unnamed: 0_level_0,Unnamed: 1_level_0,end_station_id
Unnamed: 0_level_1,Unnamed: 1_level_1,count
start_time,start_station_id,Unnamed: 2_level_2
0,3.0,139.0
0,4.0,45.0
0,5.0,137.0
0,6.0,68.0
0,7.0,64.0
...,...,...
23,372.0,12.0
23,375.0,5.0
23,377.0,10.0
23,380.0,3.0


In [40]:
result_Hour = {}
for i in range(0,24):
    result_Hour[i] = pd.concat([Geodata,Bike_data_Hour.loc[i]], axis=1)


In [41]:
maxValueHour = 0
for i in range(0,24):
    if (maxValueHour < max(result_Hour[i][('end_station_id','count')].tolist())):
        maxValueHour = max(result_Hour[i][('end_station_id','count')].tolist())
maxValueHour

9378.0

In [42]:
dataHour = {}
betterDataHour ={}
for i in range(0,24):
    #maxIt = max(result[i][('end_station_id','count')].tolist())
    #print(maxIt)
    x = np.array(result_Hour[i][('end_station_id','count')])
    x= np.nan_to_num(x)
    dataHour[i] = list(zip(result_Hour[i]['lat'].tolist(),result_Hour[i]['lng'].tolist(),
                       (x/maxValue).tolist()))
    betterDataHour[i] = [list(entry) for entry in dataHour[i]] 
    #for entry in data[i]:
     #   betterData[i] = betterData[i].append(list(entry))
        

In [43]:
dataHour = []
for i in range(0,24):
    dataHour.append(betterDataHour[i])

In [44]:
from datetime import datetime, timedelta

time_index = [
    (datetime.now() + k * timedelta(1)).strftime("%Y-%m-%d") for k in range(len(data))
]

In [45]:
# Heatmap over hours
map_Hour = folium.Map(location=[meanLat, meanLons], zoom_start = 12)

hm = plugins.HeatMapWithTime(dataHour,
                             #max_opacity=1,
                             radius=25, 
                             auto_play=True,
                                blur=0.8,
                             use_local_extrema = False,
                             #gradient = 'red',
                             #scale_radius = True
                            )

hm.add_to(map_Hour)
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(map_Hour)
map_Hour

In [46]:
Bike_data_Zeit

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name
505846,2018-05-21 10:10:15,2018-05-21 10:35:20,6.0,43.0,The Embarcadero at Sansome St,San Francisco Public Library (Grove St at Hyde...
1213935,2018-09-30 10:30:29,2018-09-30 10:46:31,205.0,205.0,Miles Ave at Cavour St,Miles Ave at Cavour St
1586692,2018-10-02 16:57:56,2018-10-02 17:06:58,88.0,67.0,11th St at Bryant St,San Francisco Caltrain Station 2 (Townsend St...
1213933,2018-09-30 10:38:24,2018-09-30 10:46:45,44.0,321.0,Civic Center/UN Plaza BART Station (Market St ...,5th St at Folsom
768116,2018-06-08 17:20:04,2018-06-08 17:36:53,167.0,249.0,College Ave at Harwood Ave,Russell St at College Ave
...,...,...,...,...,...,...
669863,2018-06-24 12:32:13,2018-06-24 12:50:10,6.0,284.0,The Embarcadero at Sansome St,Yerba Buena Center for the Arts (Howard St at ...
669862,2018-06-24 12:31:56,2018-06-24 12:50:15,10.0,74.0,Washington St at Kearny St,Laguna St at Hayes St
669861,2018-06-24 12:45:17,2018-06-24 12:50:27,205.0,172.0,Shafter Ave at Cavour St,College Ave at Taft Ave
676376,2018-06-22 21:36:46,2018-06-22 21:43:08,214.0,180.0,Market St at Brockhurst St,Telegraph Ave at 23rd St


# Hotspots

In [47]:
# Drop unnecessary Columns
Bike_start = Bike_data.groupby('start_station_id').describe()
Bike_start = Bike_start.drop( [('end_station_id','std'),('end_station_id','mean'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max'),'bike_id'], axis=1)
Bike_start = Bike_start.sort_values(by = ('end_station_id','count') , ascending = False)
Bike_start_Top = Bike_start[Bike_start["end_station_id","count"] > 33500]
Bike_start_Top

Unnamed: 0_level_0,end_station_id
Unnamed: 0_level_1,count
start_station_id,Unnamed: 1_level_2
15.0,38461.0
67.0,37617.0
30.0,35092.0
58.0,34918.0
81.0,33716.0


In [48]:
Bike_start_Bot = Bike_start[Bike_start["end_station_id","count"] < 5000]
Bike_start_Bot

Unnamed: 0_level_0,end_station_id
Unnamed: 0_level_1,count
start_station_id,Unnamed: 1_level_2
101.0,4982.0
107.0,4939.0
280.0,4935.0
345.0,4852.0
267.0,4845.0
...,...
301.0,93.0
359.0,91.0
358.0,52.0
374.0,25.0
