# Analytics and Application - Descriptive Analysis
## Plotting Geodata

In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime as dt
from dateutil.parser import parse
from mpl_toolkits.basemap import Basemap
import math

import folium
from folium import plugins
from folium.plugins import HeatMap
import folium.plugins as plugins
import statistics

## Vorbereitung der Geodaten

In [2]:
Bike_data = pd.read_csv("sf_2018.csv")
Geodata = pd.read_csv("geo_data.csv")

In [3]:
Bike_stations = Bike_data[['start_station_id','start_station_name']]

In [4]:

# drop duplicates
Bike_data = Bike_data.drop_duplicates()
Bike_stations = Bike_stations.drop_duplicates()

Bike_stations.index

Int64Index([      0,       1,       2,       3,       4,       5,       6,
                  7,       8,       9,
            ...
            1396918, 1397217, 1400394, 1598298, 1598442, 1598523, 1611496,
            1629903, 1732521, 1733596],
           dtype='int64', length=349)

In [5]:
Bike_stations = Bike_stations.sort_values('start_station_id')

Bike_stations = Bike_stations.set_index('start_station_id')
Bike_stations = Bike_stations.dropna()
Bike_stations

Unnamed: 0_level_0,start_station_name
start_station_id,Unnamed: 1_level_1
3.0,Powell St BART Station (Market St at 4th St)
4.0,Cyril Magnin St at Ellis St
5.0,Powell St BART Station (Market St at 5th St)
6.0,The Embarcadero at Sansome St
7.0,Frank H Ogawa Plaza
...,...
375.0,Grove St at Masonic Ave
377.0,Fell St at Stanyan St
378.0,Empire St at 7th St
380.0,Masonic Ave at Turk St


In [6]:
# drop String part "San Franciso"
Geodata['station'] = Geodata['station'].map(lambda x: x.rstrip(', San Francisco'))
Geodata = Geodata.sort_values('station')
Bike_stations = Bike_stations.sort_values('start_station_name')

In [7]:
# Daten sortiert nach Stationsnamen
# hier ist der Index die Id
Bike_stations

Unnamed: 0_level_0,start_station_name
start_station_id,Unnamed: 1_level_1
222.0,10th Ave at E 15th St
201.0,10th St at Fallon St
351.0,10th St at University Ave
88.0,11th St at Bryant St
77.0,11th St at Natoma St
...,...
359.0,Williams Ave at Apollo St
301.0,Willow St at Vine St
272.0,Woolsey St at Sacramento St
272.0,Woolsey St at Sacramento St1


In [8]:
# Hinzufügen der Ids
# Die Ids werden durch den index hinzugefügt, die nach namen sortiert sind
Geodata['station_id'] = Bike_stations.index

In [9]:
# Geodaten mit station_id
Geodata

Unnamed: 0,station,lat,lng,result,station_id
345,10th Ave at E 15th St,37.774929,-122.419415,[{'address_components': [{'long_name': 'San Fr...,222.0
196,10th St at Fallon St,37.772823,-122.412850,[{'address_components': [{'long_name': '10th S...,201.0
295,10th St at University Ave,37.772823,-122.412850,[{'address_components': [{'long_name': '10th S...,351.0
329,11th St at Bryant St,37.769435,-122.410878,[{'address_components': [{'long_name': '11th S...,88.0
264,11th St at Natoma St,37.773533,-122.416139,[{'address_components': [{'long_name': 'Natoma...,77.0
...,...,...,...,...,...
174,Williams Ave at Apollo St,37.730185,-122.398997,[{'address_components': [{'long_name': 'Willia...,359.0
11,Willow St at Vine St,37.774929,-122.419415,[{'address_components': [{'long_name': 'San Fr...,301.0
276,Woolsey St at Sacramento St,37.850627,-122.278320,[{'address_components': [{'long_name': 'Sacram...,272.0
136,Woolsey St at Sacramento St1,37.723993,-122.407455,[{'address_components': [{'long_name': 'Woolse...,272.0


In [10]:
Geodata = Geodata.sort_values('station_id')
#station_id als index
Geodata = Geodata.set_index('station_id')

#Falschen Geodata Eintrag korrigieren

Geodata.at[210,'station'] = '45th St at Manila'
Geodata.at[210,'lat'] = 37.833304
Geodata.at[210,'lng'] = -122.256240
Geodata.drop_duplicates()
Geodata

Unnamed: 0_level_0,station,lat,lng,result
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3.0,Powell St BART Station (Market St at 4th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...
4.0,Cyril Magnin St at Ellis St,37.785398,-122.408699,[{'address_components': [{'long_name': 'Ellis ...
5.0,Powell St BART Station (Market St at 5th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...
6.0,The Embarcadero at Sansome St,37.805048,-122.403449,[{'address_components': [{'long_name': 'Sansom...
7.0,Frank H Ogawa Plaz,37.805531,-122.272074,[{'address_components': [{'long_name': 'Frank ...
...,...,...,...,...
375.0,Grove St at Masonic Ave,37.774873,-122.446371,[{'address_components': [{'long_name': 'Masoni...
377.0,Fell St at Stanyan St,37.771911,-122.454095,[{'address_components': [{'long_name': 'Fell S...
378.0,Empire St at 7th St,37.773547,-122.403843,[{'address_components': [{'long_name': '7th St...
380.0,Masonic Ave at Turk St,37.778631,-122.447108,[{'address_components': [{'long_name': 'Turk B...


In [12]:
#  Zeit von String in datetime Typ umwandeln
Zeit_datetime_start = pd.to_datetime(Bike_data['start_time'])
Zeit_datetime_end = pd.to_datetime(Bike_data['end_time'])

In [13]:
Bike_data

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
0,2018-01-31 22:52:36,2018-02-01 19:47:20,120.0,285.0,Mission Dolores Park,Webster St at O'Farrell St,2765,Subscriber
1,2018-01-31 16:13:35,2018-02-01 15:57:18,15.0,15.0,San Francisco Ferry Building (Harry Bridges Pl...,San Francisco Ferry Building (Harry Bridges Pl...,2815,Customer
2,2018-01-31 14:23:56,2018-02-01 10:16:53,304.0,296.0,Jackson St at 5th St,5th St at Virginia St,3039,Customer
3,2018-01-31 14:53:24,2018-02-01 07:51:21,75.0,47.0,Market St at Franklin St,4th St at Harrison St,321,Customer
4,2018-01-31 19:52:25,2018-02-01 06:58:32,74.0,19.0,Laguna St at Hayes St,Post St at Kearny St,617,Subscriber
...,...,...,...,...,...,...,...,...
1863716,2018-12-01 00:11:55,2018-12-01 00:19:49,345.0,81.0,Hubbell St at 16th St,Berry St at 4th St,3035,Subscriber
1863717,2018-12-01 00:02:49,2018-12-01 00:16:50,10.0,58.0,Washington St at Kearny St,Market St at 10th St,2034,Subscriber
1863718,2018-12-01 00:05:28,2018-12-01 00:09:48,245.0,255.0,Downtown Berkeley BART,Virginia St at Shattuck Ave,2243,Subscriber
1863719,2018-12-01 00:03:07,2018-12-01 00:08:00,93.0,126.0,4th St at Mission Bay Blvd S,Esprit Park,545,Subscriber


## Sortieren nach User_Type

In [14]:
# sort the dataframe
Bike_data.sort_values(by='user_type', axis=0, inplace=True)

# get a list of names
user_types=Bike_data['user_type'].unique().tolist()

# now we can perform a lookup on a 'view' of the dataframe
Subscriber = Bike_data.loc[Bike_data.user_type=='Subscriber']
Customer = Bike_data.loc[Bike_data.user_type=='Customer']

In [15]:
Subscriber

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
1291724,2018-09-18 14:37:53,2018-09-18 14:46:04,5.0,350.0,Powell St BART Station (Market St at 5th St),8th St at Brannan St,1362,Subscriber
1360285,2018-09-07 16:23:51,2018-09-07 16:30:01,98.0,119.0,Valencia St at 16th St,18th St at Noe St,2296,Subscriber
1015179,2018-07-02 02:39:18,2018-07-02 02:58:40,52.0,71.0,McAllister St at Baker St,Broderick St at Oak St,3921,Subscriber
1292044,2018-09-18 13:24:24,2018-09-18 13:28:48,44.0,42.0,Civic Center/UN Plaza BART Station (Market St ...,San Francisco City Hall (Polk St at Grove St),1202,Subscriber
1299885,2018-09-17 12:01:43,2018-09-17 12:04:11,81.0,93.0,Berry St at 4th St,4th St at Mission Bay Blvd S,138,Subscriber
...,...,...,...,...,...,...,...,...
669863,2018-06-24 12:32:13,2018-06-24 12:50:10,6.0,284.0,The Embarcadero at Sansome St,Yerba Buena Center for the Arts (Howard St at ...,2317,Subscriber
669862,2018-06-24 12:31:56,2018-06-24 12:50:15,10.0,74.0,Washington St at Kearny St,Laguna St at Hayes St,3869,Subscriber
669861,2018-06-24 12:45:17,2018-06-24 12:50:27,205.0,172.0,Shafter Ave at Cavour St,College Ave at Taft Ave,3380,Subscriber
676376,2018-06-22 21:36:46,2018-06-22 21:43:08,214.0,180.0,Market St at Brockhurst St,Telegraph Ave at 23rd St,1026,Subscriber


In [16]:
Customer

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
505846,2018-05-21 10:10:15,2018-05-21 10:35:20,6.0,43.0,The Embarcadero at Sansome St,San Francisco Public Library (Grove St at Hyde...,3080,Customer
1213935,2018-09-30 10:30:29,2018-09-30 10:46:31,205.0,205.0,Miles Ave at Cavour St,Miles Ave at Cavour St,1734,Customer
1586692,2018-10-02 16:57:56,2018-10-02 17:06:58,88.0,67.0,11th St at Bryant St,San Francisco Caltrain Station 2 (Townsend St...,3708,Customer
1213933,2018-09-30 10:38:24,2018-09-30 10:46:45,44.0,321.0,Civic Center/UN Plaza BART Station (Market St ...,5th St at Folsom,180,Customer
768116,2018-06-08 17:20:04,2018-06-08 17:36:53,167.0,249.0,College Ave at Harwood Ave,Russell St at College Ave,3505,Customer
...,...,...,...,...,...,...,...,...
881055,2018-07-22 18:34:34,2018-07-22 18:41:53,304.0,279.0,Jackson St at 5th St,Santa Clara St at 7th St,264,Customer
1027696,2018-08-30 15:21:21,2018-08-30 15:37:01,9.0,10.0,Broadway at Battery St,Washington St at Kearny St,2736,Customer
881029,2018-07-22 18:22:07,2018-07-22 18:47:40,23.0,6.0,The Embarcadero at Steuart St,The Embarcadero at Sansome St,3152,Customer
877960,2018-07-23 09:30:28,2018-07-23 09:47:24,92.0,22.0,Mission Bay Kids Park,Howard St at Beale St,803,Customer


In [17]:
Start_Ids=Bike_data.sort_values('start_station_id')['start_station_id'].unique().tolist()
End_Ids=Bike_data.sort_values('end_station_id')['end_station_id'].unique().tolist()

## Daten für Plots

### Die folgende Vorbereitung schien fehlerhafte Zahlen wegen Duplikaten aufzuweisen
Ein Abschnitt drunter haben wir eine andere Methode verwendet

In [18]:
Data_Collection = {}
Data_Collection_Subscriber = {}
Data_Collection_Customer = {}

In [19]:
# Sortieren aller Daten nach Start_Id
for Id in Bike_stations.index:
    Data_Collection[Id] = Bike_data.loc[Bike_data.start_station_id==Id]
        
# Analog mit Subscriber und Costumer Fahrten
for Id in Bike_stations.index:
    Data_Collection_Subscriber[Id] = Subscriber.loc[Subscriber.start_station_id==Id]
    
for Id in Bike_stations.index:
    Data_Collection_Customer[Id] = Customer.loc[Customer.start_station_id==Id]
   

In [20]:
# Erstellen eines Dataframes zum ablesen der Fahrten von A nach B
Anzahl_Fahrten = pd.DataFrame(index=Bike_stations.index)
for Id in Bike_stations.index:
    ZwischenList = Data_Collection[Id]['end_station_id'].value_counts().sort_index()
    ZwischenList.name = Id
    Anzahl_Fahrten = pd.concat([Anzahl_Fahrten,ZwischenList], axis=1)
    
Anzahl_Fahrten.index.name = 'end_station_id'
Anzahl_Fahrten.columns.name = 'start_station_id'
Anzahl_Fahrten = Anzahl_Fahrten.T

In [21]:
# Analog für Subscriber
Anzahl_Fahrten_Subscriber = pd.DataFrame(index=Bike_stations.index)
for Id in Bike_stations.index:
    ZwischenList = Data_Collection_Subscriber[Id]['end_station_id'].value_counts().sort_index()
    ZwischenList.name = Id
    Anzahl_Fahrten_Subscriber = pd.concat([Anzahl_Fahrten_Subscriber,ZwischenList], axis=1)
    
Anzahl_Fahrten_Subscriber.index.name = 'end_station_id'
Anzahl_Fahrten_Subscriber.columns.name = 'start_station_id'
Anzahl_Fahrten_Subscriber = Anzahl_Fahrten_Subscriber.T

In [22]:
# Analog für Customer
Anzahl_Fahrten_Customer = pd.DataFrame(index=Bike_stations.index)
for Id in Bike_stations.index:
    ZwischenList = Data_Collection_Customer[Id]['end_station_id'].value_counts().sort_index()
    ZwischenList.name = Id
    Anzahl_Fahrten_Customer = pd.concat([Anzahl_Fahrten_Customer,ZwischenList], axis=1)
    
Anzahl_Fahrten_Customer.index.name = 'end_station_id'
Anzahl_Fahrten_Customer.columns.name = 'start_station_id'
Anzahl_Fahrten_Customer = Anzahl_Fahrten_Customer.T

In [23]:
Anzahl_Fahrten

end_station_id,222.0,201.0,351.0,88.0,77.0,233.0,221.0,338.0,231.0,230.0,...,190.0,262.0,295.0,208.0,358.0,359.0,301.0,272.0,272.0,284.0
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
222.0,17.0,86.0,1.0,,,10.0,10.0,49.0,,,...,,,,,,,,,,
201.0,68.0,133.0,,,,546.0,106.0,11.0,2.0,13.0,...,1.0,,,,,,,,,
351.0,,,37.0,,,1.0,,,,11.0,...,,10.0,,,,,,8.0,8.0,
88.0,,,,132.0,227.0,,,,,,...,,,,,1.0,1.0,,,,65.0
77.0,,,,248.0,98.0,,,,,,...,,,,,,1.0,,,,68.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359.0,,,,1.0,1.0,,,,,,...,,,,,8.0,6.0,,,,
301.0,,,,,,,,,,,...,,,,1.0,,,13.0,,,
272.0,,2.0,5.0,,,,,3.0,,,...,,5.0,,,,,,53.0,53.0,
272.0,,2.0,5.0,,,,,3.0,,,...,,5.0,,,,,,53.0,53.0,


In [24]:
Anzahl_Fahrten.sum(axis=1)[15]

38562.0

In [25]:
lons = Geodata['lng']
lats = Geodata['lat']


# Summe der Fahrten (Start)
Start_Fahrten_Summe = Anzahl_Fahrten.sum(axis=1)
Start_Fahrten_Summe = Start_Fahrten_Summe.sort_index()

# Divide each entry by max(Anzahl_Fahrten)
fac = max(Start_Fahrten_Summe)
print(fac)
Start_Fahrten_Summe = Start_Fahrten_Summe.div(fac)
#sizes = sizes.reset_index(drop=True)
#sizes

# Summe der Fahrten (End)
End_Fahrten_Summe = Anzahl_Fahrten.sum(axis=0)
End_Fahrten_Summe = End_Fahrten_Summe.sort_index()

fac_end = max(End_Fahrten_Summe)*(1/1000)
sizes_end = End_Fahrten_Summe.div(fac_end)
sizes_end = sizes_end.reset_index(drop =True)
Start_Fahrten_Summe
#sizes

38635.0


start_station_id
3.0      0.826116
4.0      0.158354
5.0      0.656995
6.0      0.866520
7.0      0.245373
           ...   
375.0    0.015297
377.0    0.025210
378.0    0.003417
380.0    0.002640
381.0    0.006756
Length: 348, dtype: float64

### Alternative Vorbereitung der Daten

In [79]:
Bike_count = Bike_data.groupby('start_station_id').count()
Bike_count = Bike_count.sort_values(by = ('start_station_id') , ascending = True)
Bike_count

Unnamed: 0_level_0,start_time,end_time,end_station_id,start_station_name,end_station_name,bike_id,user_type
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3.0,31656,31656,31656,31656,31656,31656,31656
4.0,6058,6058,6058,6058,6058,6058,6058
5.0,25007,25007,25007,25007,25007,25007,25007
6.0,33390,33390,33390,33390,33390,33390,33390
7.0,8658,8658,8658,8658,8658,8658,8658
...,...,...,...,...,...,...,...
375.0,591,591,591,591,591,591,591
377.0,971,971,971,971,971,971,971
378.0,118,118,118,118,118,118,118
380.0,102,102,102,102,102,102,102


In [80]:
Bike_count['end_station_id']
weights = pd.concat([Geodata,Bike_count_Top], axis=1)
weights

Unnamed: 0,station,lat,lng,result,start_time,end_time,end_station_id,start_station_name,end_station_name,bike_id,user_type
3.0,Powell St BART Station (Market St at 4th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...,31656,31656,31656,31656,31656,31656,31656
4.0,Cyril Magnin St at Ellis St,37.785398,-122.408699,[{'address_components': [{'long_name': 'Ellis ...,6058,6058,6058,6058,6058,6058,6058
5.0,Powell St BART Station (Market St at 5th St),37.784369,-122.407998,[{'address_components': [{'long_name': 'Powell...,25007,25007,25007,25007,25007,25007,25007
6.0,The Embarcadero at Sansome St,37.805048,-122.403449,[{'address_components': [{'long_name': 'Sansom...,33390,33390,33390,33390,33390,33390,33390
7.0,Frank H Ogawa Plaz,37.805531,-122.272074,[{'address_components': [{'long_name': 'Frank ...,8658,8658,8658,8658,8658,8658,8658
...,...,...,...,...,...,...,...,...,...,...,...
375.0,Grove St at Masonic Ave,37.774873,-122.446371,[{'address_components': [{'long_name': 'Masoni...,591,591,591,591,591,591,591
377.0,Fell St at Stanyan St,37.771911,-122.454095,[{'address_components': [{'long_name': 'Fell S...,971,971,971,971,971,971,971
378.0,Empire St at 7th St,37.773547,-122.403843,[{'address_components': [{'long_name': '7th St...,118,118,118,118,118,118,118
380.0,Masonic Ave at Turk St,37.778631,-122.447108,[{'address_components': [{'long_name': 'Turk B...,102,102,102,102,102,102,102


In [26]:
lons = Geodata['lng']
lats = Geodata['lat']
print(lons)
print(lats)


station_id
3.0     -122.407998
4.0     -122.408699
5.0     -122.407998
6.0     -122.403449
7.0     -122.272074
            ...    
375.0   -122.446371
377.0   -122.454095
378.0   -122.403843
380.0   -122.447108
381.0   -122.425874
Name: lng, Length: 348, dtype: float64
station_id
3.0      37.784369
4.0      37.785398
5.0      37.784369
6.0      37.805048
7.0      37.805531
           ...    
375.0    37.774873
377.0    37.771911
378.0    37.773547
380.0    37.778631
381.0    37.758220
Name: lat, Length: 348, dtype: float64


## Heatmaps

In [27]:
meanLons = statistics.mean(lons)
meanLat = statistics.mean(lats)

In [28]:
# Entfernen von unnötigen Spalten
Bike_start = Bike_data.groupby('start_station_id').describe()
Bike_start = Bike_start.drop( [('end_station_id','std'),('end_station_id','mean'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max'),'bike_id'], axis=1)
Bike_start = Bike_start.sort_values(by = ('end_station_id','count') , ascending = False)
Bike_start_Top = Bike_start[Bike_start["end_station_id","count"] > 33500]

In [31]:
for id in Bike_start_Top.index:
    print(id)

15.0
67.0
30.0
58.0
81.0


In [39]:
mapObj = folium.Map(location=[meanLat, meanLons], zoom_start = 15)
# create heatmap layer
#heatmap = HeatMap( list(zip(lats, lons, Start_Fahrten_Summe)),
heatmap = HeatMap( list(zip(lats, lons, weights['end_station_id'])),
                    min_opacity = 0,
                   radius=25, blur=15, 
                   max_zoom=1,
                 )
# add heatmap layer to base map
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapObj)

# Mark the 5 Stations with highest Demand 
for id in Bike_start_Top.index:
        folium.CircleMarker([Geodata.loc[id]['lat'],Geodata.loc[id]['lng'] ], radius=10, color='magenta').add_to(mapObj)
heatmap.add_to(mapObj)
mapObj

In [87]:
# Customer
# Summe der Fahrten (Start)
Start_Fahrten_Summe_Customer = Anzahl_Fahrten_Customer.sum(axis=1)
Start_Fahrten_Summe_Customer= Start_Fahrten_Summe_Customer.sort_index()
# Divide each entry by max(Anzahl_Fahrten)
fac_Customer = max(Start_Fahrten_Summe_Customer)*(1/1000)
sizes_Customer = Start_Fahrten_Summe_Customer.div(fac)
sizes_Customer = sizes_Customer.reset_index(drop=True)


#### Alternative
Bike_count_Customer = Customer.groupby('start_station_id').count()
Bike_count_Customer = Bike_count_Customer.sort_values(by = ('start_station_id') , ascending = True)

weights_Customer = pd.concat([Geodata,Bike_count_Customer], axis=1)

In [88]:
#Customer
mapObj_Customer = folium.Map(location=[meanLat, meanLons], zoom_start = 15)
# create heatmap layer
heatmap_Customer = HeatMap( list(zip(lats, lons, weights_Customer['end_station_id'])),
                   min_opacity=0.25,
                   radius=20, blur=5, 
                   max_zoom=1)
# add heatmap layer to base map
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapObj_Customer)
heatmap_Customer.add_to(mapObj_Customer)
mapObj_Customer

In [89]:
# Subscriber
# Summe der Fahrten (Start)
Start_Fahrten_Summe_Subscriber = Anzahl_Fahrten_Subscriber.sum(axis=1)
Start_Fahrten_Summe_Subscriber = Start_Fahrten_Summe_Subscriber.sort_index()
# Divide each entry by max(Anzahl_Fahrten)
fac_Subscriber = max(Start_Fahrten_Summe_Subscriber)*(1/1000)
sizes_Subscriber = Start_Fahrten_Summe_Subscriber.div(fac)
sizes_Subscriber = sizes_Subscriber.reset_index(drop=True)

#### Alternative
Bike_count_Subscriber = Subscriber.groupby('start_station_id').count()
Bike_count_Subscriber = Bike_count_Subscriber.sort_values(by = ('start_station_id') , ascending = True)

weights_Subscriber = pd.concat([Geodata,Bike_count_Subscriber], axis=1)

In [86]:
# Subscriber
mapObj_Subscriber = folium.Map(location=[meanLat, meanLons], zoom_start = 15)
# create heatmap layer
heatmap_Subscriber = HeatMap( list(zip(lats, lons, weights_Subscriber['end_station_id'])),
                   min_opacity=0.25,
                   radius=20, blur=5, 
                   max_zoom=1)
# add heatmap layer to base map
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(mapObj_Subscriber)
heatmap_Subscriber.add_to(mapObj_Subscriber)
mapObj_Subscriber

Seeing no noticeable difference between Subscriber and Customer (Atleast over a year)

In [48]:
#mTest = folium.Map(location=[meanLat, meanLons], zoom_start = 15)
#route = [ [Geodata.loc[3]['lat'],Geodata.loc[len(Geodata)-1]['lng']] , [Geodata.loc[4]['lat'],Geodata.loc[4]['lng']]]
#route2 = [ [Geodata.loc[10]['lat'],Geodata.loc[10]['lng']] , [Geodata.loc[4]['lat'],Geodata.loc[4]['lng']]]
#folium.PolyLine(route, color="red", weight=2.5, opacity=1).add_to(mTest)
#folium.PolyLine(route2, color="red", weight=2.5, opacity=1).add_to(mTest)
#mTest

In [49]:
Anzahl_Fahrten = Anzahl_Fahrten.sort_index()
Anzahl_Fahrten = Anzahl_Fahrten.T
Anzahl_Fahrten = Anzahl_Fahrten.sort_index()
Anzahl_Fahrten = Anzahl_Fahrten.T

In [50]:
Anzahl_Fahrten

end_station_id,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,13.0,...,370.0,371.0,372.0,373.0,374.0,375.0,377.0,378.0,380.0,381.0
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3.0,706.0,102.0,500.0,1086.0,,259.0,135.0,380.0,545.0,273.0,...,34.0,61.0,,1.0,,4.0,14.0,,,2.0
4.0,97.0,211.0,113.0,236.0,,43.0,22.0,63.0,21.0,27.0,...,15.0,15.0,,,,,4.0,,,1.0
5.0,396.0,109.0,679.0,509.0,,105.0,75.0,227.0,71.0,223.0,...,14.0,35.0,,,,10.0,14.0,,2.0,1.0
6.0,776.0,153.0,594.0,2534.0,,1130.0,440.0,368.0,717.0,275.0,...,20.0,211.0,,,,2.0,,,,
7.0,1.0,,1.0,2.0,261.0,,,,,,...,,,76.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375.0,7.0,4.0,24.0,3.0,,1.0,,1.0,,5.0,...,2.0,9.0,,,,43.0,21.0,,4.0,
377.0,20.0,1.0,4.0,14.0,,3.0,,7.0,2.0,13.0,...,,13.0,,1.0,,22.0,226.0,,11.0,
378.0,,,,,,,,,,,...,,,,,,,,3.0,,
380.0,,,3.0,,,,,1.0,,,...,,2.0,,,,9.0,7.0,,11.0,


## Heatmaps after Time

## Per Month

In [51]:
# unnötige Spalten droppen
Bike_data_Zeit = Bike_data.drop(columns=['bike_id', 'user_type'])
Bike_data_Zeit

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name
505846,2018-05-21 10:10:15,2018-05-21 10:35:20,6.0,43.0,The Embarcadero at Sansome St,San Francisco Public Library (Grove St at Hyde...
1213935,2018-09-30 10:30:29,2018-09-30 10:46:31,205.0,205.0,Miles Ave at Cavour St,Miles Ave at Cavour St
1586692,2018-10-02 16:57:56,2018-10-02 17:06:58,88.0,67.0,11th St at Bryant St,San Francisco Caltrain Station 2 (Townsend St...
1213933,2018-09-30 10:38:24,2018-09-30 10:46:45,44.0,321.0,Civic Center/UN Plaza BART Station (Market St ...,5th St at Folsom
768116,2018-06-08 17:20:04,2018-06-08 17:36:53,167.0,249.0,College Ave at Harwood Ave,Russell St at College Ave
...,...,...,...,...,...,...
669863,2018-06-24 12:32:13,2018-06-24 12:50:10,6.0,284.0,The Embarcadero at Sansome St,Yerba Buena Center for the Arts (Howard St at ...
669862,2018-06-24 12:31:56,2018-06-24 12:50:15,10.0,74.0,Washington St at Kearny St,Laguna St at Hayes St
669861,2018-06-24 12:45:17,2018-06-24 12:50:27,205.0,172.0,Shafter Ave at Cavour St,College Ave at Taft Ave
676376,2018-06-22 21:36:46,2018-06-22 21:43:08,214.0,180.0,Market St at Brockhurst St,Telegraph Ave at 23rd St


In [52]:
Bike_data_Zeit['start_time']= pd.to_datetime(Bike_data_Zeit['start_time']) 

Bike_data_Monat = Bike_data_Zeit.groupby([Bike_data_Zeit['start_time'].dt.month, Bike_data_Zeit['start_station_id']]).describe()
Bike_data_Monat = Bike_data_Monat.drop(columns = [('end_station_id','mean'),('end_station_id','std'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max')])

In [54]:
result = {}
for i in range(1,13):
    result[i] = pd.concat([Geodata,Bike_data_Monat.loc[i]], axis=1)


In [55]:
#maxTest = max(result[1][('end_station_id','count')].tolist())
#(np.array(result[1][('end_station_id','count')].tolist())/max).tolist()
#maxTest
test = max(result[1][('end_station_id','count')].tolist())

In [56]:
maxValue = 0
for i in range(1,13):
    if (maxValue < max(result[i][('end_station_id','count')].tolist())):
        maxValue = max(result[i][('end_station_id','count')].tolist())
maxValue

4712.0

In [57]:
data = {}
betterData ={}
for i in range(1,13):
    #maxIt = max(result[i][('end_station_id','count')].tolist())
    #print(maxIt)
    x = np.array(result[i][('end_station_id','count')])
    x= np.nan_to_num(x)
    data[i] = list(zip(result[i]['lat'].tolist(),result[i]['lng'].tolist(),
                       (x/maxValue).tolist()))
    betterData[i] = [list(entry) for entry in data[i]] 
    #for entry in data[i]:
     #   betterData[i] = betterData[i].append(list(entry))

In [58]:
data

{1: [(37.7843693, -122.4079981, 0.34401528013582344),
  (37.7853977, -122.4086987, 0.07236842105263158),
  (37.7843693, -122.4079981, 0.2720713073005093),
  (37.8050476, -122.4034486, 0.38582342954159593),
  (37.8055309, -122.2720741, 0.12224108658743633),
  (37.7967779, -122.4229762, 0.12224108658743633),
  (37.7985907, -122.4010735, 0.11820882852292021),
  (37.7953329, -122.4049789, 0.06451612903225806),
  (37.7970989, -122.3983614, 0.10547538200339558),
  (37.7942315, -122.4030656, 0.10314091680814941),
  (37.7950194, -122.4003647, 0.13688455008488964),
  (37.7954425, -122.3936136, 0.4598896434634975),
  (37.7944659, -122.3947991, 0.350169779286927),
  (37.7928912, -122.3970156, 0.2640067911714771),
  (37.8501936, -122.260515, 0.02928692699490662),
  (37.7889065, -122.4036866, 0.16765704584040747),
  (37.791456, -122.3991363, 0.1453735144312394),
  (37.7895433, -122.401063, 0.3607809847198642),
  (37.7899114, -122.3943048, 0.3370118845500849),
  (37.7930839, -122.393019, 0.209040747

In [59]:
data_month = []
for i in range(1,13):
    data_month.append(betterData[i])
data_month

[[[37.7843693, -122.4079981, 0.34401528013582344],
  [37.7853977, -122.4086987, 0.07236842105263158],
  [37.7843693, -122.4079981, 0.2720713073005093],
  [37.8050476, -122.4034486, 0.38582342954159593],
  [37.8055309, -122.2720741, 0.12224108658743633],
  [37.7967779, -122.4229762, 0.12224108658743633],
  [37.7985907, -122.4010735, 0.11820882852292021],
  [37.7953329, -122.4049789, 0.06451612903225806],
  [37.7970989, -122.3983614, 0.10547538200339558],
  [37.7942315, -122.4030656, 0.10314091680814941],
  [37.7950194, -122.4003647, 0.13688455008488964],
  [37.7954425, -122.3936136, 0.4598896434634975],
  [37.7944659, -122.3947991, 0.350169779286927],
  [37.7928912, -122.3970156, 0.2640067911714771],
  [37.8501936, -122.260515, 0.02928692699490662],
  [37.7889065, -122.4036866, 0.16765704584040747],
  [37.791456, -122.3991363, 0.1453735144312394],
  [37.7895433, -122.401063, 0.3607809847198642],
  [37.7899114, -122.3943048, 0.3370118845500849],
  [37.7930839, -122.393019, 0.209040747028

In [60]:
map_month = folium.Map(location=[meanLat, meanLons], zoom_start = 12)

hm = plugins.HeatMapWithTime(data_month,
                             #max_opacity=1,
                             radius=25, 
                             auto_play=True,
                                blur=0.8
                             
                            )

hm.add_to(map_month)
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(map_month)
map_month

In [61]:
help( plugins.HeatMapWithTime)

Help on class HeatMapWithTime in module folium.plugins.heat_map_withtime:

class HeatMapWithTime(folium.elements.JSCSSMixin, folium.map.Layer)
 |  HeatMapWithTime(data, index=None, name=None, radius=15, blur=0.8, min_opacity=0, max_opacity=0.6, scale_radius=False, gradient=None, use_local_extrema=False, auto_play=False, display_index=True, index_steps=1, min_speed=0.1, max_speed=10, speed_step=0.1, position='bottomleft', overlay=True, control=True, show=True)
 |  
 |  Create a HeatMapWithTime layer
 |  
 |  Parameters
 |  ----------
 |  data: list of list of points of the form [lat, lng] or [lat, lng, weight]
 |      The points you want to plot. The outer list corresponds to the various time
 |      steps in sequential order. (weight is in (0, 1] range and defaults to 1 if
 |      not specified for a point)
 |  index: Index giving the label (or timestamp) of the elements of data. Should have
 |      the same length as data, or is replaced by a simple count if not specified.
 |  name : 

## Per Hour

In [62]:
Bike_data_Zeit['start_time']= pd.to_datetime(Bike_data_Zeit['start_time']) 

Bike_data_Hour = Bike_data_Zeit.groupby([Bike_data_Zeit['start_time'].dt.hour, Bike_data_Zeit['start_station_id']]).describe()
Bike_data_Hour = Bike_data_Hour.drop(columns = [('end_station_id','mean'),('end_station_id','std'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max')])

In [63]:
Bike_data_Hour

Unnamed: 0_level_0,Unnamed: 1_level_0,end_station_id
Unnamed: 0_level_1,Unnamed: 1_level_1,count
start_time,start_station_id,Unnamed: 2_level_2
0,3.0,139.0
0,4.0,45.0
0,5.0,137.0
0,6.0,68.0
0,7.0,64.0
...,...,...
23,372.0,12.0
23,375.0,5.0
23,377.0,10.0
23,380.0,3.0


In [64]:
result_Hour = {}
for i in range(0,24):
    result_Hour[i] = pd.concat([Geodata,Bike_data_Hour.loc[i]], axis=1)


In [65]:
maxValueHour = 0
for i in range(0,24):
    if (maxValueHour < max(result_Hour[i][('end_station_id','count')].tolist())):
        maxValueHour = max(result_Hour[i][('end_station_id','count')].tolist())
maxValueHour

9378.0

In [66]:
dataHour = {}
betterDataHour ={}
for i in range(0,24):
    #maxIt = max(result[i][('end_station_id','count')].tolist())
    #print(maxIt)
    x = np.array(result_Hour[i][('end_station_id','count')])
    x= np.nan_to_num(x)
    dataHour[i] = list(zip(result_Hour[i]['lat'].tolist(),result_Hour[i]['lng'].tolist(),
                       (x/maxValue).tolist()))
    betterDataHour[i] = [list(entry) for entry in dataHour[i]] 
    #for entry in data[i]:
     #   betterData[i] = betterData[i].append(list(entry))
        

In [67]:
betterDataHour[0]

[[37.7843693, -122.4079981, 0.029499151103565366],
 [37.7853977, -122.4086987, 0.009550084889643463],
 [37.7843693, -122.4079981, 0.029074702886247878],
 [37.8050476, -122.4034486, 0.014431239388794566],
 [37.8055309, -122.2720741, 0.013582342954159592],
 [37.7967779, -122.4229762, 0.013582342954159592],
 [37.7985907, -122.4010735, 0.004244482173174873],
 [37.7953329, -122.4049789, 0.00870118845500849],
 [37.7970989, -122.3983614, 0.00870118845500849],
 [37.7942315, -122.4030656, 0.004244482173174873],
 [37.7950194, -122.4003647, 0.009550084889643463],
 [37.7954425, -122.3936136, 0.01825127334465195],
 [37.7944659, -122.3947991, 0.010823429541595925],
 [37.7928912, -122.3970156, 0.017190152801358233],
 [37.8501936, -122.260515, 0.00233446519524618],
 [37.7889065, -122.4036866, 0.015492359932088286],
 [37.791456, -122.3991363, 0.008064516129032258],
 [37.7895433, -122.401063, 0.018463497453310697],
 [37.7899114, -122.3943048, 0.013157894736842105],
 [37.7930839, -122.393019, 0.004244482

In [68]:
dataHour = []
for i in range(0,24):
    dataHour.append(betterDataHour[i])
dataHour

[[[37.7843693, -122.4079981, 0.029499151103565366],
  [37.7853977, -122.4086987, 0.009550084889643463],
  [37.7843693, -122.4079981, 0.029074702886247878],
  [37.8050476, -122.4034486, 0.014431239388794566],
  [37.8055309, -122.2720741, 0.013582342954159592],
  [37.7967779, -122.4229762, 0.013582342954159592],
  [37.7985907, -122.4010735, 0.004244482173174873],
  [37.7953329, -122.4049789, 0.00870118845500849],
  [37.7970989, -122.3983614, 0.00870118845500849],
  [37.7942315, -122.4030656, 0.004244482173174873],
  [37.7950194, -122.4003647, 0.009550084889643463],
  [37.7954425, -122.3936136, 0.01825127334465195],
  [37.7944659, -122.3947991, 0.010823429541595925],
  [37.7928912, -122.3970156, 0.017190152801358233],
  [37.8501936, -122.260515, 0.00233446519524618],
  [37.7889065, -122.4036866, 0.015492359932088286],
  [37.791456, -122.3991363, 0.008064516129032258],
  [37.7895433, -122.401063, 0.018463497453310697],
  [37.7899114, -122.3943048, 0.013157894736842105],
  [37.7930839, -122

In [69]:
from datetime import datetime, timedelta

time_index = [
    (datetime.now() + k * timedelta(1)).strftime("%Y-%m-%d") for k in range(len(data))
]

In [70]:
map_Hour = folium.Map(location=[meanLat, meanLons], zoom_start = 12)

hm = plugins.HeatMapWithTime(dataHour,
                             #max_opacity=1,
                             radius=25, 
                             auto_play=True,
                                blur=0.8,
                             use_local_extrema = False
                            )

hm.add_to(map_Hour)
for i in range(0,len(lats)-1):
        folium.CircleMarker([lats.iloc[i], lons.iloc[i]], radius=1, color='blue').add_to(map_Hour)
map_Hour

In [71]:
Bike_data_Zeit

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name
505846,2018-05-21 10:10:15,2018-05-21 10:35:20,6.0,43.0,The Embarcadero at Sansome St,San Francisco Public Library (Grove St at Hyde...
1213935,2018-09-30 10:30:29,2018-09-30 10:46:31,205.0,205.0,Miles Ave at Cavour St,Miles Ave at Cavour St
1586692,2018-10-02 16:57:56,2018-10-02 17:06:58,88.0,67.0,11th St at Bryant St,San Francisco Caltrain Station 2 (Townsend St...
1213933,2018-09-30 10:38:24,2018-09-30 10:46:45,44.0,321.0,Civic Center/UN Plaza BART Station (Market St ...,5th St at Folsom
768116,2018-06-08 17:20:04,2018-06-08 17:36:53,167.0,249.0,College Ave at Harwood Ave,Russell St at College Ave
...,...,...,...,...,...,...
669863,2018-06-24 12:32:13,2018-06-24 12:50:10,6.0,284.0,The Embarcadero at Sansome St,Yerba Buena Center for the Arts (Howard St at ...
669862,2018-06-24 12:31:56,2018-06-24 12:50:15,10.0,74.0,Washington St at Kearny St,Laguna St at Hayes St
669861,2018-06-24 12:45:17,2018-06-24 12:50:27,205.0,172.0,Shafter Ave at Cavour St,College Ave at Taft Ave
676376,2018-06-22 21:36:46,2018-06-22 21:43:08,214.0,180.0,Market St at Brockhurst St,Telegraph Ave at 23rd St


# Hotspots

In [73]:
# Entfernen von unnötigen Spalten
Bike_start = Bike_data.groupby('start_station_id').describe()
Bike_start = Bike_start.drop( [('end_station_id','std'),('end_station_id','mean'),('end_station_id','min'),('end_station_id','25%'),('end_station_id','50%'),('end_station_id','75%'),('end_station_id','max'),'bike_id'], axis=1)
Bike_start = Bike_start.sort_values(by = ('end_station_id','count') , ascending = False)
Bike_start_Top = Bike_start[Bike_start["end_station_id","count"] > 33500]
Bike_start_Top

Unnamed: 0_level_0,end_station_id
Unnamed: 0_level_1,count
start_station_id,Unnamed: 1_level_2
15.0,38461.0
67.0,37617.0
30.0,35092.0
58.0,34918.0
81.0,33716.0


In [74]:
Bike_start_Bot = Bike_start[Bike_start["end_station_id","count"] < 5000]
Bike_start_Bot

Unnamed: 0_level_0,end_station_id
Unnamed: 0_level_1,count
start_station_id,Unnamed: 1_level_2
101.0,4982.0
107.0,4939.0
280.0,4935.0
345.0,4852.0
267.0,4845.0
...,...
301.0,93.0
359.0,91.0
358.0,52.0
374.0,25.0
