In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta, date, time
import pickle

import folium
from folium import plugins
from folium.plugins import HeatMap

from haversine import haversine

In [2]:
#run in ANACONDA PROMPT
#conda install -c conda-forge folium 
#conda install -c conda-forge haversine 

In [3]:
# Load the two LA bike-share CSV exports (paths are relative to the notebook folder).
# df_LA     : read from LA.csv; its contents are not inspected in this part of the notebook.
# df_LA_geo : trip records including start/end station coordinates (see the preview below).
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a saved
# DataFrame index -- consider reading with index_col=0 once downstream use is confirmed.
df_LA = pd.read_csv('../data/LA.csv') 
df_LA_geo = pd.read_csv('../data/LA_geo.csv') 

In [4]:
# Preview the first five trips to check the available columns
df_LA_geo.head(5)

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,bike_id,user_type,start_station_name,end_station_name,trip_duration,trip_duration_in_hours,start_lat,start_lon,end_lat,end_lon
0,2018-01-01 00:04:00,2018-01-01 00:25:00,3063,3018,5889,Walk-up,Pershing Square,Grand & Olympic,0 days 00:21:00,0.35,34.048326,-118.253513,34.043732,-118.260139
1,2018-01-01 00:05:00,2018-01-01 00:25:00,3063,3018,6311,Walk-up,Pershing Square,Grand & Olympic,0 days 00:20:00,0.333333,34.048326,-118.253513,34.043732,-118.260139
2,2018-01-01 00:06:00,2018-01-01 00:25:00,3063,3018,5753,Walk-up,Pershing Square,Grand & Olympic,0 days 00:19:00,0.316667,34.048326,-118.253513,34.043732,-118.260139
3,2018-01-01 00:13:00,2018-01-01 00:35:00,3018,3031,6220,Monthly Pass,Grand & Olympic,7th & Spring,0 days 00:22:00,0.366667,34.043732,-118.260139,34.044701,-118.252441
4,2018-01-01 00:14:00,2018-01-01 00:59:00,4204,4216,12436,Monthly Pass,Washington & Abbot Kinney,17th St / SMC E Line Station,0 days 00:45:00,0.75,33.988419,-118.45163,34.023392,-118.479637


**Bike Stations**

In [5]:
def bikestation_map(location, df_stations):
    """Draw every bike station as a circle marker on a folium map.

    Parameters
    ----------
    location : sequence of two coordinates
        ``[lat, lon]`` the map is centred on; passed straight to ``folium.Map``.
    df_stations : pandas.DataFrame (or mapping of columns)
        Must provide a ``"coord"`` column with one ``[lat, lon]`` pair per
        station. If a ``"station_name"`` column is present, each marker's
        popup shows that name; otherwise the markers get no popup.

    Returns
    -------
    folium.Map
        The map with one marker per entry of ``df_stations["coord"]``.
    """
    # Named differently from the function so the function itself is not shadowed.
    station_map = folium.Map(
        location=location,
        tiles='OpenStreetMap',
        zoom_start=11,
        control_scale=True,
        max_zoom=20)

    coords = list(df_stations["coord"])
    # Label each marker with its own station name instead of one fixed text.
    if "station_name" in df_stations:
        names = [str(name) for name in df_stations["station_name"]]
    else:
        names = [None] * len(coords)

    for coord, name in zip(coords, names):
        folium.CircleMarker(
            location=coord,
            radius=5,
            popup=name,
            color='crimson',
            fill_color='crimson',
            # `alpha` is not a folium/Leaflet path option and had no effect;
            # `fill_opacity` is the supported way to set the fill transparency.
            fill_opacity=0.2,
        ).add_to(station_map)
    return station_map

In [6]:
# LA city centre as numeric [lat, lon] (was an array of strings)
center = np.array([34.053691, -118.242766])

# One row per distinct (station name, lat, lon) combination among the trip starts
LA_stations = (
    df_LA_geo[['start_station_name', 'start_lat', 'start_lon']]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(len(LA_stations), "stations in LA.")

# Stations without a complete coordinate pair cannot be drawn, so leave them out
LA_stations_located = LA_stations.dropna(subset=['start_lat', 'start_lon'])

# Station names and their [lat, lon] pairs as parallel lists (built without a row loop)
namelist = LA_stations_located['start_station_name'].tolist()
coordlist = LA_stations_located[['start_lat', 'start_lon']].values.tolist()
df_LA_stations = pd.DataFrame(data={'station_name': namelist, 'coord': coordlist})

bikestation_map(center, df_LA_stations)

132 stations in LA.


**Add station ids to "df_LA_stations"**

In [32]:
# Lookup table: station name -> station id, taken from the trip start columns
df_station_id = df_LA_geo[['start_station_name', 'start_station_id']].drop_duplicates()

# Always merge from the two base columns, so re-running this cell never stacks
# a second 'id' column on top of one added by an earlier run.
# NOTE(review): if one station name maps to several ids, the left merge repeats
# that station once per id -- confirm names are unique per id in the data.
df_merged = (
    df_LA_stations[['station_name', 'coord']]
    .merge(df_station_id, how='left', left_on='station_name', right_on='start_station_name')
    .rename(columns={'start_station_id': 'id'})
    .loc[:, ['station_name', 'coord', 'id']]
)
df_LA_stations = df_merged.copy()

**Heatmap of frequently used stations**

In [33]:
# Count how often each station appears as trip start and as trip end.
# Each frame has a single column that holds the count (the column keeps the
# name of the column that was counted, not a station name/id).
df_start_station = pd.DataFrame(df_LA_geo.groupby('start_station_id')['start_station_name'].count())
df_end_station = pd.DataFrame(df_LA_geo.groupby('end_station_id')['end_station_id'].count())

# Outer join keeps stations that occur only as start or only as end;
# a missing side counts as 0 instead of turning the total into NaN.
df_joined = (
    df_start_station
    .join(df_end_station, how='outer')
    .fillna(0)
    .astype(int)
    .rename_axis('start_station_id')
)

# Total usage = starts + ends; keep the five busiest stations
df_joined = df_joined.assign(total_count=df_joined['start_station_name'] + df_joined['end_station_id'])
df_joined = df_joined.sort_values(by='total_count', ascending=False).head(5)
df_joined

Unnamed: 0_level_0,start_station_name,end_station_id,total_count
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4214,16293,13373,29666
3005,12177,13252,25429
4215,8525,15669,24194
4210,11391,9908,21299
3014,9084,9787,18871


In [34]:
def heat_map(location, df, random_bike, df_stations=None):
    """Build a folium map with a heat-map layer over the station coordinates.

    Parameters
    ----------
    location : sequence of two coordinates
        ``[lat, lon]`` the map is centred on; passed straight to ``folium.Map``.
    df : any
        Not used; kept so existing calls keep working.
    random_bike : any
        Not used; kept so existing calls keep working.
    df_stations : pandas.DataFrame, optional
        Frame with ``"station_name"`` and ``"coord"`` (``[lat, lon]``) columns.
        Defaults to the notebook-level ``df_LA_stations``.

    Returns
    -------
    folium.Map
        The map with the heat-map layer added.

    Notes
    -----
    Every row of the station frame contributes one point, so the layer shows
    where stations are located, not how often they are used.
    """
    if df_stations is None:
        # Fall back to the notebook-level station table, as before.
        df_stations = df_LA_stations

    # Named differently from the function so the function itself is not shadowed.
    station_heat_map = folium.Map(
        location=location,
        tiles='OpenStreetMap',
        zoom_start=11,
        control_scale=True,
        max_zoom=20)

    # Same rows as the former `name == name` comparison: only named stations.
    named_stations = df_stations[df_stations['station_name'].notna()]
    station_heat_map.add_child(
        plugins.HeatMap(named_stations["coord"].tolist(), radius=20))
    return station_heat_map

In [35]:
# Show the station heat map centred on LA.
# NOTE: heat_map() does not read its 2nd and 3rd arguments; the layer is built
# from the notebook-level df_LA_stations table.
heat_map(center, df_LA_geo, df_LA_geo["start_station_id"].unique())

**Get Location of top 5 stations on the Map**

In [59]:
def getLocation(station_id):
    """Mark the given stations on a folium map centred on Los Angeles.

    Parameters
    ----------
    station_id : iterable
        Station ids to show; each must occur in ``df_LA_stations['id']``.

    Returns
    -------
    folium.Map
        Map with one circle marker per requested station. The popup of each
        marker shows the station name and id.

    Raises
    ------
    IndexError
        If an id is not present in ``df_LA_stations``.
    """
    # Named differently from the function so the function itself is not shadowed.
    location_map = folium.Map(
        location=[34.053691, -118.242766],  # LA city centre as numeric [lat, lon]
        tiles='OpenStreetMap',
        zoom_start=11,
        control_scale=True,
        max_zoom=20)

    for station in station_id:
        matches = df_LA_stations[df_LA_stations['id'] == station]
        if matches.empty:
            # Same exception type as the former `.values[0]` lookup, but with a
            # message that says which id is missing.
            raise IndexError(f"station id {station!r} not found in df_LA_stations")
        first_match = matches.iloc[0]
        folium.CircleMarker(
            location=first_match['coord'],
            radius=5,
            popup=f"{first_match['station_name']} ({station})",
            color='crimson',
            fill_color='crimson',
            # `alpha` is not a folium/Leaflet path option and had no effect;
            # `fill_opacity` is the supported way to set the fill transparency.
            fill_opacity=0.2,
        ).add_to(location_map)
    return location_map

In [63]:
# Top 5 stations by total trip count: take the ids from the `df_joined` table
# computed above instead of a hand-copied list, so the map follows the data.
top_station_ids = df_joined.index.tolist()
getLocation(top_station_ids)

**Trip Movement**

In [12]:
# pick random bike and then list all origin and destination points
bike_select_bonn = df_bonn[df_bonn["b_number"]==random_bike_bonn]
origins_bonn = list(bike_select_bonn["orig"])
destinations_bonn = list(bike_select_bonn["dest"])

# pick random bike and then list all origin and destination points
bike_select_potsdam = df_potsdam[df_potsdam["b_number"]==random_bike_potsdam]
origins_potsdam = list(bike_select_potsdam["orig"])
destinations_potsdam = list(bike_select_potsdam["dest"])

NameError: name 'df_bonn' is not defined

In [None]:
def trips_map(location, origins_list, destinations_list):
    """Draw trips as origin marker, destination marker and a connecting line.

    Parameters
    ----------
    location : sequence of two coordinates
        ``[lat, lon]`` the map is centred on; passed straight to ``folium.Map``.
    origins_list : iterable of ``[lat, lon]``
        Start point of each trip.
    destinations_list : iterable of ``[lat, lon]``
        End point of each trip, in the same order as ``origins_list``.
        The two iterables are paired with ``zip``, so surplus entries of the
        longer one are ignored.

    Returns
    -------
    folium.Map
        Map with a crimson marker per origin, a blue marker per destination
        and a gray line per trip.
    """
    # Named differently from the function so the function itself is not shadowed.
    movement_map = folium.Map(
        location=location,
        tiles='OpenStreetMap',
        zoom_start=11,
        control_scale=True,
        max_zoom=20,
        height=1000)

    def add_endpoint(point, label, colour):
        """Add one trip endpoint marker whose popup says which end it is."""
        folium.CircleMarker(
            location=point,
            radius=5,
            popup=label,
            color=colour,
            fill_color=colour,
            # `alpha` is not a folium/Leaflet path option and had no effect;
            # `fill_opacity` is the supported way to set the fill transparency.
            fill_opacity=0.2,
        ).add_to(movement_map)

    for orig, dest in zip(origins_list, destinations_list):
        add_endpoint(orig, 'Origin', 'crimson')
        add_endpoint(dest, 'Destination', 'blue')
        # Trip movement
        folium.PolyLine(
            (orig, dest),
            color="gray",
            weight=1,
            opacity=1).add_to(movement_map)
    return movement_map

In [None]:
# `bonn`, `origins_bonn` and `destinations_bonn` are never defined in this
# notebook, so draw the trips of one LA bike instead. The inputs are derived
# here directly from df_LA_geo so that this cell runs on its own.
trips_LA = df_LA_geo.dropna(subset=['start_lat', 'start_lon', 'end_lat', 'end_lon'])
# Use the bike of the first complete trip as the example bike
trips_LA = trips_LA[trips_LA['bike_id'] == trips_LA['bike_id'].iloc[0]]
trips_map(
    center,
    trips_LA[['start_lat', 'start_lon']].values.tolist(),
    trips_LA[['end_lat', 'end_lon']].values.tolist(),
)