### Import libraries

In [4]:
import pandas as pd
import geopy.distance
from geopy import Point
import seaborn as sns
import numpy as np
from ipywidgets import interact  



### Import data and add 'start_point' and 'end_point' columns to calculate trip distance in miles

In [5]:
# Read the trip CSV, keep only its first 1000 rows, then discard every row
# that has a missing value in any column.
df = (
    pd.read_csv('JC-202209-citibike-tripdata.csv')
    .head(1000)
    .dropna()
)

In [6]:
# Print the column dtypes and non-null counts of `df` after the rows with
# missing values were dropped.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 999 entries, 0 to 999
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ride_id             999 non-null    object 
 1   rideable_type       999 non-null    object 
 2   started_at          999 non-null    object 
 3   ended_at            999 non-null    object 
 4   start_station_name  999 non-null    object 
 5   start_station_id    999 non-null    object 
 6   end_station_name    999 non-null    object 
 7   end_station_id      999 non-null    object 
 8   start_lat           999 non-null    float64
 9   start_lng           999 non-null    float64
 10  end_lat             999 non-null    float64
 11  end_lng             999 non-null    float64
 12  member_casual       999 non-null    object 
dtypes: float64(4), object(9)
memory usage: 109.3+ KB


In [7]:
def _row_to_point(row, lat_col, lng_col):
    """Build a geopy Point from the given latitude/longitude columns of one row."""
    return Point(latitude=row[lat_col], longitude=row[lng_col])


# One Point per trip for the origin and one for the destination.
df['start_point'] = df.apply(_row_to_point, axis=1, args=('start_lat', 'start_lng'))
df['end_point'] = df.apply(_row_to_point, axis=1, args=('end_lat', 'end_lng'))

In [8]:
def _trip_distance_mi(row):
    """Return the distance in miles between a row's start and end points."""
    trip = geopy.distance.distance(row['start_point'], row['end_point'])
    return trip.mi


# Distance between start and end point of each trip (not the route actually ridden).
df['distance_mi'] = df.apply(_trip_distance_mi, axis=1)

### Average distance by rideable_type and member_casual

In [9]:
# Mean start-to-end distance (miles) for each bike type.
df.groupby('rideable_type')['distance_mi'].mean()

rideable_type
classic_bike     0.773183
docked_bike      0.951317
electric_bike    0.867704
Name: distance_mi, dtype: float64

In [10]:
# Mean start-to-end distance (miles) for each rider category and bike type.
df.groupby(['member_casual', 'rideable_type'])['distance_mi'].mean()

member_casual  rideable_type
casual         classic_bike     0.898837
               docked_bike      0.951317
               electric_bike    0.957447
member         classic_bike     0.696861
               electric_bike    0.789249
Name: distance_mi, dtype: float64

### Most popular routes from start station to end station

In [11]:
# Count rides for every (start station, end station) pair ...
route_counts = (
    df[['ride_id', 'start_station_name', 'end_station_name']]
    .groupby(by=['start_station_name', 'end_station_name'])
    .count()
)
# ... and show the 20 busiest routes first.
route_counts.sort_values(['ride_id'], ascending=False).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,ride_id
start_station_name,end_station_name,Unnamed: 2_level_1
McGinley Square,Bergen Ave & Sip Ave,58
Baldwin at Montgomery,Bergen Ave & Sip Ave,44
Baldwin at Montgomery,Grove St PATH,43
Hoboken Terminal - River St & Hudson Pl,6 St & Grand St,40
Bergen Ave,Bergen Ave & Sip Ave,39
Astor Place,Bergen Ave & Sip Ave,34
Fairmount Ave,Bergen Ave & Sip Ave,33
Jersey & 3rd,Grove St PATH,28
Bergen Ave & Sip Ave,Fairmount Ave,23
City Hall - Washington St & 1 St,6 St & Grand St,18


### Hourly (24 h) distribution of rides between a selected start and end station

In [12]:
# Dropdown options: stations that appear as a trip origin and, separately,
# stations that appear as a trip destination. The end list must come from
# `end_station_name`, otherwise stations that are only destinations can never
# be selected.
start = df.start_station_name.unique()
end = df.end_station_name.unique()


@interact(start=start, end=end)
def start_end(start, end):
    """Plot ride counts by hour of day for trips from `start` to `end`.

    Parameters
    ----------
    start : str
        Station name compared against ``start_station_name``.
    end : str
        Station name compared against ``end_station_name``.

    Returns
    -------
    The object returned by ``sns.scatterplot``: x is the hour in which the
    rides ended, y (and the marker size) is the number of rides in each
    (start hour, end hour, member_casual) group, coloured by ``member_casual``.
    """
    on_route = (df['start_station_name'] == start) & (df['end_station_name'] == end)

    # Rides on the selected route, counted per exact start/end timestamp and rider type.
    ts = (
        df.loc[on_route, ['started_at', 'ended_at', 'ride_id', 'member_casual']]
        .groupby(by=['started_at', 'ended_at', 'member_casual'])
        .count()
        .sort_values('started_at')
        .reset_index()
    )
    # Timestamps are read from the CSV as strings; parse them so `.dt.hour` is available.
    ts['started_at'] = pd.to_datetime(ts['started_at'])
    ts['ended_at'] = pd.to_datetime(ts['ended_at'])

    # Collapse the per-timestamp counts into per-hour totals.
    ts1 = (
        ts[['started_at', 'ended_at', 'member_casual', 'ride_id']]
        .groupby(by=[ts['started_at'].dt.hour, ts['ended_at'].dt.hour, 'member_casual'])['ride_id']
        .sum()
        .reset_index()
    )

    sns.set(rc={'figure.figsize': (8, 8)})
    sns.set_theme(style="white", palette=None)
    return sns.scatterplot(data=ts1, x='ended_at', y='ride_id', hue='member_casual', size='ride_id')

interactive(children=(Dropdown(description='start', options=('Jersey & 3rd', 'Leonard Gordon Park', 'Jackson S…

### Heat map of the number of rides by start location

In [13]:
import folium
from folium import plugins
from folium.plugins import HeatMap

In [14]:
def simple_folium(df:pd.DataFrame, lat_col:str, lon_col:str, text_cols:list, map_name:str):
    """
    Description
    ----------
        Returns a simple Folium HeatMap with Markers
    ----------
    Parameters
    ----------
        df : pandas DataFrame, required
            The DataFrame with the data to map
        lat_col : str, required
            The name of the column with latitude
        lon_col : str, required
            The name of the column with longitude
        text_cols : list, required
            A list with the names of the columns to print for each marker.
            The values are converted to str and joined with "<br>" to form
            the marker popup. Pass None or an empty list for no popups.
        map_name : str, required
            The layer name given to the heat map (shown in the layer control)
    -------
    Returns
    -------
        The folium Map holding the heat-map layer, the clustered markers
        and a layer control.
    """
    # Preprocess: drop rows that do not have lat/lon
    df = df[df[lat_col].notnull() & df[lon_col].notnull()]

    # [[lat, lon], ...] as plain nested lists, used for both the heat map
    # and the markers
    df_locs = df[[lat_col, lon_col]].values.tolist()

    # Build one popup text per row from the columns the caller asked for.
    # (The caller's `text_cols` is used as given; it is no longer replaced by
    # a hard-coded column list.)
    if text_cols:
        marker_info = [
            "<br>".join(str(feat) for feat in row_values)
            for row_values in zip(*(df[col] for col in text_cols))
        ]
    else:
        # An empty popup list would be zipped against the locations and drop
        # every marker, so signal "no popups" with None instead.
        marker_info = None

    # Set up folium map; the centre is a fixed coordinate
    # (presumably Jersey City, matching the trip data -- confirm before reuse).
    fol_map = folium.Map([40.723332, -74.045953], zoom_start=15)

    # plot heatmap
    heat_map = plugins.HeatMap(df_locs, name=map_name)
    fol_map.add_child(heat_map)

    # plot markers
    markers = plugins.MarkerCluster(locations=df_locs, popups=marker_info, name="Markers")
    fol_map.add_child(markers)

    # Add Layer Control
    folium.LayerControl().add_to(fol_map)

    return fol_map

# Heat map plus clustered markers of where rides start; each marker popup shows
# the start station name and id. The layer label describes the bike-trip data
# being plotted.
simple_folium(df, "start_lat", "start_lng", ["start_station_name","start_station_id"], "Citi Bike ride starts")
