In [1]:
import plotly.express as px
from utz import *

from geopy.distance import distance

### Load August 2022 data

In [2]:
# Load one month of normalized rides: use the local copy under static/assets
# when it exists, otherwise fall back to the S3 bucket.
month = '202208'
path = f'static/assets/{month}.parquet'
path = path if exists(path) else f's3://ctbk/normalized/{month}.parquet'
d = read_parquet(path)
d

Unnamed: 0,Ride ID,Rideable Type,Start Time,Stop Time,Start Station Name,Start Station ID,End Station Name,End Station ID,Start Station Latitude,Start Station Longitude,End Station Latitude,End Station Longitude,Gender,User Type,Start Region,End Region
0,428051654CF1145B,classic_bike,2022-08-28 00:34:58,2022-08-28 00:55:20,Walker St & Baxter St,5351.07,Lexington Ave & E 24 St,6089.07,40.717411,-73.999573,40.740259,-73.984092,0,Subscriber,NYC,NYC
1,7282036E3ECDDD65,classic_bike,2022-08-31 16:33:38,2022-08-31 16:35:17,Lexington Ave & E 29 St,6164.09,Lexington Ave & E 24 St,6089.07,40.743116,-73.982154,40.740259,-73.984092,0,Subscriber,NYC,NYC
2,375C59BE1271E31C,electric_bike,2022-08-27 23:22:10,2022-08-27 23:34:57,E 132 St & Madison Ave,7748.02,Melrose Ave & E 150 St,7879.01,40.810224,-73.937195,40.816827,-73.917338,0,Subscriber,NYC,NYC
3,7281A7F21CE571AF,classic_bike,2022-08-26 19:01:01,2022-08-26 19:09:26,Lexington Ave & E 29 St,6164.09,E 13 St & 2 Ave,5820.08,40.743116,-73.982154,40.731539,-73.985302,0,Subscriber,NYC,NYC
4,A5093D7D77713F87,classic_bike,2022-08-25 22:16:16,2022-08-25 22:29:13,E 16 St & Irving Pl,5938.11,W 31 St & 7 Ave,6331.01,40.735367,-73.987974,40.749156,-73.991600,0,Subscriber,NYC,NYC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115226,D0F075EEE4354A37,electric_bike,2022-08-03 19:00:22,2022-08-03 19:07:13,City Hall - Washington St & 1 St,HB105,11 St & Washington St,HB502,40.737360,-74.030970,40.749985,-74.027150,0,Subscriber,HB,HB
115227,15DE9EFD3E032E69,classic_bike,2022-08-31 18:42:27,2022-08-31 18:47:01,City Hall - Washington St & 1 St,HB105,8 St & Washington St,HB603,40.737360,-74.030970,40.745984,-74.028199,0,Subscriber,HB,HB
115228,899BAFD096248F46,classic_bike,2022-08-24 14:26:28,2022-08-24 14:30:47,City Hall - Washington St & 1 St,HB105,8 St & Washington St,HB603,40.737360,-74.030970,40.745984,-74.028199,0,Subscriber,HB,HB
115229,AB474610295F3330,classic_bike,2022-08-24 18:50:31,2022-08-24 19:01:00,Riverview Park,JC057,11 St & Washington St,HB502,40.744757,-74.043725,40.749985,-74.027150,0,Subscriber,JC,HB


### Parse Station Names, IDs, Lat/Lngs

In [5]:
# Stack the start-station and end-station columns of every ride into one long
# frame with a shared schema. `Start` records which end of the ride a row
# describes (True = ride start, False = ride end).
columns = {
    'Start Station ID': 'Station ID',
    'Start Station Name': 'Station Name',
    'Start Station Latitude': 'Latitude',
    'Start Station Longitude': 'Longitude',
}
starts = d[list(columns)].rename(columns=columns).assign(Start=True)

columns = {
    'End Station ID': 'Station ID',
    'End Station Name': 'Station Name',
    'End Station Latitude': 'Latitude',
    'End Station Longitude': 'Longitude',
}
ends = d[list(columns)].rename(columns=columns).assign(Start=False)

# Row labels from `d` are kept, so each label appears once per ride end.
station_entries = pd.concat([starts, ends])
station_entries

Unnamed: 0,Station ID,Station Name,Latitude,Longitude,Start
0,5351.07,Walker St & Baxter St,40.717411,-73.999573,True
1,6164.09,Lexington Ave & E 29 St,40.743116,-73.982154,True
2,7748.02,E 132 St & Madison Ave,40.810224,-73.937195,True
3,6164.09,Lexington Ave & E 29 St,40.743116,-73.982154,True
4,5938.11,E 16 St & Irving Pl,40.735367,-73.987974,True
...,...,...,...,...,...
115226,HB502,11 St & Washington St,40.749985,-74.027150,False
115227,HB603,8 St & Washington St,40.745984,-74.028199,False
115228,HB603,8 St & Washington St,40.745984,-74.028199,False
115229,HB502,11 St & Washington St,40.749985,-74.027150,False


In [6]:
# Key every ride endpoint by its Station ID and pull out one Series per
# attribute. IDs repeat, since there is one row per ride start/end.
idxd = station_entries.set_index('Station ID')
names, lats, lngs = (idxd[col] for col in ('Station Name', 'Latitude', 'Longitude'))

In [94]:
# How many ride endpoints were recorded under each (Station ID, Station Name)
# pairing, ordered by ID and then count, both descending.
station_names_hist = (
    station_entries
    .groupby(['Station ID', 'Station Name'])
    .size()
    .reset_index(name='count')
    .sort_values(['Station ID', 'count'], ascending=False)
)
station_names_hist

Unnamed: 0,Station ID,Station Name,count
1671,JC109,Bergen Ave & Sip Ave,4360
1670,JC108,Bergen Ave & Stegman St,327
1669,JC107,Grant Ave & MLK Dr,149
1668,JC106,Columbus Dr at Exchange Pl,4457
1667,JC105,Hoboken Ave at Monmouth St,4888
...,...,...,...
4,2883.03,3 Ave & Wakeman Pl,525
3,2872.02,63 St & 5 Ave,178
2,2832.03,4 Ave & Shore Road Dr,934
1,2782.02,5 Ave & 66 St,921


In [101]:
# Per Station ID, the list of per-name counts (descending, inherited from the
# sort order of `station_names_hist`); IDs with more than one entry were seen
# under more than one name.
#
# Named `station_name_counts` rather than `station_counts` so that re-running
# this cell can never overwrite the `station_counts()` helper defined further
# down the notebook. Selecting the 'count' column before aggregating also
# avoids the deprecated DataFrameGroupBy.apply-over-grouping-columns path.
# Each element is now a plain list instead of an ndarray.
station_name_counts = (
    station_names_hist
    .groupby('Station ID')['count']
    .agg(list)
    .rename('counts')
)
station_name_counts[station_name_counts.apply(len) > 1]

Station ID
4488.09      [6776, 31]
4781.05      [2975, 25]
5329.08      [3899, 36]
5382.07    [18437, 211]
6535.04    [11898, 226]
6708.04    [16538, 511]
Name: counts, dtype: object

In [15]:
# Distinct names observed for each Station ID, the number of distinct names per
# ID (ascending), and the subset of IDs that appear under more than one name.
station_names = names.groupby(level=0).unique()
station_name_hist = station_names.map(len).sort_values()
name_dupes = station_name_hist.loc[station_name_hist > 1]
name_dupes

Station ID
6708.04    2
5329.08    2
5382.07    2
4488.09    2
6535.04    2
4781.05    2
Name: Station Name, dtype: int64

In [91]:
# Number of distinct names per Station ID, sorted ascending (most IDs have exactly one).
station_name_hist

Station ID
2733.03    1
7414.17    1
7409.08    1
7407.13    1
7393.09    1
          ..
5329.08    2
5382.07    2
4488.09    2
6535.04    2
4781.05    2
Name: Station Name, Length: 1666, dtype: int64

In [38]:
# One entry per Station ID, so this is the number of distinct station IDs seen.
len(station_name_hist)

1666

In [17]:
# Show the actual name variants for the Station IDs flagged in `name_dupes`.
dupe_names = station_names.loc[name_dupes.index]
dupe_names.reset_index()

Unnamed: 0,Station ID,Station Name
0,6708.04,"[Broadway\t& W 48 St, Broadway\t& W 48 St]"
1,5329.08,"[Murray St\t& West St, Murray St\t& West St]"
2,5382.07,"[Forsyth St\t& Grand St, Forsyth St\t& Grand St]"
3,4488.09,"[Boerum Pl\t& Pacific St, Boerum Pl\t& Pacific..."
4,6535.04,"[W 34 St &\tHudson Blvd E, W 34 St &\tHudson B..."
5,4781.05,"[Nassau St\t& Duffield St, Nassau St\t& Duffie..."


In [39]:
# Every distinct (Station ID, Station Name) pairing, ordered by ID then name.
normalized_names = (
    names
    .reset_index()
    .sort_values(by=['Station ID', 'Station Name'])
    .drop_duplicates()
)
normalized_names

Unnamed: 0,Station ID,Station Name
266718,2733.03,67 St & Erik Pl
385403,2782.02,5 Ave & 66 St
382419,2832.03,4 Ave & Shore Road Dr
3198,2872.02,63 St & 5 Ave
375285,2883.03,3 Ave & Wakeman Pl
...,...,...
3693084,JC105,Hoboken Ave at Monmouth St
3678168,JC106,Columbus Dr at Exchange Pl
3724393,JC107,Grant Ave & MLK Dr
3717657,JC108,Bergen Ave & Stegman St


In [43]:
# Bounding box of the coordinates reported under each Station ID, plus the
# extent of that box in each direction. A large span means rides disagree about
# where the station is.
lat_groups = lats.groupby(level=0)
lng_groups = lngs.groupby(level=0)
# `sxs` comes from utz; it is used here to place the four Series side by side.
bounds = sxs(
    lat_groups.min().rename('lat_min'),
    lat_groups.max().rename('lat_max'),
    lng_groups.min().rename('lng_min'),
    lng_groups.max().rename('lng_max'),
)
bounds = bounds.assign(
    lat_span=bounds.lat_max - bounds.lat_min,
    lng_span=bounds.lng_max - bounds.lng_min,
)
bounds

Unnamed: 0_level_0,lat_min,lat_max,lng_min,lng_max,lat_span,lng_span
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2733.03,40.633025,40.633505,-74.017024,-74.016480,0.000481,0.000545
2782.02,40.635679,40.636003,-74.020005,-74.019736,0.000324,0.000269
2832.03,40.636850,40.637096,-74.022304,-74.021483,0.000246,0.000820
2872.02,40.637553,40.637744,-74.017844,-74.017689,0.000191,0.000155
2883.03,40.638202,40.638466,-74.024832,-74.024564,0.000264,0.000268
...,...,...,...,...,...,...
JC105,40.735188,40.735444,-74.047022,-74.046410,0.000256,0.000612
JC106,40.716561,40.717522,-74.033181,-74.032173,0.000961,0.001008
JC107,40.709047,40.709213,-74.080888,-74.080616,0.000165,0.000272
JC108,40.706575,40.706798,-74.086814,-74.086634,0.000223,0.000180


In [44]:
# The ten Station IDs with the widest latitude span (largest last).
bounds.sort_values(by='lat_span').tail(10)

Unnamed: 0_level_0,lat_min,lat_max,lng_min,lng_max,lat_span,lng_span
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5342.1,40.641224,40.74051,-73.992344,-73.98097,0.099286,0.011375
6948.1,40.717022,40.817503,-74.0061,-73.922721,0.100481,0.08338
5159.07,40.709261,40.809962,-73.991869,-73.936244,0.100701,0.055625
6364.1,40.74801,40.853189,-73.989603,-73.905468,0.105179,0.084134
6560.01,40.726019,40.834426,-73.995292,-73.917783,0.108408,0.07751
5610.09,40.723341,40.833303,-74.004962,-73.939326,0.109962,0.065636
5561.06,40.716918,40.833303,-73.996812,-73.939326,0.116385,0.057486
7981.16,40.698082,40.82495,-73.964292,-73.909528,0.126868,0.054765
5065.14,40.706244,40.833303,-74.005782,-73.939326,0.127059,0.066456
4452.01,40.688802,40.833303,-73.988918,-73.939326,0.144501,0.049592


In [45]:
# The ten Station IDs with the widest longitude span (largest last).
bounds.sort_values(by='lng_span').tail(10)

Unnamed: 0_level_0,lat_min,lat_max,lng_min,lng_max,lat_span,lng_span
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7432.09,40.787878,40.869428,-73.970889,-73.891423,0.08155,0.079467
5847.01,40.703758,40.78011,-74.007358,-73.927344,0.076352,0.080014
7188.13,40.732979,40.793375,-74.006081,-73.924628,0.060396,0.081453
6948.1,40.717022,40.817503,-74.0061,-73.922721,0.100481,0.08338
6364.1,40.74801,40.853189,-73.989603,-73.905468,0.105179,0.084134
6140.05,40.677729,40.766682,-74.000285,-73.9159,0.088954,0.084384
6224.05,40.69738,40.743998,-74.000856,-73.9159,0.046618,0.084956
6072.11,40.69738,40.741196,-74.002438,-73.9159,0.043815,0.086537
5267.09,40.654847,40.717252,-74.014316,-73.920753,0.062406,0.093563
6072.06,40.735448,40.813394,-74.006012,-73.909528,0.077946,0.096485


In [110]:
def row_sketch(a):
    """Summarize a non-empty sequence of counts whose first element is the mode's count.

    Returns a dict with the leading count (`mode_count`), the runner-up
    (`second`, NaN when there is none), the sum of everything after the first
    element (`restsum`), the grand `total`, the input itself (`counts`), the
    ratio of the first two entries (`first/second`, NaN when there is no second
    entry), the leading count's share of the total (`mode_pct`), and the number
    of entries (`num`).
    """
    top = a[0]
    num = len(a)
    has_runner_up = num > 1
    restsum = sum(a[1:])
    total = top + restsum

    if has_runner_up:
        second = a[1]
        ratio = top / second
    else:
        second = nan
        ratio = nan

    # Key order matters: callers expand this dict into DataFrame columns.
    return {
        'mode_count': top,
        'second': second,
        'restsum': restsum,
        'total': total,
        'counts': a,
        'first/second': ratio,
        'mode_pct': top / total,
        'num': num,
    }

def mode_sketch(df, groupby, thresh=0.5):
    """Pick the most common value(s) of `groupby` for every index entry of `df`.

    Args:
        df: frame whose (named) index identifies the entity being summarized;
            index labels are expected to repeat.
        groupby: column name, or list of column names, whose most frequent
            combination is selected for each index entry.
        thresh: index entries whose winning combination accounts for less than
            this fraction of their rows are reported on stderr.

    Returns:
        A frame indexed like `df`, holding the winning `groupby` value(s) next
        to the `row_sketch` statistics for that entry, sorted by ascending
        `mode_pct`.

    Raises:
        RuntimeError: if `df`'s index has no name.
    """
    idx_name = df.index.name
    if not idx_name:
        raise RuntimeError('Index needs a name')
    group_cols = [groupby] if isinstance(groupby, str) else groupby

    # Frequency of every (index entry, value combination) pairing.
    row_hist = (
        df
        .reset_index()
        .groupby([idx_name] + group_cols)
        .size()
        .rename('count')
        .reset_index()
    )

    # Per index entry: all of its frequencies, largest first, then summarized.
    counts = row_hist.groupby(idx_name)['count'].apply(lambda s: sorted(s.values, reverse=True))
    row_sketches = counts.apply(row_sketch).apply(Series)

    below_thresh = row_sketches[row_sketches.mode_pct < thresh]
    if not below_thresh.empty:
        stderr.write(f'{len(below_thresh)} index entries with mode_pct < {thresh}:\n{below_thresh}\n')

    # After the descending sort, the first row kept per index entry is its mode.
    modes = (
        row_hist
        .sort_values([idx_name, 'count'], ascending=False)
        .drop_duplicates(subset=idx_name)
        .set_index(idx_name)
    )
    # `sxs` comes from utz; it is used here to place both frames side by side.
    return sxs(modes, row_sketches).drop(columns=['count']).sort_values('mode_pct')

In [107]:
# Most frequently used name for each Station ID, with how dominant that name is.
annotated_station_names = mode_sketch(names.to_frame(), 'Station Name')
annotated_station_names

Unnamed: 0_level_0,Station Name,mode_count,second,restsum,total,counts,first/second,mode_pct,num
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
6708.04,Broadway\t& W 48 St,16538,511.0,511,17049,"[16538, 511]",32.363992,0.970028,2
6535.04,W 34 St &\tHudson Blvd E,11898,226.0,226,12124,"[11898, 226]",52.646018,0.981359,2
5382.07,Forsyth St\t& Grand St,18437,211.0,211,18648,"[18437, 211]",87.379147,0.988685,2
5329.08,Murray St\t& West St,3899,36.0,36,3935,"[3899, 36]",108.305556,0.990851,2
4781.05,Nassau St\t& Duffield St,2975,25.0,25,3000,"[2975, 25]",119.000000,0.991667,2
...,...,...,...,...,...,...,...,...,...
7409.08,W 84 St & Broadway,7321,,0,7321,[7321],,1.000000,1
7414.17,3 Ave & E 100 St,2460,,0,2460,[2460],,1.000000,1
7432.04,W 90 St & Amsterdam Ave,4956,,0,4956,[4956],,1.000000,1
6450.12,8 Ave & W 33 St,18921,,0,18921,[18921],,1.000000,1


In [104]:
# Most frequently reported (Latitude, Longitude) pair for each Station ID, with
# how dominant that pair is among all coordinates reported under the ID.
annotated_stations = mode_sketch(sxs(lats, lngs), ['Latitude', 'Longitude',])
annotated_stations

Unnamed: 0_level_0,Latitude,Longitude,mode_count,second,restsum,total,counts,first/second,mode_pct,num
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
8356.04,40.848793,-73.903178,1773,2.0,1014,2787,"[1773, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",886.500000,0.636168,1014
8156.03,40.836179,-73.907301,3556,3.0,1972,5528,"[3556, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",1185.333333,0.643271,1954
8160.03,40.835334,-73.915400,1109,2.0,605,1714,"[1109, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, ...",554.500000,0.647025,600
8156.01,40.837213,-73.908345,1316,2.0,697,2013,"[1316, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",658.000000,0.653751,695
6134.13,40.741120,-73.921037,2,1.0,1,3,"[2, 1]",2.000000,0.666667,2
...,...,...,...,...,...,...,...,...,...,...
7271.01,40.779770,-73.923162,2862,1.0,136,2998,"[2862, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",2862.000000,0.954636,137
5779.11,40.730311,-73.980472,16704,24.0,776,17480,"[16704, 24, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...",696.000000,0.955606,753
6599.01,40.757284,-73.953600,5468,1.0,252,5720,"[5468, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",5468.000000,0.955944,253
5779.09,40.729667,-73.980680,8425,21.0,376,8801,"[8425, 21, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",401.190476,0.957278,357


In [109]:
# Canonical station table keyed by Station ID: the modal name next to the modal
# coordinates (`sxs` is the utz helper used above to combine columns).
stations = sxs(
    annotated_station_names['Station Name'],
    annotated_stations[['Latitude', 'Longitude']],
)
stations

Unnamed: 0_level_0,Station Name,Latitude,Longitude
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6708.04,Broadway\t& W 48 St,40.760177,-73.984868
6535.04,W 34 St &\tHudson Blvd E,40.755167,-74.000599
5382.07,Forsyth St\t& Grand St,40.717798,-73.993161
5329.08,Murray St\t& West St,40.715030,-74.012460
4781.05,Nassau St\t& Duffield St,40.698477,-73.983840
...,...,...,...
7409.08,W 84 St & Broadway,40.786795,-73.977112
7414.17,3 Ave & E 100 St,40.787721,-73.947283
7432.04,W 90 St & Amsterdam Ave,40.790179,-73.972889
6450.12,8 Ave & W 33 St,40.751551,-73.993934


### Create unified "dockings" with melted start/end

In [111]:
# One row per ride endpoint ("docking"): the station involved, when it happened
# (day of month, hour, weekday number) and whether it was the ride's start.
start_times = d['Start Time']
end_times = d['Stop Time']

starts = (
    d[['Start Station ID']]
    .rename(columns={'Start Station ID': 'Station ID'})
    .assign(
        Day=start_times.dt.day,
        Hour=start_times.dt.hour,
        Weekday=start_times.dt.weekday,
        Start=True,
    )
)
ends = (
    d[['End Station ID']]
    .rename(columns={'End Station ID': 'Station ID'})
    .assign(
        Day=end_times.dt.day,
        Hour=end_times.dt.hour,
        Weekday=end_times.dt.weekday,
        Start=False,
    )
)

dockings = pd.concat([starts, ends])
dockings

Unnamed: 0,Station ID,Day,Hour,Weekday,Start
0,5351.07,28,0,6,True
1,6164.09,31,16,2,True
2,7748.02,27,23,5,True
3,6164.09,26,19,4,True
4,5938.11,25,22,3,True
...,...,...,...,...,...
115226,HB502,3,19,2,False
115227,HB603,31,18,2,False
115228,HB603,24,14,2,False
115229,HB502,24,19,2,False


#### JC overall start/end counts

In [112]:
# Dockings at stations whose ID starts with "JC" or "HB"; missing IDs are
# treated as non-matches. Then compare how many are ride starts vs. ride ends.
jcs = dockings.loc[dockings['Station ID'].str.match('(?:JC|HB)', na=False)]
jcs['Start'].value_counts()

False    114840
True     114839
Name: Start, dtype: int64

In [113]:
# Ride starts per JC/HB station, joined to each station's canonical name and
# coordinates (stations missing from `stations` drop out of the inner join).
jc_starts = jcs.loc[jcs['Start']].groupby('Station ID').size().rename('Count')
jc_llcs = pd.merge(jc_starts.to_frame(), stations, left_index=True, right_index=True)
jc_llcs

Unnamed: 0_level_0,Count,Station Name,Latitude,Longitude
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HB101,3505,Hoboken Terminal - Hudson St & Hudson Pl,40.735938,-74.030305
HB102,4545,Hoboken Terminal - River St & Hudson Pl,40.736068,-74.029127
HB103,4567,South Waterfront Walkway - Sinatra Dr & 1 St,40.736982,-74.027781
HB105,2911,City Hall - Washington St & 1 St,40.73736,-74.03097
HB201,2437,12 St & Sinatra Dr N,40.750604,-74.02402
HB202,2317,14 St Ferry - 14 St & Shipyard Ln,40.752961,-74.024353
HB203,1355,Bloomfield St & 15 St,40.75453,-74.02658
HB301,1367,4 St & Grand St,40.742258,-74.035111
HB302,1040,6 St & Grand St,40.744398,-74.034501
HB303,1276,Clinton St & 7 St,40.74542,-74.03332


#### Build date range (incl. weekday vs. weekends)

In [114]:
# Every calendar day of August 2022, split into weekdays (Mon-Fri) and weekend days.
dates = pd.date_range(start='2022-08-01', end='2022-08-31')
week_dts, wknd_dts = dates[dates.weekday < 5], dates[dates.weekday >= 5]
len(week_dts), len(wknd_dts)

(23, 8)

### Station Counts helper

In [121]:
from typing import Collection

def station_counts(*keys, jc=None, weekdays=None, daily_avg=None):
    """Count dockings (ride starts + ends) per station, with optional breakdowns.

    Reads the notebook-level `dockings`, `stations`, `dates`, `week_dts` and
    `wknd_dts` objects.

    Args:
        *keys: extra `dockings` columns to group by in addition to
            'Station ID' (e.g. 'Day', 'Hour', 'Weekday').
        jc: True keeps only stations whose ID starts with "JC" or "HB";
            False keeps only the others; None (default) keeps all stations.
        weekdays: True keeps Monday-Friday dockings, False keeps
            Saturday/Sunday dockings, a collection of weekday numbers
            (0 = Monday) keeps exactly those days, None (default) keeps all.
        daily_avg: whether to add an 'Avg' column holding Count divided by the
            number of calendar days in the selected date set. Defaults to True
            unless grouping by 'Day' or 'Weekday'.

    Returns:
        DataFrame indexed by 'Station ID', sorted by ascending 'Count', with
        columns 'Station Name', *keys, 'Count' and, when requested, 'Avg'.
        Stations missing from `stations` are dropped by the inner merge.

    Raises:
        ValueError: if `weekdays` is not None, a bool or a collection, or if
            `daily_avg` is requested together with a 'Day'/'Weekday' grouping.
    """
    keys = list(keys)
    daily_grouping = 'Day' in keys or 'Weekday' in keys
    if daily_avg is None:
        daily_avg = not daily_grouping
    if daily_avg and daily_grouping:
        # Validate before doing any of the (large) groupby/merge work.
        raise ValueError("Can only compute daily averages if daily grouping is not applied")

    df = dockings
    dts = dates
    if weekdays is True:
        df = df[df.Weekday < 5]
        dts = week_dts
    elif weekdays is False:
        df = df[df.Weekday >= 5]
        dts = wknd_dts
    elif weekdays is not None:
        if not isinstance(weekdays, Collection):
            raise ValueError(f'Unrecognized weekdays: {weekdays}')
        weekdays = set(weekdays)
        df = df[df.Weekday.isin(weekdays)]
        # `DatetimeIndex.weekday` is an Index, which has `.isin` but no `.apply`.
        dts = dts[dts.weekday.isin(weekdays)]

    counts = df.groupby(['Station ID'] + keys).size().rename('Count')
    counts = (
        counts
        .reset_index()
        .merge(stations, left_on='Station ID', right_index=True)
        .sort_values('Count')
    )
    cols = ['Station Name'] + keys + ['Count']
    if daily_avg:
        counts['Avg'] = counts['Count'] / len(dts)
        cols += ['Avg']

    counts = counts.set_index('Station ID')[cols]

    # Previously the False branch tested an undefined name (`js`), so every
    # call without jc=True raised NameError.
    if jc is True:
        counts = counts[counts.index.to_series().str.match('(?:JC|HB)')]
    elif jc is False:
        counts = counts[~counts.index.to_series().str.match('(?:JC|HB)')]

    return counts

#### JC counts, by {day,station}

In [122]:
# Dockings per (station, day of month) for JC/HB stations, sorted by ascending Count.
jcd = station_counts('Day', jc=True)
jcd

Unnamed: 0_level_0,Station Name,Day,Count
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JC082,Manila & 1st,17,1
JC107,Grant Ave & MLK Dr,7,1
JC094,Glenwood Ave,6,1
JC107,Grant Ave & MLK Dr,20,1
JC095,Bergen Ave,1,1
...,...,...,...
JC005,Grove St PATH,3,409
HB102,Hoboken Terminal - River St & Hudson Pl,5,410
JC005,Grove St PATH,17,413
HB103,South Waterfront Walkway - Sinatra Dr & 1 St,13,455


#### Biggest {station,day}s in JC

In [123]:
# Restrict to "JC"-prefixed station IDs and keep the 50 busiest (station, day)
# rows; `jcd` is already sorted by ascending Count, so the busiest come last.
jcd[jcd.index.str.startswith('JC')].tail(50)

Unnamed: 0_level_0,Station Name,Day,Count
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JC098,Washington St,17,216
JC052,Liberty Light Rail,13,216
JC008,Newport Pkwy,21,217
JC066,Newport PATH,7,219
JC008,Newport Pkwy,7,221
JC052,Liberty Light Rail,14,222
JC008,Newport Pkwy,6,222
JC008,Newport Pkwy,28,223
JC066,Newport PATH,6,224
JC106,Columbus Dr at Exchange Pl,6,225


### Monthly sum + Daily avg (all JC+HB)

In [124]:
# Monthly docking total and per-calendar-day average for every JC/HB station.
sc_all = station_counts(jc=True)
sc_all

Unnamed: 0_level_0,Station Name,Count,Avg
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JC107,Grant Ave & MLK Dr,149,4.806452
JC108,Bergen Ave & Stegman St,327,10.548387
JC063,Jackson Square,411,13.258065
JC051,Union St,559,18.032258
JC065,Dey St,616,19.870968
JC094,Glenwood Ave,646,20.83871
JC018,5 Corners Library,718,23.16129
JC059,Heights Elevator,879,28.354839
JC084,Communipaw & Berry Lane,1031,33.258065
JC080,Leonard Gordon Park,1111,35.83871


### Monthly sum + Daily avg, weekdays only (all JC+HB)

In [125]:
# Same as above, restricted to Monday-Friday; Avg divides by the number of weekdays in the month.
sc_wkdy = station_counts(jc=True, weekdays=True)
sc_wkdy

Unnamed: 0_level_0,Station Name,Count,Avg
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JC107,Grant Ave & MLK Dr,112,4.869565
JC108,Bergen Ave & Stegman St,235,10.217391
JC063,Jackson Square,296,12.869565
JC051,Union St,391,17.0
JC094,Glenwood Ave,479,20.826087
JC065,Dey St,486,21.130435
JC018,5 Corners Library,493,21.434783
JC059,Heights Elevator,619,26.913043
JC084,Communipaw & Berry Lane,776,33.73913
JC080,Leonard Gordon Park,829,36.043478


### Monthly sum + Daily avg, weekends only (all JC+HB)

In [126]:
# Same as above, restricted to Saturday/Sunday; Avg divides by the number of weekend days in the month.
sc_wknd = station_counts(jc=True, weekdays=False)
sc_wknd

Unnamed: 0_level_0,Station Name,Count,Avg
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JC107,Grant Ave & MLK Dr,37,4.625
JC108,Bergen Ave & Stegman St,92,11.5
JC063,Jackson Square,115,14.375
JC065,Dey St,130,16.25
JC094,Glenwood Ave,167,20.875
JC051,Union St,168,21.0
JC018,5 Corners Library,225,28.125
JC095,Bergen Ave,253,31.625
JC084,Communipaw & Berry Lane,255,31.875
JC093,Fairmount Ave,256,32.0


### Busiest 5 stations

In [127]:
# Names of the five JC/HB stations with the largest monthly totals
# (`sc_all` is sorted by ascending Count, so they are the last five rows).
top_stations = sc_all['Station Name'].tail(5)
top_stations

Station ID
JC008                                    Newport Pkwy
HB101        Hoboken Terminal - Hudson St & Hudson Pl
HB102         Hoboken Terminal - River St & Hudson Pl
HB103    South Waterfront Walkway - Sinatra Dr & 1 St
JC005                                   Grove St PATH
Name: Station Name, dtype: object

### Daily dockings (start+end), top 5 JC+HB stations

In [138]:
# Daily docking counts for the five busiest stations, with a real calendar date
# and a Weekday/Weekend label for plotting.
ts = jcd[jcd.index.isin(top_stations.index)].copy()
# Build the dates in one vectorized call from `month` (YYYYMM) plus the
# zero-padded day of month. The earlier per-row `to_dt(...).date()` followed by
# `.astype('datetime64')` relied on a unit-less cast that pandas 2.x rejects,
# and hard-coded the year/month instead of reusing `month`.
ts['Date'] = pd.to_datetime(month + ts['Day'].astype(str).str.zfill(2), format='%Y%m%d')
ts = ts.sort_values('Date')
# Monday-Friday have weekday numbers 0-4.
ts['Weekday'] = (ts['Date'].dt.weekday < 5).map({True: 'Weekday', False: 'Weekend'})
ts

Unnamed: 0_level_0,Station Name,Day,Count,Date,Weekday
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
HB103,South Waterfront Walkway - Sinatra Dr & 1 St,1,234,2022-08-01,Weekday
JC005,Grove St PATH,1,330,2022-08-01,Weekday
HB102,Hoboken Terminal - River St & Hudson Pl,1,243,2022-08-01,Weekday
HB101,Hoboken Terminal - Hudson St & Hudson Pl,1,192,2022-08-01,Weekday
JC008,Newport Pkwy,1,170,2022-08-01,Weekday
...,...,...,...,...,...
JC008,Newport Pkwy,31,161,2022-08-31,Weekday
HB102,Hoboken Terminal - River St & Hudson Pl,31,338,2022-08-31,Weekday
HB101,Hoboken Terminal - Hudson St & Hudson Pl,31,219,2022-08-31,Weekday
JC005,Grove St PATH,31,369,2022-08-31,Weekday


In [139]:
# One line per station: daily dockings across the month.
fig = px.line(
    ts,
    x='Date',
    y='Count',
    color='Station Name',
    labels={'Count': 'Daily ride starts+stops'},
)
# Overlay the legend inside the plot, anchored top-left, on a transparent background.
fig.update_layout(legend={
    'yanchor': 'top',
    'y': 0.99,
    'xanchor': 'left',
    'x': 0.01,
    'bgcolor': 'rgba(0,0,0,0)',
})
fig

### Daily dockings (start+end), Grove St PATH

In [140]:
# Daily dockings at Grove St PATH, bars colored by weekday vs. weekend.
title = 'Grove St PATH Citibike Station, daily starts+ends'
fig = px.bar(
    ts[ts['Station Name'] == 'Grove St PATH'],
    x='Date',
    y='Count',
    color='Weekday',
    # An empty label hides the legend title for the Weekday/Weekend colors.
    labels={'Count': 'Daily ride starts+stops', 'Weekday': ''},
)
# Center the title horizontally.
fig.update_layout(title=dict(text=title, x=0.5, xanchor='center', yanchor='top'))
fig

In [134]:
# Map of JC/HB stations: marker size is proportional to the station's ride-start count.
fig = px.scatter_mapbox(
    jc_llcs,
    lat='Latitude',
    lon='Longitude',
    hover_name='Station Name',
    hover_data=['Station Name', 'Count'],
    size='Count',
    # Unused styling options kept for reference:
    # color_discrete_sequence=["fuchsia"], zoom=3, height=300
)
# OpenStreetMap tiles, with no margin around the map.
fig.update_layout(
    mapbox_style='open-street-map',
    margin={'r': 0, 't': 0, 'l': 0, 'b': 0},
)
fig.show()