In [1]:
import math
import os
import pathlib
import sys

import pandas as pd

# Enable pandas copy-on-write mode for the whole notebook.
pd.options.mode.copy_on_write = True
# Remove the column-count limit when frames are displayed.
pd.options.display.max_columns = None

# Put the parent directory on sys.path *before* importing project-local
# packages, so the import below does not depend on leftover kernel state.
# NOTE(review): presumably `transit` lives in the parent directory; if it is
# installed as a package instead, this ordering is harmless.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from transit import vmgo

In [2]:
# Project root, two levels above the directory the notebook runs from.
project_root = pathlib.Path('../..')
project_root.resolve()

PosixPath('/home/jnban/projects/roanoke-transit')

In [3]:
# Directory holding the input data files read by the cells below.
data_path = project_root / 'data'
data_path.resolve()

PosixPath('/home/jnban/projects/roanoke-transit/data')

In [4]:
# Directory that receives every generated artifact (CSV, maps, index page).
output_path = project_root / 'web/va/roanoke/ridership'
# parents=True creates any missing intermediate directories as well.
output_path.mkdir(exist_ok=True, parents=True)
output_path.resolve()

PosixPath('/home/jnban/projects/roanoke-transit/web/va/roanoke/ridership')

In [5]:
# Parent directory for the per-route chart folders created further down.
plot_path = output_path / 'plots'
plot_path.mkdir(exist_ok=True)
plot_path.resolve()

PosixPath('/home/jnban/projects/roanoke-transit/web/va/roanoke/ridership/plots')

In [6]:
# Per-stop alighting totals; the column names are taken from header row
# index 2 of the export.
alighting_csv = data_path / 'roanoke' / 'Alighting-May24-Apr25.csv'
df_alighting = pd.read_csv(
    str(alighting_csv),
    skip_blank_lines=True,
    low_memory=False,
    header=[2],
)
df_alighting

Unnamed: 0,Stop Name,Sum of Passenger Alightings
0,10th at Staunton (no number),906.00
1,10th NB at Courtland (no number),229.00
2,10th NB at Greenhurst (no number),584.00
3,10th SB at Courtland (no number),158.00
4,10th SB at Greenhurst (no number),108.00
...,...,...
816,Wise EB at 18th (no number),301.00
817,Wise EB at Indian Village (no number),1812.00
818,Wise WB at 16th (no number),65.00
819,Wise WB at 18th (no number),217.00


In [7]:
# Per-stop boarding totals; the column names are taken from header row
# index 2 of the export.
boarding_csv = data_path / 'roanoke' / 'Boarding-May24-Apr25.csv'
df_boarding = pd.read_csv(
    str(boarding_csv),
    skip_blank_lines=True,
    low_memory=False,
    header=[2],
)
df_boarding

Unnamed: 0,Stop Name,Sum of Passenger Boardings
0,10th at Staunton (no number),443.00
1,10th NB at Courtland (no number),62.00
2,10th NB at Greenhurst (no number),102.00
3,10th SB at Courtland (no number),374.00
4,10th SB at Greenhurst (no number),619.00
...,...,...
816,Wise EB at 18th (no number),459.00
817,Wise EB at Indian Village (no number),220.00
818,Wise WB at 16th (no number),846.00
819,Wise WB at 18th (no number),240.00


In [8]:
# Combine alightings and boardings into one row per stop name. The outer join
# keeps a stop even when it appears in only one of the two exports.
df_ridership = (
    df_alighting.set_index('Stop Name')
    .join(
        df_boarding.set_index('Stop Name'),
        on='Stop Name',
        how='outer'
    )
    .reset_index()
)
# Strip the " (no number)" suffix from the stop names.
# regex=False pins a literal match: read as a regular expression, the
# parentheses would form a group and the suffix would never match.
df_ridership['Stop Name'] = df_ridership['Stop Name'].str.replace(' (no number)', '', regex=False)
df_ridership

Unnamed: 0,Stop Name,Sum of Passenger Alightings,Sum of Passenger Boardings
0,10th NB at Courtland,229.00,62.00
1,10th NB at Greenhurst,584.00,102.00
2,10th SB at Courtland,158.00,374.00
3,10th SB at Greenhurst,108.00,619.00
4,10th SB at Hanover,49.00,64.00
...,...,...,...
816,Wise EB at 18th,301.00,459.00
817,Wise EB at Indian Village,1812.00,220.00
818,Wise WB at 16th,65.00,846.00
819,Wise WB at 18th,217.00,240.00


In [9]:
# Load region data through the project's vmgo loader; the displayed result is
# a dict of Region objects.
regions = vmgo.load_from_datadir(data_path / 'vmgo')
regions

{0: <transit.vmgo.Region at 0x7f72183a3230>}

In [10]:
# Flatten the region -> route -> direction -> stop hierarchy into one record
# per stop of each direction.
rows = [
    {
        "region": region.name,
        "route": route.name,
        "direction": direction.name,
        "stop": stop.name,
        "lat": stop.lat,
        "lon": stop.lon,
    }
    for region in regions.values()
    for route in region.routes
    for direction in route.directions
    for stop in direction.stops
]
df_routes = pd.DataFrame(rows)
df_routes

Unnamed: 0,region,route,direction,stop,lat,lon
0,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,3rd St Station Slip Q,37.272643,-79.946005
1,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,Salem Avenue WB at 5th,37.272404,-79.949678
2,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,5th St at Gilmer Ave,37.276955,-79.948074
3,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,5th St at Harrison,37.279345,-79.946998
4,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,McDowell at 5th,37.281332,-79.946445
...,...,...,...,...,...,...
996,No Region,Star Line Trolley,Star Line Trolley,Jefferson & Walnut,37.260543,-79.941436
997,No Region,Star Line Trolley,Star Line Trolley,Radford University Carilion,37.265781,-79.940977
998,No Region,Star Line Trolley,Star Line Trolley,Jefferson & Bullitt (Northbound),37.268357,-79.940632
999,No Region,Star Line Trolley,Star Line Trolley,Jefferson & Luck,37.269959,-79.940551


In [11]:
# For every stop name, count the route/direction rows that include it. The
# adjusted charts later divide ridership by this number.
df = (
    df_routes.groupby('stop')['direction']
    .count()
    .rename('route_count')
    .reset_index()
)
df_stop_route_count = df
df_stop_route_count

Unnamed: 0,stop,route_count
0,10th NB at Courtland,1
1,10th NB at Greenhurst,1
2,10th SB at Courtland,1
3,10th SB at Greenhurst,1
4,10th SB at Hanover,1
...,...,...
807,Wise EB at 18th,1
808,Wise EB at Indian Village,1
809,Wise WB at 16th,1
810,Wise WB at 18th,1


In [12]:
# Attach ridership totals and the per-stop sharing count to every
# (route, direction, stop) row. Left joins keep stops that have no ridership
# record; their missing values become 0 in the final step.
df = (
    df_routes
    .merge(
        df_ridership.rename(
            columns={
                "Stop Name": "stop",
                "Sum of Passenger Alightings": "alightings",
                "Sum of Passenger Boardings": "boardings",
            }
        ),
        on='stop',
        how='left',
    )
    .merge(df_stop_route_count, on='stop', how='left')
    .assign(
        # The counts are text at this point: drop commas, then convert.
        alightings=lambda frame: frame['alightings'].str.replace(',', '').astype(float),
        boardings=lambda frame: frame['boardings'].str.replace(',', '').astype(float),
    )
    .fillna(0)
)
df_route_ridership = df
df_route_ridership

Unnamed: 0,region,route,direction,stop,lat,lon,alightings,boardings,route_count
0,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,3rd St Station Slip Q,37.272643,-79.946005,33225.0,39040.0,2
1,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,Salem Avenue WB at 5th,37.272404,-79.949678,192.0,683.0,3
2,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,5th St at Gilmer Ave,37.276955,-79.948074,445.0,875.0,2
3,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,5th St at Harrison,37.279345,-79.946998,1470.0,744.0,1
4,No Region,11- 3rd St Station to Valley View Mall,11-Third Street Valley View Mall,McDowell at 5th,37.281332,-79.946445,829.0,628.0,2
...,...,...,...,...,...,...,...,...,...
996,No Region,Star Line Trolley,Star Line Trolley,Jefferson & Walnut,37.260543,-79.941436,0.0,0.0,4
997,No Region,Star Line Trolley,Star Line Trolley,Radford University Carilion,37.265781,-79.940977,0.0,0.0,2
998,No Region,Star Line Trolley,Star Line Trolley,Jefferson & Bullitt (Northbound),37.268357,-79.940632,0.0,0.0,2
999,No Region,Star Line Trolley,Star Line Trolley,Jefferson & Luck,37.269959,-79.940551,0.0,0.0,3


In [13]:
# Publish the combined table twice: once ordered by boardings and once by
# alightings, busiest stops first.
ridership_path = output_path
ridership_path.mkdir(exist_ok=True)

for sort_column, file_suffix in (('boardings', 'boarding'), ('alightings', 'alighting')):
    sorted_ridership = df_route_ridership.sort_values(sort_column, ascending=False)
    sorted_ridership.to_csv(ridership_path / f'ridership-with-stops-sorted-{file_suffix}.csv')

In [14]:
# One row per distinct (stop, lat, lon) combination, dropping the repeats that
# come from stops being listed under several routes/directions.
df_stop_lat_lons = df_route_ridership[['stop', 'lat', 'lon']].drop_duplicates()
df_stop_lat_lons

Unnamed: 0,stop,lat,lon
0,3rd St Station Slip Q,37.272643,-79.946005
1,Salem Avenue WB at 5th,37.272404,-79.949678
2,5th St at Gilmer Ave,37.276955,-79.948074
3,5th St at Harrison,37.279345,-79.946998
4,McDowell at 5th,37.281332,-79.946445
...,...,...,...
979,Jefferson & Walnut,37.260543,-79.941436
980,Radford University Carilion,37.265781,-79.940977
981,Jefferson & Bullitt (Northbound),37.268357,-79.940632
983,Church & Market,37.270458,-79.939109


In [15]:
# Per-stop ridership with coordinates. Unlike the per-route table, missing
# values are deliberately left as NaN here (no fillna), so stops without a
# ridership record stay distinguishable from stops with a count of zero.
df = (
    df_stop_lat_lons
    .merge(
        df_ridership.rename(
            columns={
                "Stop Name": "stop",
                "Sum of Passenger Alightings": "alightings",
                "Sum of Passenger Boardings": "boardings",
            }
        ),
        on='stop',
        how='left',
    )
    .assign(
        # The counts are text at this point: drop commas, then convert.
        alightings=lambda frame: frame['alightings'].str.replace(',', '').astype(float),
        boardings=lambda frame: frame['boardings'].str.replace(',', '').astype(float),
    )
)
df_stop_ridership = df
df_stop_ridership

Unnamed: 0,stop,lat,lon,alightings,boardings
0,3rd St Station Slip Q,37.272643,-79.946005,33225.0,39040.0
1,Salem Avenue WB at 5th,37.272404,-79.949678,192.0,683.0
2,5th St at Gilmer Ave,37.276955,-79.948074,445.0,875.0
3,5th St at Harrison,37.279345,-79.946998,1470.0,744.0
4,McDowell at 5th,37.281332,-79.946445,829.0,628.0
...,...,...,...,...,...
827,Jefferson & Walnut,37.260543,-79.941436,,
828,Radford University Carilion,37.265781,-79.940977,,
829,Jefferson & Bullitt (Northbound),37.268357,-79.940632,,
830,Church & Market,37.270458,-79.939109,,


In [16]:
import ipyleaflet.basemaps
import ipywidgets


def create_map(df_stop_ridership, sort_by):
    """Build, save and return a map labelling the 40 busiest stops.

    Args:
        df_stop_ridership: frame with 'stop', 'lat', 'lon', 'boardings' and
            'alightings' columns.
        sort_by: column used for the ranking ('boardings' or 'alightings').
            It also appears in the output file name and, minus its last
            character, in the page title.

    Returns:
        The ipyleaflet.Map, after it has been written to
        ``output_path / 'map-top-40-<sort_by>.html'``.
    """
    from html import escape  # local import: only this function needs it

    def count_text(value):
        # A stop may lack one of the two counts (NaN); int(NaN) would raise.
        return 'n/a' if pd.isna(value) else str(int(value))

    # NOTE(review): the map is centred on the notebook-level `df_routes`
    # frame, not on the frame passed in; kept so the saved maps do not move.
    pymap = ipyleaflet.Map(
        center=(df_routes['lat'].mean(), df_routes['lon'].mean()), zoom=12, min_zoom=1, max_zoom=20,
        scroll_wheel_zoom=True,
        layout=ipywidgets.Layout(width='100%', min_height='800px')
    )

    top_stops = df_stop_ridership.sort_values(sort_by, ascending=False)[:40]

    markers = []
    for _, row in top_stops.iterrows():
        label_html = ''.join([
            '<span style="color:#000; font-size:8pt;">',
            # Stop names can contain characters such as '&' that must be escaped.
            escape(row['stop']),
            '<br/>',
            f"{count_text(row['boardings'])} boarded / {count_text(row['alightings'])} alighted",
            '</span>',  # the closing tag was previously left unterminated
        ])
        icon = ipyleaflet.DivIcon(html=label_html, bg_pos=[0, 0], icon_size=[100, 70])
        markers.append(ipyleaflet.Marker(
            location=(row['lat'], row['lon']),
            title=row['stop'],
            icon=icon,
            draggable=False,
        ))

    pymap.add(ipyleaflet.MarkerCluster(markers=markers))
    # sort_by[1:-1] drops the trailing 's': 'boardings' -> 'Boarding'.
    pymap.save(output_path / f'map-top-40-{sort_by}.html', title=f'Top 40 Stops by {sort_by[0].upper() + sort_by[1:-1]} Numbers')
    return pymap

# Build and save both top-40 maps. Only the final expression (boardings) is
# shown as the cell's output.
create_map(df_stop_ridership, 'alightings')
create_map(df_stop_ridership, 'boardings')

Map(center=[np.float64(37.276543736454485), np.float64(-79.9713701920397)], controls=(ZoomControl(options=['po…

In [34]:
import math

import ipyleaflet.basemaps
import ipywidgets

def ridership_points(df, label, max_intensity=35):
    """Turn per-stop counts into ``[lat, lon, weight]`` heat-map points.

    Weights are ``log10(count + 1)`` rescaled so the busiest stop gets
    ``max_intensity``. The log scale keeps a few very busy stops from
    drowning out the rest.

    Args:
        df: frame with 'lat', 'lon' and ``label`` columns. It is not modified.
        label: name of the count column to plot.
        max_intensity: weight given to the stop with the highest count.

    Returns:
        A list of ``[lat, lon, weight]`` lists, one per usable row. Rows
        missing a coordinate or the ``label`` count are skipped. A gap in a
        different column no longer removes the row.
    """
    observed = df.dropna(subset=['lat', 'lon', label])
    weights = observed[label].map(lambda count: math.log10(count + 1))

    peak = weights.max()
    if peak > 0:
        weights = max_intensity * weights / peak
    else:
        # Every count is zero (or there are no rows): avoid 0/0 -> NaN weights.
        weights = weights * 0.0

    return [
        [lat, lon, weight]
        for lat, lon, weight in zip(observed['lat'], observed['lon'], weights)
    ]

def create_heat_map(df, label):
    """Build, save and return a heat map of one ridership metric.

    Args:
        df: per-stop frame with 'lat', 'lon' and the lower-cased ``label``
            column.
        label: capitalised metric name, e.g. 'Boardings'. It is used as-is in
            the page title and lower-cased for the column and file name.

    Returns:
        The ipyleaflet.Map, after it has been written to
        ``output_path / 'map-heat-<label.lower()>.html'``.
    """
    column = label.lower()

    pymap = ipyleaflet.Map(
        center=(df['lat'].mean(), df['lon'].mean()), zoom=13, min_zoom=1, max_zoom=20,
        scroll_wheel_zoom=True,
        layout=ipywidgets.Layout(width='100%', min_height='800px')
    )

    pymap.add(ipyleaflet.Heatmap(
        # Use the frame passed in; this previously read the notebook-level
        # df_stop_ridership and silently ignored the argument.
        locations=ridership_points(df, column),
        radius=30,
    ))

    pymap.save(output_path / f'map-heat-{column}.html', title=f'Roanoke, VA - {label} Heat Map')
    return pymap

# Generate and save both heat maps; `pymap` ends up holding the boardings map,
# which is then shown as the cell output.
for heat_label in ('Alightings', 'Boardings'):
    pymap = create_heat_map(df_stop_ridership, heat_label)

pymap

Map(center=[np.float64(37.2774887525337), np.float64(-79.96441511632254)], controls=(ZoomControl(options=['pos…

In [19]:
import matplotlib.pyplot as plt
import shutil

In [20]:
# Distinct (route, direction) pairs; one chart is drawn per pair below.
routes_and_directions = df_route_ridership[['route', 'direction']].drop_duplicates()

In [21]:
# Raw charts: one PNG per (route, direction) with unadjusted stop totals.
plot_dir = plot_path / 'ridership-full-raw'
shutil.rmtree(plot_dir, ignore_errors=True)  # start from an empty folder each run
plot_dir.mkdir(exist_ok=True)

for route_name, direction_name in routes_and_directions[['route', 'direction']].itertuples(index=False, name=None):
    on_route = df_route_ridership['route'] == route_name
    on_direction = df_route_ridership['direction'] == direction_name
    df = df_route_ridership[on_route & on_direction]

    fig, ax = plt.subplots(layout='constrained')
    fig.set_size_inches(12, 8)

    title_lines = [
        'Roanoke, VA - Boarding/Alighting: May 1, 2024 - April 30, 2025',
        route_name,
        direction_name,
        '(some stops are shared with other routes)',
    ]
    ax.set_title('\n'.join(title_lines))

    # Boardings rise above the axis; alightings are negated so they hang below.
    ax.bar(df['stop'], df['boardings'], color='blue', label='boardings')
    ax.bar(df['stop'], -df['alightings'], color='red', label='alightings')
    ax.legend(loc="best")

    ax.tick_params(axis='x', labelrotation=90)

    # '/' cannot appear in a file name, so it is spelled out as ' or '.
    route_part = route_name.strip().replace('/', ' or ')
    direction_part = direction_name.strip().replace('/', ' or ')
    plt.savefig(plot_dir / f'{route_part} --- {direction_part}.png')
    plt.close()

In [22]:
# Adjusted charts: one PNG per (route, direction), with each stop's totals
# divided by its route_count.
plot_dir = plot_path / 'ridership-full-adjusted'
shutil.rmtree(plot_dir, ignore_errors=True)  # start from an empty folder each run
plot_dir.mkdir(exist_ok=True)

for route_name, direction_name in routes_and_directions[['route', 'direction']].itertuples(index=False, name=None):
    on_route = df_route_ridership['route'] == route_name
    on_direction = df_route_ridership['direction'] == direction_name
    df = df_route_ridership[on_route & on_direction]

    fig, ax = plt.subplots(layout='constrained')
    fig.set_size_inches(12, 8)

    title_lines = [
        'Roanoke, VA - Adjusted Boarding/Alighting: May 1, 2024 - April 30, 2025',
        route_name,
        direction_name,
        '(stops that are shared by multiple routes have their totals divided by the number of shared routes)',
    ]
    ax.set_title('\n'.join(title_lines))

    # Boardings rise above the axis; alightings are negated so they hang below.
    ax.bar(df['stop'], df['boardings'] / df['route_count'], color='blue', label='boardings')
    ax.bar(df['stop'], -df['alightings'] / df['route_count'], color='red', label='alightings')
    ax.legend(loc="best")

    ax.tick_params(axis='x', labelrotation=90)

    # '/' cannot appear in a file name, so it is spelled out as ' or '.
    route_part = route_name.strip().replace('/', ' or ')
    direction_part = direction_name.strip().replace('/', ' or ')
    plt.savefig(plot_dir / f'{route_part} --- {direction_part}.png')
    plt.close()

In [23]:
# Charts restricted to stops with fewer than 10,000 boardings AND fewer than
# 10,000 alightings, so the busiest stops do not flatten every other bar.
plot_dir = plot_path / 'ridership-less-than-10000'
shutil.rmtree(plot_dir, ignore_errors=True)  # start from an empty folder each run
plot_dir.mkdir(exist_ok=True)

for index, row in routes_and_directions.iterrows():
    df = df_route_ridership
    df = df[(df['route'] == row['route']) & (df['direction'] == row['direction'])]
    df = df[(df['boardings'] < 10000) & (df['alightings'] < 10000)]

    fig, ax = plt.subplots(layout='constrained')
    fig.set_size_inches(12, 6)

    ax.set_title('\n'.join([
        'Roanoke, VA Boarding/Alighting: May 1, 2024 - April 30, 2025',
        row['route'],
        row['direction'],
        # Corrected from the mistyped "10,0000".
        '(Stops with < 10,000 boarders/alighters)',
    ]))
    # Boardings rise above the axis; alightings are negated so they hang below.
    ax.bar(df['stop'], df['boardings'], color='blue', label='boardings')
    ax.bar(df['stop'], -df['alightings'], color='red', label='alightings')
    ax.legend(loc="best")

    ax.tick_params(axis='x', labelrotation=90)

    # '/' cannot appear in a file name, so it is spelled out as ' or '.
    route_part = row['route'].strip().replace('/', ' or ')
    direction_part = row['direction'].strip().replace('/', ' or ')
    fig.savefig(plot_dir / f'{route_part} --- {direction_part}.png')
    plt.close(fig)

In [35]:
import urllib.parse

def ridership_links(base_path, routes):
    """Build link records for the per-route chart images.

    Args:
        base_path: URL path of the folder containing the PNGs.
        routes: frame with 'route' and 'direction' columns, one row per chart.

    Returns:
        A list of ``{'href': ..., 'text': ...}`` dicts in row order. ``href``
        is ``base_path`` plus the percent-encoded chart file name, and
        ``text`` is the human-readable "route --- direction" label.
    """
    def file_name_part(name):
        # Must mirror how the chart files are named when they are saved:
        # surrounding whitespace stripped and '/' spelled out as ' or '.
        # Encoding the raw name instead produced '%2F' links matching no file.
        return name.strip().replace('/', ' or ')

    links = []
    for _, row in routes.iterrows():
        file_name = f"{file_name_part(row['route'])} --- {file_name_part(row['direction'])}.png"
        links.append({
            # Encode the whole file name, including the spaces around '---'.
            'href': f"{base_path}/{urllib.parse.quote(file_name, safe='')}",
            'text': f"{row['route']} --- {row['direction']}",
        })
    return links

def ridership_section(title, note, base_path, routes=None):
    """Render one index-page section listing a chart link per route/direction.

    Args:
        title: section heading, inserted as-is (may contain markup).
        note: explanatory paragraph, inserted as-is (may contain markup).
        base_path: URL path of the folder containing the chart PNGs.
        routes: frame with 'route' and 'direction' columns. Defaults to the
            notebook-level ``routes_and_directions``.

    Returns:
        A list of HTML fragment strings, to be joined by the caller.
    """
    from html import escape  # local import: only this function needs it

    if routes is None:
        routes = routes_and_directions

    items = [
        # Route names are data, not markup: escape them ('&' occurs in names).
        '<li><a href="{href}">{text}</a></li>'.format(
            href=escape(link['href'], quote=True),
            text=escape(link['text']),
        )
        for link in ridership_links(base_path, routes)
    ]
    return [
        f"<h2>{title}</h2>",
        f"<p>{note}</p>",
        "May 1, 2024 - April 30, 2025",
        "<ul>",
        *items,
        "</ul>",
    ]

# HTML fragments of the index page, in document order: a static head and link
# lists, then one generated section per chart set, then the closing tags.
ridership_index = [
    """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Rider FYI - Roanoke, VA - Ridership</title>
    </head>
    <body>
    <h2>External Links</h2>
    <ul>
    <li><a href="https://sites.google.com/view/brrag">BRRAG - Bus Riders of Roanoke Advocacy Group - Google Site</a></li>
    <li><a href="https://www.facebook.com/BusridersofRoanoke">BRRAG - Bus Riders of Roanoke Advocacy Group - Facebook</a></li>
    <li><a href="https://vmgoapp.com/">Valley Metro Roanoke (Live Bus Map)</a></li>
    </ul>
    <h2>Ridership Data</h2>
    <ul>
        <li><a href="ridership-with-stops-sorted-boarding.csv">May 1, 2024 - April 30, 2025</a></li>
    </ul>
    <h2>Interactive Maps</h2>
    <p>May 1, 2024 - April 30, 2025</p>
    <ul>
        <li><a href="map-top-40-alightings.html">Top 40 Stops with the most Alightings</a></li>
        <li><a href="map-top-40-boardings.html">Top 40 Stops with the most Boardings</a></li>
        <li><a href="map-heat-alightings.html">Heat Map - Alightings</a></li>
        <li><a href="map-heat-boardings.html">Heat Map - Boardings</a></li>
    </ul>
    """,
    # Chart links for the adjusted set (shared-stop totals split between routes).
    *ridership_section(
        title="Ridership by Route (Adjusted)",
        note="Some stops are shared by multiple routes. In this set of graphs, ridership for each shared stop has been divided among the routes that go through it.",
        base_path='plots/ridership-full-adjusted'
    ),
    # Chart links for the raw, unadjusted set.
    *ridership_section(
        title="Ridership by Route (Raw)",
        note="The ridership numbers in this set have not been adjusted.",
        base_path='plots/ridership-full-raw'
    ),
    """
    </body>
    </html>
    """,
]
# Join the fragments into the final page and write it out. The page declares
# <meta charset="UTF-8">, so the file is written as UTF-8 explicitly instead
# of using the platform's default encoding.
ridership_index = '\n'.join(ridership_index)
with open(output_path / 'index.html', 'w', encoding='utf-8') as f:
    f.write(ridership_index)