In [1]:
import os
os.chdir("../../")

In [2]:
import sqlite3
import pandas as pd
from api.utils.database import rows_to_dicts

In [3]:
con = sqlite3.connect("./pipeline/database.db")

### RAW CTA Stops Data

In [21]:
cur = con.cursor()
rows = cur.execute("""
SELECT *
FROM cta_train_stops
LIMIT 5

""").fetchall()

rows

pd.DataFrame(rows_to_dicts(cur, rows))

Unnamed: 0,stop_id,direction_id,stop_name,station_name,station_descriptive_name,station_id,ada,red,blue,green,brown,purple,purple_express,yellow,pink,orange,longitude,latitude
0,30162,W,18th (54th/Cermak-bound),18th,18th (Pink Line),40830,1,0,0,0,0,0,0,0,1,0,-87.669147,41.857908
1,30161,E,18th (Loop-bound),18th,18th (Pink Line),40830,1,0,0,0,0,0,0,0,1,0,-87.669147,41.857908
2,30022,N,35th/Archer (Loop-bound),35th/Archer,35th/Archer (Orange Line),40120,1,0,0,0,0,0,0,0,0,1,-87.680622,41.829353
3,30023,S,35th/Archer (Midway-bound),35th/Archer,35th/Archer (Orange Line),40120,1,0,0,0,0,0,0,0,0,1,-87.680622,41.829353
4,30214,S,35-Bronzeville-IIT (63rd-bound),35th-Bronzeville-IIT,35th-Bronzeville-IIT (Green Line),41120,1,0,0,1,0,0,0,0,0,0,-87.625826,41.831677


### RAW CTA Ridership Data

In [71]:
cur = con.cursor()
rows = cur.execute("""
SELECT *
FROM cta_train_ridership
LIMIT 5

""").fetchall()

rows

pd.DataFrame(rows_to_dicts(cur, rows))

Unnamed: 0,station_id,stationname,date,daytype,rides
0,40010,Austin-Forest Park,2019-01-01T00:00:00.000,U,576
1,40020,Harlem-Lake,2019-01-01T00:00:00.000,U,981
2,40030,Pulaski-Lake,2019-01-01T00:00:00.000,U,835
3,40040,Quincy/Wells,2019-01-01T00:00:00.000,U,929
4,40050,Davis,2019-01-01T00:00:00.000,U,1207


## ADA vs Non-ADA Daily Stop Ridership Comparison

This looks at average trips a year before COVID and a year after COVID
* Before Covid: March 2019 - Feb 2020
* After Covid: March 2020 - Feb 2021

In [87]:
cur = con.cursor()
rows = cur.execute("""
SELECT 
    c.station_id,
    c.ada,
    r.*
FROM cta_train_stops c
    LEFT JOIN (
        SELECT
            station_id,
            stationname,
            SUM(CASE WHEN date < '2021-03-01' THEN rides ELSE 0 END) / 365 AS avg_trips_before,
            SUM(CASE WHEN date >= '2020-03-01' THEN rides ELSE 0 END) / 365 AS avg_trips_since
        FROM cta_train_ridership
        WHERE 
            date >= '2019-03-01'
            AND date < '2021-03-01'
        GROUP BY
            station_id
        ) r
    ON c.station_id = r.station_id
    GROUP BY station_id

""").fetchall()

rows

pd.DataFrame(rows_to_dicts(cur, rows))

Unnamed: 0,station_id,ada,stationname,avg_trips_before,avg_trips_since
0,40010,0,Austin-Forest Park,1831,348
1,40020,1,Harlem-Lake,3913,877
2,40030,1,Pulaski-Lake,1703,468
3,40040,0,Quincy/Wells,6711,643
4,40050,1,Davis,3615,609
...,...,...,...,...,...
138,41660,1,Lake/State,20252,2553
139,41670,1,Conservatory,1117,332
140,41680,1,Oakton-Skokie,918,160
141,41690,1,Cermak-McCormick Place,1876,302


In [88]:
cur = con.cursor()
rows = cur.execute("""

SELECT
    station_id,
    stationname,
    SUM(CASE WHEN date < '2021-03-01' THEN rides ELSE 0 END) / 365 AS avg_trips_before,
    SUM(CASE WHEN date >= '2020-03-01' THEN rides ELSE 0 END) / 365 AS avg_trips_since,
    trim(date,'T00:00:00.000') AS date
FROM cta_train_ridership
WHERE 
    date >= '2019-03-01'
    AND date < '2021-03-01'
GROUP BY
    station_id
LIMIT 5

""").fetchall()

rows

pd.DataFrame(rows_to_dicts(cur, rows))

Unnamed: 0,station_id,stationname,avg_trips_before,avg_trips_since,date
0,40010,Austin-Forest Park,1831,348,2020-12-31
1,40020,Harlem-Lake,3913,877,2020-12-31
2,40030,Pulaski-Lake,1703,468,2020-12-31
3,40040,Quincy/Wells,6711,643,2020-12-31
4,40050,Davis,3615,609,2020-12-31


## Questions
**Is it ok to group by station ID? Can we assume if one entrance at a station is ADA compliant, any other entrance can be assumed to be as well?**

I think this is an ok assumption to make for this analysis. If an individual plans on using a station, there is a good chance they plan ahead and know that there is an ADA accessible entrance available to them (elevator, ramp).

**Are there factors, other than an accessible entrance, that a station needs to be ADA compliant? (ie. ramp to get on trian)**