# Inflows and Outflows
- This notebook studies how sections differ based on their 'inflows' and 'outflows'
- An inflow is when a ride enters a location that it did not start in 
- An outflow is when a ride leaves its starting location
- A 'local' ride begins and ends in the same location

## Findings
- Locations can differ significantly in their net inflows and outflows
- The majority of zones with the highest outflow rates are parks
- Airports feature a higher outflow rate than inflow rate

In [1]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://root:root@localhost:5432/uber')
engine.connect()

%load_ext sql
%sql postgresql://root:root@localhost:5432/uber

<sqlalchemy.engine.base.Connection at 0x10581aac0>

In [3]:
%%sql
SELECT * FROM main LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
10 rows affected.


index,hvfhs_license_num,dispatching_base_num,originating_base_num,request_datetime,on_scene_datetime,pickup_datetime,dropoff_datetime,PULocationID,DOLocationID,trip_miles,trip_time,base_passenger_fare,tolls,bcf,sales_tax,congestion_surcharge,airport_fee,tips,driver_pay,shared_request_flag,shared_match_flag,access_a_ride_flag,wav_request_flag,wav_match_flag,pickup_hour,pickup_dayofweek,platform,has_tips,driver_pay_per_mile,driver_pay_per_minute
10840404,HV0005,B02510,,2019-08-17 15:59:10,,2019-08-17 16:02:35,2019-08-17 16:32:28,143,145,4.239,1793,19.41,0.16,0.49,1.74,2.75,,0.0,19.44,N,N,N,N,N,16,Saturday,Lyft,0,4.585987,0.65052986
2252549,HV0003,B02870,B02870,2019-08-04 03:15:51,2019-08-04 03:24:27,2019-08-04 03:24:27,2019-08-04 03:31:54,80,198,1.54,447,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,N,N,,N,N,3,Sunday,Uber,0,0.0,0.0
13745783,HV0003,B02879,B02879,2019-08-22 07:42:04,2019-08-22 07:43:54,2019-08-22 07:45:32,2019-08-22 07:50:41,69,119,1.32,308,3.85,0.0,0.0,0.33,0.0,,0.0,5.39,N,N,,N,N,7,Thursday,Uber,0,4.0833335,1.05
2620430,HV0005,B02510,,2019-08-04 17:25:30,,2019-08-04 17:29:54,2019-08-04 17:43:10,144,162,2.85,796,19.74,0.0,0.49,1.75,2.75,,3.71,9.7,N,N,N,N,N,17,Sunday,Lyft,1,3.4035087,0.73115575
3601407,HV0003,B02875,B02875,2019-08-06 12:08:15,2019-08-06 12:08:25,2019-08-06 12:10:25,2019-08-06 12:21:24,244,119,3.1,636,9.34,0.0,0.0,0.81,0.0,,0.0,8.83,N,N,,N,N,12,Tuesday,Uber,0,2.848387,0.83301884
2863573,HV0003,B02869,B02869,2019-08-05 05:00:06,2019-08-05 05:00:12,2019-08-05 05:02:47,2019-08-05 05:13:18,167,213,3.5,632,17.76,0.0,0.0,1.54,0.0,,0.0,13.57,N,N,,N,N,5,Monday,Uber,0,3.877143,1.2882911
7773871,HV0003,B02883,B02883,2019-08-12 19:49:50,2019-08-12 19:50:01,2019-08-12 19:52:26,2019-08-12 20:03:52,125,148,1.64,675,11.5,0.0,0.0,1.0,2.75,,0.0,7.45,N,N,,N,N,19,Monday,Uber,0,4.542683,0.6622222
13497465,HV0005,B02510,,2019-08-21 18:57:26,,2019-08-21 19:02:31,2019-08-21 19:36:55,186,229,2.796,2064,11.35,0.0,0.28,1.01,0.75,,0.0,0.0,Y,Y,N,N,N,19,Wednesday,Lyft,0,0.0,0.0
12036795,HV0003,B02877,B02877,2019-08-19 09:28:33,2019-08-19 09:30:20,2019-08-19 09:31:26,2019-08-19 09:34:50,119,247,0.84,203,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,N,N,,N,N,9,Monday,Uber,0,0.0,0.0
13943106,HV0005,B02510,,2019-08-22 14:00:58,,2019-08-22 14:05:32,2019-08-22 15:06:48,36,48,9.696,3676,31.48,0.0,0.86,3.06,0.75,,0.0,0.0,Y,Y,N,N,N,14,Thursday,Lyft,0,0.0,0.0


In [4]:
%%sql
DROP TABLE IF EXISTS inflows ;

CREATE TEMPORARY TABLE inflows AS 
SELECT "PULocationID" "location",
COALESCE(COUNT(*), 0)::numeric count
FROM main
WHERE "PULocationID" != "DOLocationID"
GROUP BY "PULocationID"
ORDER BY "PULocationID";

SELECT * FROM inflows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
261 rows affected.
10 rows affected.


location,count
1,10
2,1
3,329
4,838
5,32
6,73
7,1713
8,12
9,166
10,557


In [5]:
%%sql
DROP TABLE IF EXISTS outflows;

CREATE TEMPORARY TABLE outflows AS 
SELECT "DOLocationID" "location",
COALESCE(COUNT(*), 0)::numeric count
FROM main
WHERE "PULocationID" != "DOLocationID"
GROUP BY "DOLocationID"
ORDER BY "DOLocationID";

SELECT * FROM outflows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
260 rows affected.
10 rows affected.


location,count
1,1141
2,1
3,358
4,682
5,37
6,91
7,1736
8,15
9,154
10,452


In [6]:
%%sql
DROP TABLE IF EXISTS locals;

CREATE TEMPORARY TABLE locals AS 
SELECT "DOLocationID" "location",
COALESCE(COUNT(*), 0)::numeric count
FROM main
WHERE "PULocationID" = "DOLocationID"
GROUP BY "DOLocationID"
ORDER BY "DOLocationID";

SELECT * FROM locals LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
241 rows affected.
10 rows affected.


location,count
3,30
4,4
5,4
6,4
7,245
9,11
10,37
11,11
13,11
14,227


In [7]:
%%sql
SELECT * FROM zones LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
10 rows affected.


index,LocationID,Borough,Zone,service_zone
0,1,EWR,Newark Airport,EWR
1,2,Queens,Jamaica Bay,Boro Zone
2,3,Bronx,Allerton/Pelham Gardens,Boro Zone
3,4,Manhattan,Alphabet City,Yellow Zone
4,5,Staten Island,Arden Heights,Boro Zone
5,6,Staten Island,Arrochar/Fort Wadsworth,Boro Zone
6,7,Queens,Astoria,Boro Zone
7,8,Queens,Astoria Park,Boro Zone
8,9,Queens,Auburndale,Boro Zone
9,10,Queens,Baisley Park,Boro Zone


In [8]:
%%sql
DROP TABLE IF EXISTS flows;

CREATE TEMPORARY TABLE flows AS 
SELECT 
zones."Zone", 
zones."Borough", 
COALESCE(inflows.count, 0) inflows,
COALESCE(outflows.count, 0) outflows,
COALESCE(locals.count, 0) locals
FROM zones 
    LEFT JOIN inflows 
    ON zones."LocationID" = inflows.location
    LEFT JOIN outflows 
    ON zones."LocationID" = outflows.location
    LEFT JOIN locals 
    ON zones."LocationID" = locals.location
;

SELECT * FROM flows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
263 rows affected.
10 rows affected.


Zone,Borough,inflows,outflows,locals
Allerton/Pelham Gardens,Bronx,329,358,30
Alphabet City,Manhattan,838,682,4
Arden Heights,Staten Island,32,37,4
Arrochar/Fort Wadsworth,Staten Island,73,91,4
Astoria,Queens,1713,1736,245
Auburndale,Queens,166,154,11
Baisley Park,Queens,557,452,37
Bath Beach,Brooklyn,172,187,11
Battery Park City,Manhattan,1110,1232,11
Bay Ridge,Brooklyn,704,840,227


In [9]:
%%sql
ALTER TABLE flows 
ADD total int;

UPDATE flows
SET total = inflows + outflows + locals;

SELECT * FROM flows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
263 rows affected.
10 rows affected.


Zone,Borough,inflows,outflows,locals,total
Allerton/Pelham Gardens,Bronx,329,358,30,717
Alphabet City,Manhattan,838,682,4,1524
Arden Heights,Staten Island,32,37,4,73
Arrochar/Fort Wadsworth,Staten Island,73,91,4,168
Astoria,Queens,1713,1736,245,3694
Auburndale,Queens,166,154,11,331
Baisley Park,Queens,557,452,37,1046
Bath Beach,Brooklyn,172,187,11,370
Battery Park City,Manhattan,1110,1232,11,2353
Bay Ridge,Brooklyn,704,840,227,1771


In [17]:
%%sql
SELECT percentile_cont(0.05) WITHIN GROUP (ORDER BY total ASC)
FROM flows

 * postgresql://root:***@localhost:5432/uber
1 rows affected.


percentile_cont
50.70000000000001


### Highest Percent of Inflows

In [19]:
%%sql
SELECT 
flows."Zone",
flows."Borough",
ROUND(
    100*inflows / (total)
,2) inflow_percent, 
inflows, outflows
FROM flows
WHERE (inflows + outflows > 0)
AND total > (SELECT percentile_cont(0.05) WITHIN GROUP (ORDER BY total ASC) FROM flows)
ORDER BY (inflows / (total)) DESC
LIMIT 20;

 * postgresql://root:***@localhost:5432/uber
20 rows affected.


Zone,Borough,inflow_percent,inflows,outflows
Crotona Park,Bronx,59.09,39,26
Sutton Place/Turtle Bay North,Manhattan,57.61,1396,1016
Hollis,Queens,57.3,212,152
Cambria Heights,Queens,57.1,181,124
Stuy Town/Peter Cooper Village,Manhattan,56.94,632,477
Springfield Gardens North,Queens,56.54,359,256
Marble Hill,Manhattan,56.5,187,141
City Island,Bronx,55.67,54,43
Little Italy/NoLiTa,Manhattan,55.38,1653,1308
Greenwich Village North,Manhattan,55.29,1401,1123


### Highest Percent of Outflows

In [20]:
%%sql
SELECT 
flows."Zone",
flows."Borough",
ROUND(
    100*outflows / (total)
,2) outflow_percent, 
inflows, outflows
FROM flows
WHERE (total > 0)
AND total > (SELECT percentile_cont(0.05) WITHIN GROUP (ORDER BY total ASC) FROM flows)
ORDER BY (outflows / (total)) DESC
LIMIT 20;

 * postgresql://root:***@localhost:5432/uber
20 rows affected.


Zone,Borough,outflow_percent,inflows,outflows
Newark Airport,EWR,99.13,10,1141
Randalls Island,Manhattan,70.92,41,100
Pelham Bay Park,Bronx,60.9,52,81
Breezy Point/Fort Tilden/Riis Beach,Queens,59.14,36,55
Battery Park,Manhattan,58.59,41,58
Flushing Meadows-Corona Park,Queens,57.22,238,321
Penn Station/Madison Sq West,Manhattan,56.54,1695,2225
Prospect Park,Brooklyn,55.95,178,235
Inwood Hill Park,Manhattan,54.78,52,63
Marine Park/Floyd Bennett Field,Brooklyn,54.67,34,41


### Airports

In [12]:
%%sql
SELECT 
flows."Zone",
ROUND(
    100*inflows / (total)
,2) inflow_percent, 
ROUND(
    100*outflows / (total)
,2) outflow_percent, 
ROUND(
    100*locals / (total)
,2) local_percent, 
total
FROM flows
WHERE "Zone" LIKE '%Airport%'
ORDER BY (inflows / (total)) ASC
LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
3 rows affected.


Zone,inflow_percent,outflow_percent,local_percent,total
Newark Airport,0.87,99.13,0.0,1151
JFK Airport,44.84,53.69,1.46,7321
LaGuardia Airport,45.55,54.24,0.21,7592
