# Inflows and Outflows
- This notebook studies how sections differ based on their 'inflows' and 'outflows'
- An inflow is when a ride enters a location that it did not start in 
- An outflow is when a ride leaves its starting location
- A 'local' ride begins and ends in the same location

## Findings
- Locations can differ significantly in their net inflows and outflows
- The majority of zones with the highest outflow rates are parks
- Airports feature a higher outflow rate than inflow rate

In [1]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://root:root@localhost:5432/uber')
engine.connect()

<sqlalchemy.engine.base.Connection at 0x12114e8e0>

In [2]:
%load_ext sql
%sql postgresql://root:root@localhost:5432/uber

In [3]:
%%sql
SELECT * FROM main LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
10 rows affected.


index,hvfhs_license_num,dispatching_base_num,originating_base_num,request_datetime,on_scene_datetime,pickup_datetime,dropoff_datetime,PULocationID,DOLocationID,trip_miles,trip_time,base_passenger_fare,tolls,bcf,sales_tax,congestion_surcharge,airport_fee,tips,driver_pay,shared_request_flag,shared_match_flag,access_a_ride_flag,wav_request_flag,wav_match_flag,pickup_hour,pickup_dayofweek,platform,has_tips
3761537,HV0005,B02510,,2019-07-07 13:45:13,,2019-07-07 13:50:14,2019-07-07 14:31:38,74,78,5.798,2484,16.13,0.62,0.42,1.49,0.0,,0.0,26.86,N,N,N,N,N,13,Sunday,Lyft,0
4514778,HV0003,B02872,B02872,2019-07-08 20:55:07,2019-07-08 20:55:26,2019-07-08 20:58:37,2019-07-08 21:10:55,14,22,3.75,738,11.26,0.0,0.0,0.98,0.0,,0.0,10.2,N,N,,N,,20,Monday,Uber,0
13459379,HV0005,B02510,,2019-07-21 19:38:31,,2019-07-21 19:43:25,2019-07-21 19:56:26,18,247,2.503,781,13.0,0.0,0.24,0.84,0.0,,0.0,0.0,Y,N,N,N,N,19,Sunday,Lyft,0
12008064,HV0003,B02877,B02877,2019-07-19 23:04:25,2019-07-19 23:07:19,2019-07-19 23:07:31,2019-07-19 23:10:59,108,108,1.08,208,5.15,0.0,0.0,0.45,0.0,,0.0,5.39,N,N,,N,,23,Friday,Uber,0
3745204,HV0003,B02889,B02889,2019-07-07 13:23:57,2019-07-07 13:25:35,2019-07-07 13:27:19,2019-07-07 13:38:54,35,61,1.52,695,9.54,0.0,0.0,0.83,0.0,,0.0,7.39,N,N,,N,,13,Sunday,Uber,0
2053896,HV0005,B02510,,2019-07-04 16:42:43,,2019-07-04 16:45:14,2019-07-04 17:00:21,256,97,2.924,907,16.53,0.0,0.31,1.11,0.0,,0.0,10.69,N,N,N,N,N,16,Thursday,Lyft,0
19436197,HV0005,B02510,,2019-07-30 18:54:46,,2019-07-30 18:56:05,2019-07-30 19:04:04,173,95,1.934,479,10.75,0.0,0.27,0.95,0.0,,0.0,6.07,N,N,N,N,N,18,Tuesday,Lyft,0
17805213,HV0003,B02883,B02883,2019-07-28 02:47:12,2019-07-28 02:48:19,2019-07-28 02:50:02,2019-07-28 02:59:39,164,224,1.87,578,8.18,0.0,0.0,0.78,2.75,,0.0,6.82,N,N,,N,N,2,Sunday,Uber,0
6741838,HV0003,B02764,B02764,2019-07-12 12:29:10,2019-07-12 12:29:51,2019-07-12 12:32:26,2019-07-12 13:06:56,7,182,8.31,2071,25.13,6.12,0.0,2.71,0.0,,0.0,32.3,N,N,,N,,12,Friday,Uber,0
3420349,HV0003,B02836,B02836,2019-07-06 22:46:18,2019-07-06 22:47:23,2019-07-06 22:49:12,2019-07-06 22:57:10,225,49,1.18,477,7.71,0.0,0.0,0.67,0.0,,0.0,5.39,N,N,,N,,22,Saturday,Uber,0


In [4]:
%%sql
DROP TABLE IF EXISTS inflows ;

CREATE TEMPORARY TABLE inflows AS 
SELECT "PULocationID" "location",
COALESCE(COUNT(*), 0)::numeric count
FROM main
WHERE "PULocationID" != "DOLocationID"
GROUP BY "PULocationID"
ORDER BY "PULocationID";

SELECT * FROM inflows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
259 rows affected.
10 rows affected.


location,count
1,4
2,1
3,181
4,442
5,18
6,36
7,872
8,5
9,79
10,253


In [5]:
%%sql
DROP TABLE IF EXISTS outflows;

CREATE TEMPORARY TABLE outflows AS 
SELECT "DOLocationID" "location",
COALESCE(COUNT(*), 0)::numeric count
FROM main
WHERE "PULocationID" != "DOLocationID"
GROUP BY "DOLocationID"
ORDER BY "DOLocationID";

SELECT * FROM outflows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
259 rows affected.
10 rows affected.


location,count
1,549
2,1
3,163
4,351
5,21
6,46
7,836
8,8
9,81
10,201


In [6]:
%%sql
DROP TABLE IF EXISTS locals;

CREATE TEMPORARY TABLE locals AS 
SELECT "DOLocationID" "location",
COALESCE(COUNT(*), 0)::numeric count
FROM main
WHERE "PULocationID" = "DOLocationID"
GROUP BY "DOLocationID"
ORDER BY "DOLocationID";

SELECT * FROM locals LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
231 rows affected.
10 rows affected.


location,count
3,19
4,1
5,1
6,2
7,143
9,9
10,20
11,5
13,7
14,118


In [7]:
%%sql
SELECT * FROM zones LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
10 rows affected.


index,LocationID,Borough,Zone,service_zone
0,1,EWR,Newark Airport,EWR
1,2,Queens,Jamaica Bay,Boro Zone
2,3,Bronx,Allerton/Pelham Gardens,Boro Zone
3,4,Manhattan,Alphabet City,Yellow Zone
4,5,Staten Island,Arden Heights,Boro Zone
5,6,Staten Island,Arrochar/Fort Wadsworth,Boro Zone
6,7,Queens,Astoria,Boro Zone
7,8,Queens,Astoria Park,Boro Zone
8,9,Queens,Auburndale,Boro Zone
9,10,Queens,Baisley Park,Boro Zone


In [8]:
%%sql
DROP TABLE IF EXISTS flows;

CREATE TEMPORARY TABLE flows AS 
SELECT 
zones."Zone", 
zones."Borough", 
COALESCE(inflows.count, 0) inflows,
COALESCE(outflows.count, 0) outflows,
COALESCE(locals.count, 0) locals
FROM zones 
    LEFT JOIN inflows 
    ON zones."LocationID" = inflows.location
    LEFT JOIN outflows 
    ON zones."LocationID" = outflows.location
    LEFT JOIN locals 
    ON zones."LocationID" = locals.location
;

SELECT * FROM flows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
263 rows affected.
10 rows affected.


Zone,Borough,inflows,outflows,locals
Allerton/Pelham Gardens,Bronx,181,163,19
Alphabet City,Manhattan,442,351,1
Arden Heights,Staten Island,18,21,1
Arrochar/Fort Wadsworth,Staten Island,36,46,2
Astoria,Queens,872,836,143
Auburndale,Queens,79,81,9
Baisley Park,Queens,253,201,20
Bath Beach,Brooklyn,97,90,5
Battery Park City,Manhattan,569,616,7
Bay Ridge,Brooklyn,380,392,118


In [9]:
%%sql
ALTER TABLE flows 
ADD total int;

UPDATE flows
SET total = inflows + outflows + locals;

SELECT * FROM flows LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
Done.
263 rows affected.
10 rows affected.


Zone,Borough,inflows,outflows,locals,total
Allerton/Pelham Gardens,Bronx,181,163,19,363
Alphabet City,Manhattan,442,351,1,794
Arden Heights,Staten Island,18,21,1,40
Arrochar/Fort Wadsworth,Staten Island,36,46,2,84
Astoria,Queens,872,836,143,1851
Auburndale,Queens,79,81,9,169
Baisley Park,Queens,253,201,20,474
Bath Beach,Brooklyn,97,90,5,192
Battery Park City,Manhattan,569,616,7,1192
Bay Ridge,Brooklyn,380,392,118,890


### Highest Percent of Inflows

In [10]:
%%sql
SELECT 
flows."Zone",
flows."Borough",
ROUND(
    100*inflows / (total)
,2) inflow_percent, 
inflows, outflows
FROM flows
WHERE (inflows + outflows > 0)
ORDER BY (inflows / (total)) DESC
LIMIT 20;

 * postgresql://root:***@localhost:5432/uber
20 rows affected.


Zone,Borough,inflow_percent,inflows,outflows
Willets Point,Queens,66.67,14,7
Green-Wood Cemetery,Brooklyn,66.67,10,5
Bellerose,Queens,61.96,57,33
Rossville/Woodrow,Staten Island,61.11,22,13
Highbridge Park,Manhattan,60.0,15,10
New Dorp/Midland Beach,Staten Island,58.21,39,24
Pelham Bay,Bronx,57.98,109,75
Columbia Street,Brooklyn,57.52,65,48
Flatiron,Manhattan,57.48,711,518
Oakland Gardens,Queens,57.36,74,51


### Highest Percent of Outflows

In [11]:
%%sql
SELECT 
flows."Zone",
flows."Borough",
ROUND(
    100*outflows / (total)
,2) outflow_percent, 
inflows, outflows
FROM flows
WHERE (total > 0)
ORDER BY (outflows / (total)) DESC
LIMIT 20;

 * postgresql://root:***@localhost:5432/uber
20 rows affected.


Zone,Borough,outflow_percent,inflows,outflows
Newark Airport,EWR,99.28,4,549
Saint Michaels Cemetery/Woodside,Queens,75.0,1,3
Freshkills Park,Staten Island,75.0,2,6
Randalls Island,Manhattan,63.22,32,55
Inwood Hill Park,Manhattan,62.96,20,34
Astoria Park,Queens,61.54,5,8
Flushing Meadows-Corona Park,Queens,59.14,76,110
Broad Channel,Queens,58.82,7,10
Roosevelt Island,Manhattan,58.2,48,71
Breezy Point/Fort Tilden/Riis Beach,Queens,57.41,22,31


### Airports

In [12]:
%%sql
SELECT 
flows."Zone",
ROUND(
    100*inflows / (total)
,2) inflow_percent, 
ROUND(
    100*outflows / (total)
,2) outflow_percent, 
ROUND(
    100*locals / (total)
,2) local_percent, 
total
FROM flows
WHERE "Zone" LIKE '%Airport%'
ORDER BY (inflows / (total)) ASC
LIMIT 10;

 * postgresql://root:***@localhost:5432/uber
3 rows affected.


Zone,inflow_percent,outflow_percent,local_percent,total
Newark Airport,0.72,99.28,0.0,553
JFK Airport,43.32,54.78,1.9,3525
LaGuardia Airport,47.53,52.22,0.25,3541
