In [181]:
from shapely.geometry import shape, Point
from pyproj import Proj, transform
from geopy.distance import great_circle
from collections import Counter
from ast import literal_eval as make_tuple

import shapefile
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt, mpld3
import csv
mpld3.enable_notebook()

In [29]:
newark_coordinates = {
    'terminal_a': [40.687794, -74.182307],
    'terminal_b': [40.690627, -74.177544],
    'terminal_c': [40.695558, -74.178063]
}

laguardia_coordinates = {
    'terminal_a': [40.772375, -73.885976],
    'terminal_b': [40.774444, -73.872006],
    'terminal_c': [40.770674, -73.865307],
    'terminal_d': [40.768628, -73.862134]
}

jfk_coordinates = {
    'terminal_1': [40.643325, -73.789939],
    'terminal_2': [40.641595, -73.788767],
    'terminal_4': [40.644193, -73.782554],
    'terminal_5': [40.645807, -73.776774],
    'terminal_7': [40.648798, -73.782922],
    'terminal_8': [40.646934, -73.789874]
}

In [30]:
jfk_green_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-green-jfk-dropoffs-2016.csv')
jfk_yellow_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-yellow-jfk-dropoffs-2016.csv')
newark_green_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-green-newark-dropoffs-2016.csv')
newark_yellow_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-yellow-newark-dropoffs-2016.csv')
laguardia_green_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-green-laguardia-dropoffs-2016.csv')
laguardia_yellow_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-yellow-laguardia-dropoffs-2016.csv')

In [127]:
sf = shapefile.Reader("taxi_zones/taxi_zones")
shapes = sf.shapes()

In [128]:
NEWARK_ID = 1
LAGUARDIA_ID = 138
JFK_ID = 132

In [184]:
def check_bounds(raw_coords, shapefile):
    in_proj = Proj(init='epsg:4326')
    out_proj= Proj(init='epsg:2263', preserve_units = True)
    coords = make_tuple(raw_coords)
    
    point = Point(transform(in_proj, out_proj, coords[1], coords[0]))
    polygon = shape(shapefile)
    return polygon.contains(point)

In [189]:
newark_yellow_filter = newark_yellow_dropoffs['coords'].apply(check_bounds, args=(shapes[NEWARK_ID - 1],))

In [None]:
newark_green_filter = newark_green_dropoffs['coords'].apply(check_bounds, args=(shapes[NEWARK_ID - 1],))
laguardia_yellow_filter = laguardia_yellow_dropoffs['coords'].apply(check_bounds, args=(shapes[LAGUARDIA_ID - 1],))
laguardia_green_filter = laguardia_green_dropoffs['coords'].apply(check_bounds, args=(shapes[LAGUARDIA_ID - 1],))
jfk_yellow_filter = jfk_yellow_dropoffs['coords'].apply(check_bounds, args=(shapes[JFK_ID - 1],))
jfk_green_filter = jfk_green_dropoffs['coords'].apply(check_bounds, args=(shapes[JFK_ID - 1],))

In [193]:
newark_yellow_dropoffs = newark_yellow_dropoffs.loc[newark_yellow_filter]
newark_green_dropoffs = newark_green_dropoffs.loc[newark_green_filter]
laguardia_yellow_dropoffs = laguardia_yellow_dropoffs.loc[laguardia_yellow_filter]
laguardia_green_dropoffs = laguardia_green_dropoffs.loc[laguardia_green_filter]
jfk_yellow_dropoffs = jfk_yellow_dropoffs.loc[jfk_yellow_filter]
jfk_green_dropoffs = jfk_green_dropoffs.loc[jfk_green_filter]

(117734, 20)

In [31]:
total_count = len(jfk_green_dropoffs) + len(jfk_yellow_dropoffs) + len(newark_green_dropoffs) + len(newark_yellow_dropoffs) \
+ len(laguardia_green_dropoffs) + len(laguardia_yellow_dropoffs)
total_count

1807965

In [56]:
def count_passengers(df):
    terminal_counts = pd.Series(np.zeros(len(df['terminal'].unique())), index=df['terminal'].unique(), name='terminal')
    for _, dropoff in df.iterrows():
        if not np.isnan(dropoff['passenger_count']):
            terminal_counts[dropoff['terminal']] += dropoff['passenger_count']
        else:
            terminal_counts[dropoff['terminal']] += 1
    return terminal_counts

In [57]:
newark_yellow_terminal_counts = count_passengers(newark_yellow_dropoffs)

In [58]:
newark_green_terminal_counts = count_passengers(newark_green_dropoffs)

In [59]:
total_newark_counts = newark_green_terminal_counts + newark_yellow_terminal_counts

In [61]:
total_newark_counts

terminal_c    108937.0
terminal_b     50937.0
terminal_a     50795.0
Name: terminal, dtype: float64

In [62]:
plt = total_newark_counts.plot(kind='bar')
mpld3.display()

In [63]:
laguardia_yellow_terminal_counts = count_passengers(laguardia_yellow_dropoffs)
laguardia_green_terminal_counts = count_passengers(laguardia_green_dropoffs)
total_laguardia_counts = laguardia_green_terminal_counts + laguardia_yellow_terminal_counts

In [64]:
total_laguardia_counts

terminal_c    382164.0
terminal_b    846503.0
terminal_d    278406.0
terminal_a     96282.0
Name: terminal, dtype: float64

In [65]:
plt = total_laguardia_counts.plot(kind='bar')
mpld3.display()

In [66]:
jfk_yellow_terminal_counts = count_passengers(jfk_yellow_dropoffs)
jfk_green_terminal_counts = count_passengers(jfk_green_dropoffs)
total_jfk_counts = jfk_green_terminal_counts + jfk_yellow_terminal_counts

In [67]:
plt = total_jfk_counts.plot(kind='bar')
mpld3.display()

In [68]:
total_jfk_counts

terminal_1    180306.0
terminal_2     61657.0
terminal_4    320670.0
terminal_5    183052.0
terminal_7    113307.0
terminal_8    255764.0
Name: terminal, dtype: float64

In [91]:
def add_counts(total_dropoff_count, dropoff_counts, terminal_airline_list):
    average_dropoff_count = total_dropoff_count/len(terminal_airline_list)
    for airline in terminal_airline_list:
        dropoff_counts[airline] += average_dropoff_count
    return dropoff_counts

In [107]:
def make_airline_array(raw_string):
    return raw_string.split('\n')

In [108]:
dropoff_counts = add_counts(50795, Counter(), [
            'Southwest',
            'Air Canada',
            'Virgin America',
            'JetBlue Airways',
            'American',
            'Alaska'
        ])

In [109]:
dropoff_counts = add_counts(50937, dropoff_counts, [
            'Austrian',
            'Cathay Pacific Airways',
            'TAP Air Portugal',
            'Allegiant Air',
            'OpenSkies',
            'Aer Lingus',
            'Porter',
            'Delta',
            'SAS Scandinavian',
            'Spirit',
            'Swiss International',
            'Virgin Atlantic Airways',
            'Ethiopian',
            'Wow Air',
            'Lufthansa',
            'Air India',
            'El Al',
            'Icelandair',
            'Air China',
            'British Airways'
        ])

In [110]:
dropoff_counts = add_counts(108937, dropoff_counts, ['United'] )

In [111]:
dropoff_counts = add_counts(96282, dropoff_counts, ['Delta'] )

In [112]:
dropoff_counts = add_counts(846503 , dropoff_counts, make_airline_array("""American
JetBlue
Spirit
Southwest
Air Canada
Virgin America
Frontier
United"""))

In [113]:
dropoff_counts = add_counts(382164, dropoff_counts, make_airline_array("""Delta
America"""))

In [114]:
dropoff_counts = add_counts(278406, dropoff_counts, make_airline_array("""Delta
WestJet"""))

In [115]:
dropoff_counts = add_counts(180306, dropoff_counts, make_airline_array("""Cayman Airways
Air France
Norwegian Air Shuttle
Azerbaijan Hava Yollary
Fly Jamaica
Saudia
Austrian
Aeromexico
Turkish
EVA Air
Interjet
Alitalia
Japan
Aeroflot Russian
Korean Air Lines
Brussels
China Eastern
Lufthansa
Air China
Meridiana
Philippine
Royal Air Maroc"""))

In [116]:
dropoff_counts = add_counts(61657, dropoff_counts, make_airline_array("""Delta"""))

In [117]:
dropoff_counts = add_counts(320670, dropoff_counts, make_airline_array("""Air Serbia
Arik Air
El Al
Volaris
Egyptair
Air Jamaica
XL Airways
China
Thomas Cook
Uzbekistan
Air Europa
Virgin America
Singapore
Etihad
China Southern
Avianca
Virgin Atlantic Airways
Pakistan
COPA
Kuwait Airways
Carribean
Emirates
KLM
Asiana
Sun Country
Swiss International
WestJet
Air India
Miami Air
Delta
South African Airways"""))

In [118]:
dropoff_counts = add_counts(183052, dropoff_counts, make_airline_array("""JetBlue
Hawaiian
Aer Lingus
TAP Air Portugal"""))

In [119]:
dropoff_counts = add_counts(113307, dropoff_counts, make_airline_array("""Qatar Airways
Ukraine International
Cathay Pacific Airways
Iberia
Icelandair
OpenSkies
LOT Polish
Qantas
Interjet
British Airways
ANA
Aerolineas Argentinas"""))

In [120]:
dropoff_counts = add_counts(255764, dropoff_counts, make_airline_array("""American Eagle
American
Alaska
Air Berlin
Qatar Airways
Royal Jordanian
Finnair"""))

In [121]:
total_dropoffs = sum(dropoff_counts.values())

In [122]:
dropoff_counts.most_common()

[('Delta', 501114),
 ('United', 214749),
 ('America', 191082),
 ('JetBlue', 151575),
 ('American', 150814),
 ('WestJet', 149547),
 ('Virgin America', 124621),
 ('Southwest', 114277),
 ('Air Canada', 114277),
 ('Spirit', 108358),
 ('Frontier', 105812),
 ('TAP Air Portugal', 48309),
 ('Aer Lingus', 48309),
 ('Qatar Airways', 45979),
 ('Hawaiian', 45763),
 ('Alaska', 45002),
 ('Air Berlin', 36537),
 ('Finnair', 36537),
 ('Royal Jordanian', 36537),
 ('American Eagle', 36537),
 ('Interjet', 17637),
 ('Virgin Atlantic Airways', 12890),
 ('Air India', 12890),
 ('El Al', 12890),
 ('Swiss International', 12890),
 ('OpenSkies', 11988),
 ('British Airways', 11988),
 ('Icelandair', 11988),
 ('Cathay Pacific Airways', 11988),
 ('Austrian', 10741),
 ('Air China', 10741),
 ('Lufthansa', 10741),
 ('China Southern', 10344),
 ('Egyptair', 10344),
 ('Pakistan', 10344),
 ('Thomas Cook', 10344),
 ('Kuwait Airways', 10344),
 ('Singapore', 10344),
 ('China', 10344),
 ('Miami Air', 10344),
 ('Carribean', 1034

In [123]:
market_shares = Counter()

In [124]:
for airline, dropoffs in dropoff_counts.most_common():
    market_shares[airline] = dropoffs/float(total_dropoffs) * 100

In [125]:
market_shares

Counter({'ANA': 0.3223932904499951,
         'Aer Lingus': 1.6494913650019922,
         'Aeroflot Russian': 0.2798149772545763,
         'Aerolineas Argentinas': 0.3223932904499951,
         'Aeromexico': 0.2798149772545763,
         'Air Berlin': 1.2475411621660104,
         'Air Canada': 3.901942178855548,
         'Air China': 0.36674712272012255,
         'Air Europa': 0.3531917174766733,
         'Air France': 0.2798149772545763,
         'Air India': 0.4401238629422195,
         'Air Jamaica': 0.3531917174766733,
         'Air Serbia': 0.3531917174766733,
         'Alaska': 1.5365751807700359,
         'Alitalia': 0.2798149772545763,
         'Allegiant Air': 0.08693214546554622,
         'America': 6.5244179967979195,
         'American': 5.149483341021559,
         'American Eagle': 1.2475411621660104,
         'Arik Air': 0.3531917174766733,
         'Asiana': 0.3531917174766733,
         'Austrian': 0.36674712272012255,
         'Avianca': 0.3531917174766733,
         'Azerba

In [126]:
market_shares.most_common()

[('Delta', 17.11033587699204),
 ('United', 7.3325181879735215),
 ('America', 6.5244179967979195),
 ('JetBlue', 5.1754673797879684),
 ('American', 5.149483341021559),
 ('WestJet', 5.106222135874329),
 ('Virgin America', 4.255133896332222),
 ('Southwest', 3.901942178855548),
 ('Air Canada', 3.901942178855548),
 ('Spirit', 3.699840305717069),
 ('Frontier', 3.6129081602515227),
 ('TAP Air Portugal', 1.6494913650019922),
 ('Aer Lingus', 1.6494913650019922),
 ('Qatar Airways', 1.5699344526160053),
 ('Hawaiian', 1.5625592195364462),
 ('Alaska', 1.5365751807700359),
 ('Air Berlin', 1.2475411621660104),
 ('Finnair', 1.2475411621660104),
 ('Royal Jordanian', 1.2475411621660104),
 ('American Eagle', 1.2475411621660104),
 ('Interjet', 0.6022082677045714),
 ('Virgin Atlantic Airways', 0.4401238629422195),
 ('Air India', 0.4401238629422195),
 ('El Al', 0.4401238629422195),
 ('Swiss International', 0.4401238629422195),
 ('OpenSkies', 0.4093254359155413),
 ('British Airways', 0.4093254359155413),
 ('I