In [1]:
from collections import Counter
from ast import literal_eval as make_tuple
from pyproj import Proj, transform
from shapely.geometry import shape, Point

import shapefile
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt, mpld3
import csv
mpld3.enable_notebook()

In [2]:
newark_coordinates = {
    'terminal_a': [40.687794, -74.182307],
    'terminal_b': [40.690627, -74.177544],
    'terminal_c': [40.695558, -74.178063]
}

laguardia_coordinates = {
    'terminal_a': [40.772375, -73.885976],
    'terminal_b': [40.774444, -73.872006],
    'terminal_c': [40.770674, -73.865307],
    'terminal_d': [40.768628, -73.862134]
}

jfk_coordinates = {
    'terminal_1': [40.643325, -73.789939],
    'terminal_2': [40.641595, -73.788767],
    'terminal_4': [40.644193, -73.782554],
    'terminal_5': [40.645807, -73.776774],
    'terminal_7': [40.648798, -73.782922],
    'terminal_8': [40.646934, -73.789874]
}

In [3]:
jfk_green_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-green-jfk-dropoffs-2016.csv')
jfk_yellow_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-yellow-jfk-dropoffs-2016.csv')
newark_green_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-green-newark-dropoffs-2016.csv')
newark_yellow_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-yellow-newark-dropoffs-2016.csv')
laguardia_green_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-green-laguardia-dropoffs-2016.csv')
laguardia_yellow_dropoffs = pd.DataFrame.from_csv('./2016/modified/mod-tlc-yellow-laguardia-dropoffs-2016.csv')

In [4]:
sf = shapefile.Reader("taxi_zones/taxi_zones")
shapes = sf.shapes()

In [5]:
NEWARK_ID = 1
LAGUARDIA_ID = 138
JFK_ID = 132

In [11]:
def check_bounds(raw_coords, shapefile):
    in_proj = Proj(init='epsg:4326')
    out_proj= Proj(init='epsg:2263', preserve_units = True)
    coords = make_tuple(raw_coords)
    
    point = Point(transform(in_proj, out_proj, coords[1], coords[0]))
    polygon = shape(shapefile)
    return polygon.contains(point)

In [12]:
newark_yellow_filter = newark_yellow_dropoffs['coords'].apply(check_bounds, args=(shapes[NEWARK_ID - 1],))

In [13]:
newark_green_filter = newark_green_dropoffs['coords'].apply(check_bounds, args=(shapes[NEWARK_ID - 1],))

In [14]:
laguardia_green_filter = laguardia_green_dropoffs['coords'].apply(check_bounds, args=(shapes[LAGUARDIA_ID - 1],))

In [26]:
laguardia_yellow_filter = laguardia_yellow_dropoffs['coords'].apply(check_bounds, args=(shapes[LAGUARDIA_ID - 1],))

In [27]:
jfk_green_filter = jfk_green_dropoffs['coords'].apply(check_bounds, args=(shapes[JFK_ID - 1],))

In [28]:
jfk_yellow_filter = jfk_yellow_dropoffs['coords'].apply(check_bounds, args=(shapes[JFK_ID - 1],))

In [29]:
newark_yellow_dropoffs = newark_yellow_dropoffs.loc[newark_yellow_filter]
newark_green_dropoffs = newark_green_dropoffs.loc[newark_green_filter]
laguardia_yellow_dropoffs = laguardia_yellow_dropoffs.loc[laguardia_yellow_filter]
laguardia_green_dropoffs = laguardia_green_dropoffs.loc[laguardia_green_filter]
jfk_yellow_dropoffs = jfk_yellow_dropoffs.loc[jfk_yellow_filter]
jfk_green_dropoffs = jfk_green_dropoffs.loc[jfk_green_filter]

In [None]:
jfk_green_dropoffs.to_csv('./2016/modified/mod-tlc-green-jfk-dropoffs-2016.csv')
jfk_yellow_dropoffs.to_csv('./2016/modified/mod-tlc-yellow-jfk-dropoffs-2016.csv')
newark_green_dropoffs.to_csv('./2016/modified/mod-tlc-green-newark-dropoffs-2016.csv')
newark_yellow_dropoffs.to_csv('./2016/modified/mod-tlc-yellow-newark-dropoffs-2016.csv')
laguardia_green_dropoffs.to_csv('./2016/modified/mod-tlc-green-laguardia-dropoffs-2016.csv')
laguardia_yellow_dropoffs.to_csv('./2016/modified/mod-tlc-yellow-laguardia-dropoffs-2016.csv')

In [4]:
total_count = len(jfk_green_dropoffs) + len(jfk_yellow_dropoffs) + len(newark_green_dropoffs) + len(newark_yellow_dropoffs) \
+ len(laguardia_green_dropoffs) + len(laguardia_yellow_dropoffs)
total_count

1729401

In [7]:
def count_passengers(df):
    terminal_counts = pd.Series(np.zeros(len(df['terminal'].unique())), index=df['terminal'].unique(), name='terminal')
    for _, dropoff in df.iterrows():
        if not np.isnan(dropoff['passenger_count']):
            terminal_counts[dropoff['terminal']] += dropoff['passenger_count']
        else:
            terminal_counts[dropoff['terminal']] += 1
    return terminal_counts

In [8]:
newark_yellow_terminal_counts = count_passengers(newark_yellow_dropoffs)

In [9]:
newark_green_terminal_counts = count_passengers(newark_green_dropoffs)

In [10]:
total_newark_counts = newark_green_terminal_counts + newark_yellow_terminal_counts

In [11]:
total_newark_counts

terminal_c    106524.0
terminal_b     50910.0
terminal_a     49529.0
Name: terminal, dtype: float64

In [252]:
plt = total_newark_counts.plot(kind='bar')
mpld3.display()

In [253]:
laguardia_yellow_terminal_counts = count_passengers(laguardia_yellow_dropoffs)
laguardia_green_terminal_counts = count_passengers(laguardia_green_dropoffs)
total_laguardia_counts = laguardia_green_terminal_counts + laguardia_yellow_terminal_counts

In [256]:
total_laguardia_counts

terminal_c    353063.0
terminal_b    827711.0
terminal_d    273830.0
terminal_a     57285.0
Name: terminal, dtype: float64

In [257]:
plt = total_laguardia_counts.plot(kind='bar')
mpld3.display()

In [258]:
jfk_yellow_terminal_counts = count_passengers(jfk_yellow_dropoffs)
jfk_green_terminal_counts = count_passengers(jfk_green_dropoffs)
total_jfk_counts = jfk_green_terminal_counts + jfk_yellow_terminal_counts

In [262]:
plt = total_jfk_counts.plot(kind='bar')
mpld3.display()

In [263]:
total_jfk_counts

terminal_1    180289.0
terminal_2     61629.0
terminal_4    320668.0
terminal_5    177312.0
terminal_7    110284.0
terminal_8    253179.0
Name: terminal, dtype: float64

In [264]:
def add_counts(total_dropoff_count, dropoff_counts, terminal_airline_list):
    average_dropoff_count = total_dropoff_count/len(terminal_airline_list)
    for airline in terminal_airline_list:
        dropoff_counts[airline] += average_dropoff_count
    return dropoff_counts

In [265]:
def make_airline_array(raw_string):
    return raw_string.split('\n')

In [266]:
dropoff_counts = add_counts(49529, Counter(), [
            'Southwest',
            'Air Canada',
            'Virgin America',
            'JetBlue Airways',
            'American',
            'Alaska'
        ])

In [267]:
dropoff_counts = add_counts(50910, dropoff_counts, [
            'Austrian',
            'Cathay Pacific Airways',
            'TAP Air Portugal',
            'Allegiant Air',
            'OpenSkies',
            'Aer Lingus',
            'Porter',
            'Delta',
            'SAS Scandinavian',
            'Spirit',
            'Swiss International',
            'Virgin Atlantic Airways',
            'Ethiopian',
            'Wow Air',
            'Lufthansa',
            'Air India',
            'El Al',
            'Icelandair',
            'Air China',
            'British Airways'
        ])

In [268]:
dropoff_counts = add_counts(106524, dropoff_counts, ['United'] )

In [269]:
dropoff_counts = add_counts(57285, dropoff_counts, ['Delta'] )

In [270]:
dropoff_counts = add_counts(827711 , dropoff_counts, make_airline_array("""American
JetBlue
Spirit
Southwest
Air Canada
Virgin America
Frontier
United"""))

In [271]:
dropoff_counts = add_counts(353063, dropoff_counts, make_airline_array("""Delta
American"""))

In [272]:
dropoff_counts = add_counts(273830, dropoff_counts, make_airline_array("""Delta
WestJet"""))

In [273]:
dropoff_counts = add_counts(180289, dropoff_counts, make_airline_array("""Cayman Airways
Air France
Norwegian Air Shuttle
Azerbaijan Hava Yollary
Fly Jamaica
Saudia
Austrian
Aeromexico
Turkish
EVA Air
Interjet
Alitalia
Japan
Aeroflot Russian
Korean Air Lines
Brussels
China Eastern
Lufthansa
Air China
Meridiana
Philippine
Royal Air Maroc"""))

In [274]:
dropoff_counts = add_counts(61629, dropoff_counts, make_airline_array("""Delta"""))

In [275]:
dropoff_counts = add_counts(320668, dropoff_counts, make_airline_array("""Air Serbia
Arik Air
El Al
Volaris
Egyptair
Air Jamaica
XL Airways
China
Thomas Cook
Uzbekistan
Air Europa
Virgin America
Singapore
Etihad
China Southern
Avianca
Virgin Atlantic Airways
Pakistan
COPA
Kuwait Airways
Carribean
Emirates
KLM
Asiana
Sun Country
Swiss International
WestJet
Air India
Miami Air
Delta
South African Airways"""))

In [276]:
dropoff_counts = add_counts(177312, dropoff_counts, make_airline_array("""JetBlue
Hawaiian
Aer Lingus
TAP Air Portugal"""))

In [277]:
dropoff_counts = add_counts(110284, dropoff_counts, make_airline_array("""Qatar Airways
Ukraine International
Cathay Pacific Airways
Iberia
Icelandair
OpenSkies
LOT Polish
Qantas
Interjet
British Airways
ANA
Aerolineas Argentinas"""))

In [278]:
dropoff_counts = add_counts(253179, dropoff_counts, make_airline_array("""American Eagle
American
Alaska
Air Berlin
Qatar Airways
Royal Jordanian
Finnair"""))

In [279]:
total_dropoffs = sum(dropoff_counts.values())

In [280]:
total_dropoffs

2822158

In [281]:
dropoff_counts.most_common()

[('Delta', 445249),
 ('American', 324416),
 ('United', 209987),
 ('JetBlue', 147791),
 ('WestJet', 147259),
 ('Virgin America', 122061),
 ('Southwest', 111717),
 ('Air Canada', 111717),
 ('Spirit', 106008),
 ('Frontier', 103463),
 ('TAP Air Portugal', 46873),
 ('Aer Lingus', 46873),
 ('Qatar Airways', 45358),
 ('Alaska', 44422),
 ('Hawaiian', 44328),
 ('Air Berlin', 36168),
 ('Royal Jordanian', 36168),
 ('Finnair', 36168),
 ('American Eagle', 36168),
 ('Interjet', 17384),
 ('Virgin Atlantic Airways', 12889),
 ('Air India', 12889),
 ('El Al', 12889),
 ('Swiss International', 12889),
 ('British Airways', 11735),
 ('OpenSkies', 11735),
 ('Icelandair', 11735),
 ('Cathay Pacific Airways', 11735),
 ('Lufthansa', 10739),
 ('Air China', 10739),
 ('Austrian', 10739),
 ('Air Jamaica', 10344),
 ('Carribean', 10344),
 ('China Southern', 10344),
 ('Arik Air', 10344),
 ('Uzbekistan', 10344),
 ('XL Airways', 10344),
 ('Singapore', 10344),
 ('China', 10344),
 ('Miami Air', 10344),
 ('COPA', 10344),
 (

In [282]:
market_shares = Counter()

In [283]:
for airline, dropoffs in dropoff_counts.most_common():
    market_shares[airline] = dropoffs/float(total_dropoffs) * 100

In [284]:
market_shares

Counter({'ANA': 0.3256373314321877,
         'Aer Lingus': 1.6608921258129417,
         'Aeroflot Russian': 0.290345189744869,
         'Aerolineas Argentinas': 0.3256373314321877,
         'Aeromexico': 0.290345189744869,
         'Air Berlin': 1.2815724704286577,
         'Air Canada': 3.9585664587170526,
         'Air China': 0.3805244072089515,
         'Air Europa': 0.36652802571649074,
         'Air France': 0.290345189744869,
         'Air India': 0.4567072431805732,
         'Air Jamaica': 0.36652802571649074,
         'Air Serbia': 0.36652802571649074,
         'Alaska': 1.574043692805293,
         'Alitalia': 0.290345189744869,
         'Allegiant Air': 0.09017921746408245,
         'American': 11.49531670445099,
         'American Eagle': 1.2815724704286577,
         'Arik Air': 0.36652802571649074,
         'Asiana': 0.36652802571649074,
         'Austrian': 0.3805244072089515,
         'Avianca': 0.36652802571649074,
         'Azerbaijan Hava Yollary': 0.290345189744869,
 

In [285]:
market_shares.most_common()

[('Delta', 15.776898387687721),
 ('American', 11.49531670445099),
 ('United', 7.440653570778107),
 ('JetBlue', 5.236808144689277),
 ('WestJet', 5.21795732202095),
 ('Virgin America', 4.325094484433543),
 ('Southwest', 3.9585664587170526),
 ('Air Canada', 3.9585664587170526),
 ('Spirit', 3.7562744538045),
 ('Frontier', 3.666095236340418),
 ('TAP Air Portugal', 1.6608921258129417),
 ('Aer Lingus', 1.6608921258129417),
 ('Qatar Airways', 1.6072098018608454),
 ('Alaska', 1.574043692805293),
 ('Hawaiian', 1.5707129083488591),
 ('Royal Jordanian', 1.2815724704286577),
 ('Finnair', 1.2815724704286577),
 ('American Eagle', 1.2815724704286577),
 ('Air Berlin', 1.2815724704286577),
 ('Interjet', 0.6159825211770567),
 ('Virgin Atlantic Airways', 0.4567072431805732),
 ('Air India', 0.4567072431805732),
 ('El Al', 0.4567072431805732),
 ('Swiss International', 0.4567072431805732),
 ('OpenSkies', 0.4158165488962701),
 ('British Airways', 0.4158165488962701),
 ('Icelandair', 0.4158165488962701),
 ('Ca