## American Intercontinental Flights

by: Konrad Krawczyk and Dorian Buijse

In [1]:
import csv
import pandas as pd
from collections import Counter
from tempfile import NamedTemporaryFile
import shutil


Problem with the data, oktober used a 5 number classification for the airports instead of the standard 3 letter one. To fix this we used the following script.

In [None]:
propercodes = csv.reader(open("./L_AIRPORT.csv"))
numbertoairport = csv.reader(open("./L_AIRPORT_ID.csv"))
prpcode = dict()
numtoair = dict()

for line in propercodes:
    prpcode[line[-1]] = line[0]

for line in numbertoairport:
    numtoair[line[0]] = line[-1]
    
def numbertoiata(number):
    return prpcode[numtoair[number]]

Now to create the new dataset where all faulty classifications are fixed. (Only Oktober is faulty)

In [None]:
filename = "./flights.csv"
tempfile = NamedTemporaryFile(mode='wb', delete=False)

with open(filename, 'rb') as csvfile, tempfile:
    reader = csv.reader(csvfile)
    writer = csv.writer(tempfile)
    for row in reader:
        if row[1] == '10':        #Oktober
            row[7] = numbertoiata(row[7])
            row[8] = numbertoiata(row[8])
            writer.writerow(row)
        else:
            writer.writerow(row)  

shutil.move(tempfile.name, filename)

Remove all non delayed filghts and delayed flights without a reason.

In [2]:
f1 = open('flights.csv', 'rb')
f2 = open('flights_delayed.csv', 'wb')
reader = csv.reader(f1)
writer = csv.writer(f2)
for row in reader:
    if row[-1] != '':
        writer.writerow(row)
    else:
        continue

f1.close()
f2.close()

Take the 6 largest airlines

- United Airlines
- American Airlines
- Delta
- South west
- Alaska
- Jetblue


In [5]:
f2 = open('./flights_delayed.csv', 'rb')
f3 = open('./flights_delayed_selairl.csv', 'wb')
r = csv.reader(f2)
w = csv.writer(f3)

for row in r:
    if row[4] in ['UA', 'AA', 'DL', 'WN', 'AS', 'B6']:
        w.writerow(row)
    else:
        continue
    
f2.close()
f3.close()

In [24]:
count = Counter()

f3 = open('./flights_delayed_selairl.csv', 'rb')
r = csv.reader(f3)

for line in r:
    count[line[7]] += 1
    count[line[8]] += 1

f3.close()

Count average amount of delayed flights of the 6 biggest airlines per day per airport 

In [28]:
count_avg = Counter()
for k,v in count.items():
    count_avg[k] = v/365
print(count_avg)

Counter({'ATL': 217, 'LAX': 165, 'DFW': 151, 'ORD': 149, 'DEN': 136, 'LAS': 118, 'SFO': 111, 'BOS': 110, 'JFK': 107, 'MCO': 107, 'PHX': 105, 'BWI': 90, 'SEA': 89, 'LGA': 88, 'MDW': 86, 'EWR': 82, 'IAH': 78, 'MIA': 74, 'FLL': 62, 'MSP': 62, 'DAL': 62, 'SAN': 58, 'DCA': 58, 'TPA': 58, 'HOU': 57, 'DTW': 55, 'CLT': 55, 'PHL': 47, 'OAK': 42, 'SLC': 40, 'STL': 39, 'AUS': 36, 'BNA': 35, 'PDX': 33, 'SJC': 32, 'MSY': 32, 'SMF': 31, 'MCI': 30, 'IAD': 29, 'SNA': 28, 'SJU': 26, 'SAT': 25, 'PBI': 25, 'RDU': 24, 'RSW': 21, 'PIT': 17, 'IND': 17, 'ABQ': 17, 'BDL': 16, 'MKE': 16, 'CMH': 15, 'JAX': 14, 'CLE': 14, 'ONT': 14, 'BUR': 13, 'BUF': 13, 'HNL': 13, 'ANC': 13, 'OMA': 10, 'RNO': 9, 'OKC': 9, 'PVD': 9, 'ELP': 9, 'TUS': 8, 'TUL': 8, 'LGB': 8, 'RIC': 7, 'CHS': 7, 'MEM': 7, 'GEG': 6, 'ALB': 6, 'SDF': 6, 'BHM': 6, 'OGG': 6, 'ORF': 5, 'HPN': 5, 'BOI': 4, 'ISP': 4, 'CAK': 4, 'ROC': 4, 'GRR': 4, 'MHT': 4, 'CVG': 4, 'LIT': 3, 'JNU': 3, 'DAY': 3, 'MAF': 3, 'DSM': 3, 'SRQ': 3, 'SAV': 3, 'STT': 3, 'PNS': 3, '

Any airport with less than a ***variable*** (*var*) number of total flights a day is removed from the dataset

In [51]:
count2 = Counter()

var = 100 # Number of minimal daily flights

f4 = open("flights.csv")
flights = csv.reader(f4)

for line in flights:
    count2[line[7]] += 1
    count2[line[8]] += 1
    
for k,v in count2.items():
    count2[k] = v/365
    
f4.close()

imp_airp = []
for k,v in count2.iteritems():
    if v >= 100:
        imp_airp.append(k)
    else:
        continue

In [56]:
f3 = open('./flights_delayed_selairl.csv', 'rb')
f5 = open('./flights_delayed_selairl_selairp.csv', 'wb')
read = csv.reader(f3)
write = csv.writer(f5)

for row in read:
    if row[7] and row[8] in imp_airp:
        write.writerow(row)
    else:
        continue
    
f3.close()
f5.close()


