In [129]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
import glob
import io
import os
from shapely.geometry import Point, LineString
from altair import datum
import altair as alt
# Select Altair's 'notebook' renderer for chart output.
alt.renderers.enable('notebook')
# Register the theme object exposed by altair_latimes under the name 'latimes',
# then make it the active Altair theme.
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
# Raise pandas' display limits so wide/long frames are not truncated as early.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [130]:
# Read a single flight-track CSV on its own; the next cells inspect its first rows and dtypes.
a_flight = pd.read_csv('input/flights/DL89_228f611c.csv')

In [131]:
# Preview the first rows of the sample file.
a_flight.head()

Unnamed: 0,Timestamp,UTC,Callsign,Position,Altitude,Speed,Direction
0,1571684146,2019-10-21T18:55:46Z,DAL89,"33.945415,-118.410347",0,0,264
1,1571688008,2019-10-21T20:00:08Z,DAL89,"33.945431,-118.410126",0,9,264
2,1571688026,2019-10-21T20:00:26Z,DAL89,"33.945442,-118.409904",0,9,258
3,1571688044,2019-10-21T20:00:44Z,DAL89,"33.945438,-118.40966",0,9,250
4,1571688065,2019-10-21T20:01:05Z,DAL89,"33.945442,-118.409454",0,2,236


In [132]:
# Show the dtype pandas inferred for each column of the sample file.
a_flight.dtypes

Timestamp     int64
UTC          object
Callsign     object
Position     object
Altitude      int64
Speed         int64
Direction     int64
dtype: object

In [133]:
# Directory that is searched for per-flight CSV files below.
path = 'input/flights/'

In [134]:
# Collect every CSV under `path`. glob returns matches in arbitrary, OS-dependent
# order, so sort them to keep the concatenated row order (and the resulting index
# values) reproducible from run to run.
files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [135]:
# Read each file and tag its rows with the source filename in a `flightid` column.
# Built as a list rather than a generator: a generator is exhausted by the first
# pd.concat, so re-running the concat cell on its own would fail with nothing
# left to concatenate.
file_df = [
    pd.read_csv(f, encoding="ISO-8859-1", low_memory=False)
      .assign(flightid=os.path.basename(f))
    for f in files
]

In [136]:
# Stack all per-file frames into one frame; ignore_index=True discards the per-file
# row labels and assigns a fresh running index.
concatenated_df = pd.concat(file_df, ignore_index=True)

In [137]:
# Strip the 'DL89_' tag and the '.csv' extension from the filename stored in `flightid`.
# regex=False forces both patterns to be literal text: read as a regex, the '.' in
# '.csv' would match any character, and the default for `regex` differs between
# pandas versions.
concatenated_df['flightid'] = (
    concatenated_df['flightid']
    .str.replace('DL89_', '', regex=False)
    .str.replace('.csv', '', regex=False)
)

In [138]:
# Total number of rows across all concatenated files.
len(concatenated_df)

196276

In [139]:
# Normalize column names: trim whitespace, lower-case, spaces and hyphens become
# underscores, parentheses are removed.
# regex=False keeps every pattern literal; '(' and ')' are not valid regular
# expressions on their own, and the default for `regex` differs between pandas versions.
concatenated_df.columns = (
    concatenated_df.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [140]:
# Order rows newest-first by `timestamp`. sort_values already returns a new
# DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
flights = concatenated_df.sort_values('timestamp', ascending=False)

In [141]:
# Split the comma-separated text in `position` into two string columns in a single
# vectorized pass: the part before the first comma becomes `latitude`, the part
# after it `longitude`. The values stay strings here.
# This deliberately does not bind the name `lat`, which the import cell uses as
# the alias for the altair_latimes module.
position_parts = flights['position'].str.split(',', expand=True)

flights['latitude'] = position_parts[0]
flights['longitude'] = position_parts[1]

In [142]:
# Cast both coordinate columns to float in one call.
flights = flights.astype({'latitude': float, 'longitude': float})

In [143]:
# Parse the `utc` strings with an explicit format and mark the result as UTC.
flights = flights.assign(
    utc_datetime=lambda df: pd.to_datetime(
        df['utc'], format='%Y-%m-%dT%H:%M:%SZ'
    ).dt.tz_localize('UTC')
)

In [144]:
# Convert the UTC timestamps to the America/Los_Angeles time zone.
flights = flights.assign(
    datetime=lambda df: df['utc_datetime'].dt.tz_convert('America/Los_Angeles')
)

In [145]:
# Derive string columns from the local `datetime`: date as MM/DD/YYYY and
# 24-hour time as HH:MM:SS. `datetime` is already a datetime column, so it is
# used directly.
flights = flights.assign(
    date=lambda df: df['datetime'].dt.strftime('%m/%d/%Y'),
    time=lambda df: df['datetime'].dt.strftime('%H:%M:%S'),
)

In [146]:
# flights['time'] = flights['datetime'].dt.time

In [147]:
# 12-hour clock label with AM/PM, built from the local `datetime`.
flights = flights.assign(
    display_time=lambda df: df['datetime'].dt.strftime('%I:%M %p')
)

In [148]:
# Day-of-week name for each local timestamp.
# Uses .dt.day_name(): the older .dt.weekday_name attribute was deprecated and
# then removed in pandas 1.0, so it raises AttributeError on current pandas.
flights['weekday'] = flights['datetime'].dt.day_name()

In [149]:
# Remove the raw source columns now that the derived columns exist.
flights = flights.drop(columns=['utc', 'timestamp', 'position', 'utc_datetime'])

In [150]:
# Preview the cleaned frame (rows are ordered newest-first).
flights.head()

Unnamed: 0,callsign,altitude,speed,direction,flightid,latitude,longitude,datetime,date,time,display_time,weekday
100664,DAL89,0,12,132,23856f50,33.947285,-118.404953,2020-01-14 12:10:28-08:00,01/14/2020,12:10:28,12:10 PM,Tuesday
100663,DAL89,0,15,132,23856f50,33.947453,-118.405182,2020-01-14 12:10:16-08:00,01/14/2020,12:10:16,12:10 PM,Tuesday
100662,DAL89,0,19,118,23856f50,33.947697,-118.405479,2020-01-14 12:10:05-08:00,01/14/2020,12:10:05,12:10 PM,Tuesday
100661,DAL89,0,20,101,23856f50,33.947823,-118.405685,2020-01-14 12:09:58-08:00,01/14/2020,12:09:58,12:09 PM,Tuesday
100660,DAL89,0,21,84,23856f50,33.947857,-118.405907,2020-01-14 12:09:52-08:00,01/14/2020,12:09:52,12:09 PM,Tuesday


In [151]:
# Write the cleaned table to CSV. to_csv is called with its defaults, so the
# row index is written out as well.
flights.to_csv('output/flights.csv')

In [161]:
# Build a GeoDataFrame with one point per row; x is longitude, y is latitude.
# NOTE(review): no CRS is assigned here — the values look like WGS84 lon/lat; confirm
# and consider setting one explicitly.
flights_geo = gpd.GeoDataFrame(
    flights,
    geometry=gpd.points_from_xy(x=flights['longitude'], y=flights['latitude']),
)

In [162]:
# Preview the GeoDataFrame, including the new `geometry` column.
flights_geo.head()

Unnamed: 0,callsign,altitude,speed,direction,flightid,latitude,longitude,datetime,date,time,display_time,weekday,geometry
100664,DAL89,0,12,132,23856f50,33.947285,-118.404953,2020-01-14 12:10:28-08:00,01/14/2020,12:10:28,12:10 PM,Tuesday,POINT (-118.404953 33.947285)
100663,DAL89,0,15,132,23856f50,33.947453,-118.405182,2020-01-14 12:10:16-08:00,01/14/2020,12:10:16,12:10 PM,Tuesday,POINT (-118.405182 33.947453)
100662,DAL89,0,19,118,23856f50,33.947697,-118.405479,2020-01-14 12:10:05-08:00,01/14/2020,12:10:05,12:10 PM,Tuesday,POINT (-118.405479 33.947697)
100661,DAL89,0,20,101,23856f50,33.947823,-118.405685,2020-01-14 12:09:58-08:00,01/14/2020,12:09:58,12:09 PM,Tuesday,POINT (-118.405685 33.947823)
100660,DAL89,0,21,84,23856f50,33.947857,-118.405907,2020-01-14 12:09:52-08:00,01/14/2020,12:09:52,12:09 PM,Tuesday,POINT (-118.405907 33.947857)


In [168]:
# flights_geo_line = flights_geo.groupby(['flightid', 'date', 'time'], as_index=False)['geometry']\
#     .apply(lambda x: LineString(x.tolist()) if x.size > 1 else x.tolist())


In [170]:
# flights_geo_line

flightid  date        time    
1f2e4d5f  01/14/2019  12:38:33     [POINT (-118.403481 33.94812)]
                      12:40:41    [POINT (-118.403275 33.948231)]
                      12:40:50     [POINT (-118.403061 33.94836)]
                      12:41:00    [POINT (-118.402939 33.948624)]
                      12:41:10    [POINT (-118.403038 33.948875)]
                      12:41:17     [POINT (-118.403389 33.94894)]
                      12:41:23     [POINT (-118.403679 33.94891)]
                      12:41:32    [POINT (-118.403923 33.948883)]
                      12:41:38     [POINT (-118.404297 33.94883)]
                      12:41:44    [POINT (-118.404579 33.948795)]
                      12:41:54     [POINT (-118.405037 33.94875)]
                      12:42:07    [POINT (-118.405685 33.948681)]
                      12:42:13     [POINT (-118.406036 33.94865)]
                      12:42:21    [POINT (-118.406609 33.948593)]
                      12:42:36    [POINT (-11

In [160]:
# Re-wrap `flights_geo` as a GeoDataFrame using its 'geometry' column as the geometry.
# NOTE(review): this cell's execution count (160) is lower than that of the cell that
# first builds flights_geo (161), and that cell already produces a GeoDataFrame — this
# looks redundant; confirm before relying on it.
flights_geo = gpd.GeoDataFrame(flights_geo, geometry='geometry')

flightid  datetime                 
1f2e4d5f  2019-01-14 12:38:33-08:00     [POINT (-118.403481 33.94812)]
          2019-01-14 12:40:41-08:00    [POINT (-118.403275 33.948231)]
          2019-01-14 12:40:50-08:00     [POINT (-118.403061 33.94836)]
          2019-01-14 12:41:00-08:00    [POINT (-118.402939 33.948624)]
          2019-01-14 12:41:10-08:00    [POINT (-118.403038 33.948875)]
Name: geometry, dtype: object

In [112]:
# Export the point GeoDataFrame to a GeoJSON file.
flights_geo.to_file('output/flights_geo.geojson', driver='GeoJSON')

In [None]:
# gas flight
# https://www.flightradar24.com/data/flights/dl89#23856f50
# Keep only the rows belonging to flight id 23856f50.
flight23856f50 = flights_geo.loc[flights_geo['flightid'].eq('23856f50')]

### investigate anomalous routes

In [None]:
# Pull out the two flights being inspected, one frame per flight id.
# https://www.flightradar24.com/data/flights/dl89#1f9f3f90
flight1f9f3f90 = flights_geo.loc[flights_geo['flightid'].eq('1f9f3f90')]

# https://www.flightradar24.com/data/flights/dl89#1f9e5864
flight1f9e5864 = flights_geo.loc[flights_geo['flightid'].eq('1f9e5864')]

In [None]:
# Quick plot of the points recorded for flight 1f9e5864.
flight1f9e5864.plot()

In [None]:
# Keep only points whose longitude is below -116, i.e. west of 116°W.
western = flights_geo.loc[flights_geo['longitude'].lt(-116)]

In [None]:
# Quick plot of the longitude-filtered subset.
western.plot()

In [None]:
# Drop these flights:
#
#     1f9f3f90 — Detroit to Shanghai
    