In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
import glob
import io
import os
from shapely.geometry import Point, LineString
from altair import datum
import altair as alt
alt.renderers.enable('notebook')
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

In [2]:
    a_flight = pd.read_csv('input/flights/DL89_228f611c.csv')

In [3]:
a_flight.head()

Unnamed: 0,Timestamp,UTC,Callsign,Position,Altitude,Speed,Direction
0,1571684146,2019-10-21T18:55:46Z,DAL89,"33.945415,-118.410347",0,0,264
1,1571688008,2019-10-21T20:00:08Z,DAL89,"33.945431,-118.410126",0,9,264
2,1571688026,2019-10-21T20:00:26Z,DAL89,"33.945442,-118.409904",0,9,258
3,1571688044,2019-10-21T20:00:44Z,DAL89,"33.945438,-118.40966",0,9,250
4,1571688065,2019-10-21T20:01:05Z,DAL89,"33.945442,-118.409454",0,2,236


In [4]:
a_flight.dtypes

Timestamp     int64
UTC          object
Callsign     object
Position     object
Altitude      int64
Speed         int64
Direction     int64
dtype: object

In [5]:
path = 'input/flights/'

In [6]:
files = glob.glob(os.path.join(path, "*.csv"))

In [7]:
file_df = (pd.read_csv(f, encoding = "ISO-8859-1", low_memory=False)\
           .assign(flightid=os.path.basename(f)) for f in files)

In [8]:
concatenated_df = pd.concat(file_df, ignore_index=True)

In [9]:
concatenated_df['flightid'] = concatenated_df['flightid']\
    .str.replace('DL89_','')\
    .str.replace('.csv','')

In [10]:
len(concatenated_df)

196276

In [11]:
concatenated_df.columns = concatenated_df.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

In [12]:
flights = pd.DataFrame(concatenated_df.sort_values(by='timestamp', ascending=False))

In [13]:
lat = []
lon = []

for row in flights['position']:
    lat.append(row.split(',')[0])
    lon.append(row.split(',')[1])
        
flights['latitude'] = lat
flights['longitude'] = lon

In [14]:
flights['latitude'] = flights['latitude'].astype(float)
flights['longitude'] = flights['longitude'].astype(float)

In [15]:
flights['utc_datetime'] = pd.to_datetime(flights['utc'], format='%Y-%m-%dT%H:%M:%SZ').dt.tz_localize('UTC')

In [16]:
flights['datetime'] = flights['utc_datetime'].dt.tz_convert('America/Los_Angeles')

In [17]:
flights['date'] = pd.to_datetime(flights['datetime']).dt.strftime('%m/%d/%Y')
flights['time'] = pd.to_datetime(flights['datetime']).dt.strftime('%H:%M:%S')

In [18]:
# flights['time'] = flights['datetime'].dt.time

In [19]:
flights['display_time'] = pd.to_datetime(flights['datetime']).dt.strftime('%I:%M %p')

In [20]:
flights['weekday'] = flights['datetime'].dt.weekday_name

In [21]:
flights = flights.drop(['utc', 'timestamp','position','utc_datetime'], axis=1)

In [22]:
flights.head()

Unnamed: 0,callsign,altitude,speed,direction,flightid,latitude,longitude,datetime,date,time,display_time,weekday
100664,DAL89,0,12,132,23856f50,33.947285,-118.404953,2020-01-14 12:10:28-08:00,01/14/2020,12:10:28,12:10 PM,Tuesday
100663,DAL89,0,15,132,23856f50,33.947453,-118.405182,2020-01-14 12:10:16-08:00,01/14/2020,12:10:16,12:10 PM,Tuesday
100662,DAL89,0,19,118,23856f50,33.947697,-118.405479,2020-01-14 12:10:05-08:00,01/14/2020,12:10:05,12:10 PM,Tuesday
100661,DAL89,0,20,101,23856f50,33.947823,-118.405685,2020-01-14 12:09:58-08:00,01/14/2020,12:09:58,12:09 PM,Tuesday
100660,DAL89,0,21,84,23856f50,33.947857,-118.405907,2020-01-14 12:09:52-08:00,01/14/2020,12:09:52,12:09 PM,Tuesday


In [23]:
flights.to_csv('output/flights.csv')

In [24]:
flights_geo = gpd.GeoDataFrame(flights, geometry=gpd.points_from_xy(flights['longitude'], flights['latitude']))

In [25]:
flights_geo_line = flights_geo.groupby('flightid')['geometry']\
    .apply(lambda x: LineString(x.tolist()) if x.size > 1 else x.tolist())

In [26]:
flights_geo_line = gpd.GeoDataFrame(flights_geo_line, geometry='geometry').reset_index()

In [27]:
flights_geo_line.head()

## Need a start date, time and an end date, time

Unnamed: 0,flightid,geometry
0,1f2e4d5f,"LINESTRING (121.893097 30.955673, 121.896004 3..."
1,1f337e97,"LINESTRING (121.854141 31.05942, 121.856094 31..."
2,1f366087,"LINESTRING (121.786087 31.239487, 121.783989 3..."
3,1f39007e,"LINESTRING (121.854584 31.05821, 121.856506 31..."
4,1f3dcc94,"LINESTRING (121.853539 31.060957, 121.856903 3..."


In [28]:
flights_geo.to_file('output/flights_geo.geojson', driver='GeoJSON')
flights_geo_line.to_file('output/flights_geo_line.geojson', driver='GeoJSON')

In [29]:
flights_geo_delta = flights_geo[flights_geo['flightid'] == '23856f50']

In [30]:
flights_geo_line_delta = 

SyntaxError: invalid syntax (<ipython-input-30-c73d3088c77d>, line 1)

In [None]:
# gas flight
flight23856f50 = flights_geo[flights_geo['flightid'] == '23856f50']
# https://www.flightradar24.com/data/flights/dl89#23856f50

In [None]:
delta_89_gas_flight = flight23856f50[flight23856f50['speed'] > 30].\
    sort_values(by='time', ascending=True)

In [None]:
delta_89_gas_flight.to_csv('output/delta_89_gas_flight.csv')

In [None]:
delta_89_gas_flight.dtypes

### investigate anomalous routes

In [None]:
flight1f9f3f90 = flights_geo[flights_geo['flightid'] == '1f9f3f90']
# https://www.flightradar24.com/data/flights/dl89#1f9f3f90

flight1f9e5864 = flights_geo[flights_geo['flightid'] == '1f9e5864']
# https://www.flightradar24.com/data/flights/dl89#1f9e5864

In [None]:
flight1f9e5864.plot()

In [None]:
western = flights_geo[flights_geo['longitude'] < -116]

In [None]:
western.plot()

In [None]:
drop these flights: 
    
    1f9f3f90 detroit to shanghai
    