In [0]:
# Load Packages, set storage links |

from pyspark.sql import functions as f
from pyspark.sql import Window
from pyspark.sql.functions import col, to_timestamp, to_utc_timestamp, concat_ws, udf
from datetime import datetime

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# import airporttime

# Set Blob paths, create links | 

blob_container = "w261-team28-container" # The name of your container created in https://portal.azure.com
storage_account = "team28" # The name of your Storage account created in https://portal.azure.com
secret_scope = "w261-team28-scope" # The name of the scope created in your local computer using the Databricks CLI
secret_key = "w261-team28-key" # The name of the secret key created in your local computer using the Databricks CLI 
blob_url = f"wasbs://{blob_container}@{storage_account}.blob.core.windows.net"

spark.conf.set(
  f"fs.azure.sas.{blob_container}.{storage_account}.blob.core.windows.net",
  dbutils.secrets.get(scope = secret_scope, key = secret_key)
)

#### Load Data

In [0]:
# Load data from Saved Parquet Files | 

# flights_3m = spark.read.parquet(f'{blob_url}/3m_flights_processed').cache()
# flights_6m = spark.read.parquet(f'{blob_url}/6m_flights_processed').cache()
ICAO_station_mapping = spark.read.parquet(f'{blob_url}/ICAO_station_mapping').cache()
# weather_sample_processed = spark.read.parquet(f'{blob_url}/30k_weather_sample_processed').cache()
# weather_sample_processed_new = spark.read.parquet(f'{blob_url}/weather_2015_02_21').cache()

# 3 months of airline data
AIRLINE_2015_PROCESSED_PATH = blob_url + '/processed/airline_2015_data.parquet'
flight_data = spark.read.parquet(AIRLINE_2015_PROCESSED_PATH).cache()

# 3 months of weather data
WEATHER_2015_PROCESSED_PATH = blob_url + '/processed/weather_2015_data.parquet'
weather_data = spark.read.parquet(WEATHER_2015_PROCESSED_PATH).cache()

# Quick rename for sake of brevity
flights = flight_data
stations = ICAO_station_mapping
weather = weather_data

In [0]:
display(weather_sample_processed)

STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,HOUR,WND_DIRECTION_ANGLE,WND_TYPE_CODE,WND_SPEED_RATE,CIG_CEILING_HEIGHT_DIMENSION,CIG_CEILING_DETERMINATION_CODE,CIG_CAVOK_CODE,VIS_DISTANCE_DIMENSION,VIS_VARIABILITY_CODE,TMP_AIR_TEMP,DEW_POINT_TEMP,SLP_SEA_LEVEL_PRES,VALID_WEATHER_DATA,WND_DIRECTION_ANGLE-AVG,WND_SPEED_RATE-AVG,CIG_CEILING_HEIGHT_DIMENSION-AVG,VIS_DISTANCE_DIMENSION-AVG,TMP_AIR_TEMP-AVG,DEW_POINT_TEMP-AVG,SLP_SEA_LEVEL_PRES-AVG
,2015-02-01T12:15:00.000+0000,38.25,-122.6,27.1,"PETALUMA MUNICIPAL AIRPORT, CA US",2015-02-01T12:00:00.000+0000,,C,0.0,91.0,M,N,16093.0,N,40.0,40.0,,0,,0.0,91.0,16093.0,40.0,40.0,
,2015-02-08T08:35:00.000+0000,38.25,-122.6,27.1,"PETALUMA MUNICIPAL AIRPORT, CA US",2015-02-08T08:00:00.000+0000,,C,0.0,671.0,M,N,16093.0,N,140.0,130.0,,0,,0.0,671.0,16093.0,140.0,130.0,
,2015-02-22T22:55:00.000+0000,38.25,-122.6,27.1,"PETALUMA MUNICIPAL AIRPORT, CA US",2015-02-22T22:00:00.000+0000,50.0,N,36.0,22000.0,9,N,16093.0,N,180.0,-10.0,,0,50.0,36.0,22000.0,16093.0,180.0,-10.0,
,2015-03-20T11:15:00.000+0000,38.25,-122.6,27.1,"PETALUMA MUNICIPAL AIRPORT, CA US",2015-03-20T11:00:00.000+0000,,C,0.0,22000.0,9,N,14484.0,N,60.0,60.0,,0,,0.0,22000.0,14484.0,60.0,60.0,
69015093121.0,2015-02-18T22:56:00.000+0000,34.3,-116.16667,625.1,"TWENTYNINE PALMS MC, CA US",2015-02-18T22:00:00.000+0000,,C,0.0,22000.0,9,N,16093.0,N,244.0,-44.0,9960.0,0,,0.0,22000.0,16093.0,244.0,-44.0,9960.0
69015093121.0,2015-02-26T14:56:00.000+0000,34.3,-116.16667,625.1,"TWENTYNINE PALMS MC, CA US",2015-02-26T14:00:00.000+0000,290.0,N,41.0,22000.0,9,N,16093.0,N,78.0,-28.0,10135.0,1,290.0,41.0,22000.0,16093.0,78.0,-28.0,10135.0
69015093121.0,2015-03-19T11:56:00.000+0000,34.3,-116.16667,625.1,"TWENTYNINE PALMS MC, CA US",2015-03-19T11:00:00.000+0000,270.0,N,31.0,22000.0,9,N,16093.0,N,156.0,50.0,10141.0,1,270.0,31.0,22000.0,16093.0,156.0,50.0,10141.0
69015093121.0,2015-03-19T16:56:00.000+0000,34.3,-116.16667,625.1,"TWENTYNINE PALMS MC, CA US",2015-03-19T16:00:00.000+0000,350.0,N,21.0,22000.0,9,N,16093.0,N,189.0,56.0,10160.0,1,350.0,21.0,22000.0,16093.0,189.0,56.0,10160.0
69015093121.0,2015-03-31T14:56:00.000+0000,34.3,-116.16667,625.1,"TWENTYNINE PALMS MC, CA US",2015-03-31T14:00:00.000+0000,,V,15.0,22000.0,9,N,16093.0,N,178.0,-11.0,10140.0,0,,15.0,22000.0,16093.0,178.0,-11.0,10140.0
70000126492.0,2015-01-29T15:53:00.000+0000,60.785,-148.83889,31.4,"PORTAGE GLACIER VISITOR CENTER, AK US",2015-01-29T15:00:00.000+0000,,C,0.0,1676.0,M,N,6437.0,N,-89.0,-94.0,10160.0,0,,0.0,1676.0,6437.0,-89.0,-94.0,10160.0


#### Schema Checks

In [0]:
# Schema Checks | 

# flights.printSchema()
# stations.printSchema()
# weather.printSchema()

In [0]:
# First, join flights to stations on ICAO - airport code which is a unique identifier we can use for both origin and destination
# In this join, we will be including:
#    - `WEATHER_STATION_ID`: the closest weather station which will then allow us to pull in weather at each airport

flights_w_stations = flights.join(stations, flights.ICAO_ORIGIN == stations.ICAO, 'left')\
                            .select(flights['*'], stations['WEATHER_STATION_ID'].alias('ORIGIN_WEATHER_STATION_ID'))\
                            .join(stations, flights.ICAO_DEST == stations.ICAO, 'left')\
                            .select(flights['*'], 'ORIGIN_WEATHER_STATION_ID', stations['WEATHER_STATION_ID'].alias('DEST_WEATHER_STATION_ID'))

In [0]:
# Sanity Check
# flights_w_stations.select(['ORIGIN', 'DEST', 'ICAO_ORIGIN', 'ICAO_DEST', 'ORIGIN_WEATHER_STATION_ID', 'DEST_WEATHER_STATION_ID']).distinct().where(f.col('ICAO_DEST').isin(['KORD', 'KATL', 'KFSD', 'KROC', 'KTPA', 'KGPI', 'KOAK', 'KRAP'])).display()
# stations.where(f.col('ICAO').isin(['KORD', 'KATL', 'KFSD', 'KROC', 'KTPA', 'KGPI', 'KOAK', 'KRAP'])).display()
# flights_w_stations.printSchema()

In [0]:
# Check to make sure new timestamp mapping looks correct
# flights_w_stations.select('CRS_DEP_TIME_UTC_HOUR').withColumn('new_time', to_timestamp(f.col('CRS_DEP_TIME_UTC_HOUR').cast('long') - 10800)).display()

In [0]:
# Second, we need to create a new timestamp to join weather on - which is lagged by 2 hours. 
# Due to the nature of our timestamps - we will actually have to subtract 3 hours to avoid leakage. 10800 is equivalent to 3 hours
flights_w_stations = flights_w_stations.withColumn('CRS_DEP_TIME_UTC_LAG', to_timestamp(f.col('CRS_DEP_TIME_UTC_HOUR').cast('long') - 10800))

# Next, prepend an origin or destination prefix to the weather columns, so when we join we know which weather set we're looking at
origin_weather = weather.select([f.col(weather_feat).alias('ORIGIN_WEATHER_'+weather_feat) for weather_feat in weather.columns])
dest_weather = weather.select([f.col(weather_feat).alias('DEST_WEATHER_'+weather_feat) for weather_feat in weather.columns])

# join flights to ORIGIN weather on station_id and our lagged ORIGIN time variable
flights_w_weather_temp = flights_w_stations.join(origin_weather, (flights_w_stations.ORIGIN_WEATHER_STATION_ID == origin_weather.ORIGIN_WEATHER_STATION) &\
                                                          (flights_w_stations.CRS_DEP_TIME_UTC_LAG == origin_weather.ORIGIN_WEATHER_HOUR), 
                                                          'left')

# Finally, join flights to DESTINATION weather on station_id and lagged ORIGIN time variable
flights_w_weather = flights_w_weather_temp.join(dest_weather, (flights_w_weather_temp.DEST_WEATHER_STATION_ID == dest_weather.DEST_WEATHER_STATION) &\
                                                          (flights_w_weather_temp.CRS_DEP_TIME_UTC_LAG == dest_weather.DEST_WEATHER_HOUR), 
                                                          'left')

# flights_w_weather_temp.count()
# flights_w_weather_temp.where(f.col('ID') == '2015-02-01N3MEAAORD730').display()

In [0]:
display(flights_w_weather)

YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,FL_DATE,OP_UNIQUE_CARRIER,OP_CARRIER_AIRLINE_ID,TAIL_NUM,OP_CARRIER_FL_NUM,ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN,ORIGIN_CITY_NAME,ORIGIN_STATE_ABR,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST,DEST_CITY_NAME,DEST_STATE_ABR,CRS_DEP_TIME,DEP_TIME,DEP_DELAY,DEP_DELAY_NEW,DEP_DEL15,DEP_DELAY_GROUP,DEP_TIME_BLK,TAXI_OUT,WHEELS_OFF,WHEELS_ON,TAXI_IN,CRS_ARR_TIME,ARR_TIME,ARR_DELAY,ARR_DELAY_NEW,ARR_DEL15,ARR_DELAY_GROUP,ARR_TIME_BLK,CANCELLED,DIVERTED,CRS_ELAPSED_TIME,ACTUAL_ELAPSED_TIME,AIR_TIME,FLIGHTS,DISTANCE,DISTANCE_GROUP,CARRIER_DELAY,WEATHER_DELAY,NAS_DELAY,SECURITY_DELAY,LATE_AIRCRAFT_DELAY,ID,IATA_ORIGIN,ICAO_ORIGIN,AIRPORT_LAT_ORIGIN,AIRPORT_LONG_ORIGIN,AIRPORT_TIMEZONE_ORIGIN,AIRPORT_UTC_OFFSET_ORIGIN,IATA_DEST,ICAO_DEST,AIRPORT_LAT_DEST,AIRPORT_LONG_DEST,AIRPORT_TIMEZONE_DEST,AIRPORT_UTC_OFFSET_DEST,CRS_DEP_TIME_UTC,DEP_TIME_UTC,ARR_TIME_UTC,CRS_ARR_TIME_UTC,CRS_DEP_TIME_UTC_HOUR,CRS_ARR_TIME_UTC_HOUR,ORIGIN_WEATHER_STATION_ID,DEST_WEATHER_STATION_ID,CRS_DEP_TIME_UTC_LAG,ORIGIN_WEATHER_STATION,ORIGIN_WEATHER_DATE,ORIGIN_WEATHER_LATITUDE,ORIGIN_WEATHER_LONGITUDE,ORIGIN_WEATHER_ELEVATION,ORIGIN_WEATHER_NAME,ORIGIN_WEATHER_HOUR,ORIGIN_WEATHER_WND_DIRECTION_ANGLE,ORIGIN_WEATHER_WND_TYPE_CODE,ORIGIN_WEATHER_WND_SPEED_RATE,ORIGIN_WEATHER_CIG_CEILING_HEIGHT_DIMENSION,ORIGIN_WEATHER_CIG_CEILING_DETERMINATION_CODE,ORIGIN_WEATHER_CIG_CAVOK_CODE,ORIGIN_WEATHER_VIS_DISTANCE_DIMENSION,ORIGIN_WEATHER_VIS_VARIABILITY_CODE,ORIGIN_WEATHER_TMP_AIR_TEMP,ORIGIN_WEATHER_DEW_POINT_TEMP,ORIGIN_WEATHER_SLP_SEA_LEVEL_PRES,ORIGIN_WEATHER_VALID_WEATHER_DATA,ORIGIN_WEATHER_WND_DIRECTION_ANGLE-AVG,ORIGIN_WEATHER_WND_SPEED_RATE-AVG,ORIGIN_WEATHER_CIG_CEILING_HEIGHT_DIMENSION-AVG,ORIGIN_WEATHER_VIS_DISTANCE_DIMENSION-AVG,ORIGIN_WEATHER_TMP_AIR_TEMP-AVG,ORIGIN_WEATHER_DEW_POINT_TEMP-AVG,ORIGIN_WEATHER_SLP_SEA_LEVEL_PRES-AVG,DEST_WEATHER_STATION,DEST_WEATHER_DATE,DEST_WEATHER_LATITUDE,DEST_WEATHER_LONGITUDE,DEST_WEATHER_ELEVATION,DEST_WEATHER_NAME,DEST_WEATHER_HOUR,DEST_WEATHER_WND_DIRECTION_ANGLE,DEST_WEATHER_WND_TYPE_CODE,DEST_WEATHER_WND_SPEED_RATE,DEST_WEATHER_CIG_CEILING_HEIGHT_DIMENSION,DEST_WEATHER_CIG_CEILING_DETERMINATION_CODE,DEST_WEATHER_CIG_CAVOK_CODE,DEST_WEATHER_VIS_DISTANCE_DIMENSION,DEST_WEATHER_VIS_VARIABILITY_CODE,DEST_WEATHER_TMP_AIR_TEMP,DEST_WEATHER_DEW_POINT_TEMP,DEST_WEATHER_SLP_SEA_LEVEL_PRES,DEST_WEATHER_VALID_WEATHER_DATA,DEST_WEATHER_WND_DIRECTION_ANGLE-AVG,DEST_WEATHER_WND_SPEED_RATE-AVG,DEST_WEATHER_CIG_CEILING_HEIGHT_DIMENSION-AVG,DEST_WEATHER_VIS_DISTANCE_DIMENSION-AVG,DEST_WEATHER_TMP_AIR_TEMP-AVG,DEST_WEATHER_DEW_POINT_TEMP-AVG,DEST_WEATHER_SLP_SEA_LEVEL_PRES-AVG
2015,1,2,1,7,2015-02-01,DL,19790,N303DQ,1355,10397,1039705,ATL,"Atlanta, GA",GA,11624,1162402,EYW,"Key West, FL",FL,940,939,-1.0,0.0,0.0,-1,0900-0959,15.0,954,1120,3.0,1133,1123,-10.0,0.0,0.0,-1,1100-1159,0.0,0.0,113.0,104.0,86.0,1.0,646.0,3,,,,,,2015-02-01-940-ATL-N303DQ,ATL,KATL,33.6367,-84.428101,America/New_York,-5,EYW,KEYW,24.55610084533692,-81.75959777832031,America/New_York,-5,2015-02-01T14:40:00.000+0000,2015-02-01T14:39:00.000+0000,2015-02-01T16:23:00.000+0000,2015-02-01T16:33:00.000+0000,2015-02-01T14:00:00.000+0000,2015-02-01T16:00:00.000+0000,72219013874,72201012836,2015-02-01T11:00:00.000+0000,72219013874.0,2015-02-01T11:52:00.000+0000,33.6301,-84.4418,307.8,"ATLANTA HARTSFIELD INTERNATIONAL AIRPORT, GA US",2015-02-01T11:00:00.000+0000,120.0,N,21.0,5486.0,M,N,16093.0,N,50.0,-22.0,10229.0,1.0,120.0,21.0,5486.0,16093.0,50.0,-22.0,10229.0,72201012836.0,2015-02-01T11:53:00.000+0000,24.5571,-81.7554,0.3,"KEY WEST INTERNATIONAL AIRPORT, FL US",2015-02-01T11:00:00.000+0000,60.0,N,21.0,22000.0,9,N,16093.0,N,189.0,144.0,10216.0,1.0,60.0,21.0,22000.0,16093.0,189.0,144.0,10216.0
2015,1,2,1,7,2015-02-01,AA,19805,N369AA,1044,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1205,1551,226.0,226.0,1.0,12,1200-1259,19.0,1610,1942,6.0,1613,1948,215.0,215.0,1.0,12,1600-1659,0.0,0.0,188.0,177.0,152.0,1.0,1197.0,5,0.0,104.0,0.0,0.0,111.0,2015-02-01-1205-ORD-N369AA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-01T18:05:00.000+0000,2015-02-01T21:51:00.000+0000,2015-02-02T00:48:00.000+0000,2015-02-01T21:13:00.000+0000,2015-02-01T18:00:00.000+0000,2015-02-01T21:00:00.000+0000,72530094846,72202012839,2015-02-01T15:00:00.000+0000,72530094846.0,2015-02-01T15:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-01T15:00:00.000+0000,50.0,N,82.0,183.0,W,N,402.0,N,-17.0,-28.0,10132.0,1.0,50.0,82.0,183.0,402.0,-17.0,-28.0,10132.0,72202012839.0,2015-02-01T15:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-01T15:00:00.000+0000,100.0,N,77.0,22000.0,9,N,16093.0,N,233.0,122.0,10236.0,1.0,100.0,77.0,22000.0,16093.0,233.0,122.0,10236.0
2015,1,2,2,1,2015-02-02,AA,19805,N025AA,1652,10397,1039705,ATL,"Atlanta, GA",GA,13303,1330303,MIA,"Miami, FL",FL,815,810,-5.0,0.0,0.0,-1,0800-0859,17.0,827,951,11.0,1011,1002,-9.0,0.0,0.0,-1,1000-1059,0.0,0.0,116.0,112.0,84.0,1.0,594.0,3,,,,,,2015-02-02-815-ATL-N025AA,ATL,KATL,33.6367,-84.428101,America/New_York,-5,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-02T13:15:00.000+0000,2015-02-02T13:10:00.000+0000,2015-02-02T15:02:00.000+0000,2015-02-02T15:11:00.000+0000,2015-02-02T13:00:00.000+0000,2015-02-02T15:00:00.000+0000,72219013874,72202012839,2015-02-02T10:00:00.000+0000,72219013874.0,2015-02-02T10:52:00.000+0000,33.6301,-84.4418,307.8,"ATLANTA HARTSFIELD INTERNATIONAL AIRPORT, GA US",2015-02-02T10:00:00.000+0000,270.0,N,72.0,1097.0,M,N,16093.0,N,100.0,56.0,10076.0,1.0,270.0,72.0,1097.0,16093.0,100.0,56.0,10076.0,72202012839.0,2015-02-02T10:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-02T10:00:00.000+0000,150.0,N,26.0,22000.0,9,N,12875.0,N,206.0,183.0,10169.0,1.0,150.0,26.0,22000.0,12875.0,206.0,183.0,10169.0
2015,1,2,3,2,2015-02-03,AA,19805,N371AA,1044,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1205,1224,19.0,19.0,1.0,1,1200-1259,23.0,1247,1611,5.0,1613,1616,3.0,3.0,0.0,0,1600-1659,0.0,0.0,188.0,172.0,144.0,1.0,1197.0,5,,,,,,2015-02-03-1205-ORD-N371AA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-03T18:05:00.000+0000,2015-02-03T18:24:00.000+0000,2015-02-03T21:16:00.000+0000,2015-02-03T21:13:00.000+0000,2015-02-03T18:00:00.000+0000,2015-02-03T21:00:00.000+0000,72530094846,72202012839,2015-02-03T15:00:00.000+0000,72530094846.0,2015-02-03T15:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-03T15:00:00.000+0000,250.0,N,31.0,22000.0,9,N,16093.0,N,-78.0,-117.0,10222.0,1.0,250.0,31.0,22000.0,16093.0,-78.0,-117.0,10222.0,72202012839.0,2015-02-03T15:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-03T15:00:00.000+0000,,V,15.0,22000.0,9,N,16093.0,N,206.0,122.0,10225.0,0.0,222.87671232876716,15.0,22000.0,16093.0,206.0,122.0,10225.0
2015,1,2,4,3,2015-02-04,AA,19805,N3BCAA,985,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1450,1453,3.0,3.0,0.0,0,1400-1459,13.0,1506,1850,13.0,1857,1903,6.0,6.0,0.0,0,1800-1859,0.0,0.0,187.0,190.0,164.0,1.0,1197.0,5,,,,,,2015-02-04-1450-ORD-N3BCAA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-04T20:50:00.000+0000,2015-02-04T20:53:00.000+0000,2015-02-05T00:03:00.000+0000,2015-02-04T23:57:00.000+0000,2015-02-04T20:00:00.000+0000,2015-02-04T23:00:00.000+0000,72530094846,72202012839,2015-02-04T17:00:00.000+0000,72530094846.0,2015-02-04T17:59:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-04T17:00:00.000+0000,340.0,N,62.0,975.0,M,N,4023.0,N,-72.0,-106.0,,0.0,340.0,62.0,975.0,4023.0,-72.0,-106.0,10204.350318471335,72202012839.0,2015-02-04T17:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-04T17:00:00.000+0000,110.0,N,36.0,9144.0,M,N,16093.0,N,244.0,150.0,10195.0,1.0,110.0,36.0,9144.0,16093.0,244.0,150.0,10195.0
2015,1,2,9,1,2015-02-09,AA,19805,N3FLAA,1078,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,600,555,-5.0,0.0,0.0,-1,0600-0659,16.0,611,932,39.0,1003,1011,8.0,8.0,0.0,0,1000-1059,0.0,0.0,183.0,196.0,141.0,1.0,1197.0,5,,,,,,2015-02-09-600-ORD-N3FLAA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-09T12:00:00.000+0000,2015-02-09T11:55:00.000+0000,2015-02-09T15:11:00.000+0000,2015-02-09T15:03:00.000+0000,2015-02-09T12:00:00.000+0000,2015-02-09T15:00:00.000+0000,72530094846,72202012839,2015-02-09T09:00:00.000+0000,72530094846.0,2015-02-09T09:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-09T09:00:00.000+0000,340.0,N,46.0,335.0,M,N,16093.0,N,-44.0,-56.0,10179.0,1.0,340.0,46.0,335.0,16093.0,-44.0,-56.0,10179.0,72202012839.0,2015-02-09T09:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-09T09:00:00.000+0000,,C,0.0,5486.0,M,N,16093.0,N,178.0,156.0,10145.0,0.0,153.3108108108108,0.0,5486.0,16093.0,178.0,156.0,10145.0
2015,1,2,9,1,2015-02-09,AA,19805,N359AA,1044,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1205,1208,3.0,3.0,0.0,0,1200-1259,14.0,1222,1537,7.0,1613,1544,-29.0,0.0,0.0,-2,1600-1659,0.0,0.0,188.0,156.0,135.0,1.0,1197.0,5,,,,,,2015-02-09-1205-ORD-N359AA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-09T18:05:00.000+0000,2015-02-09T18:08:00.000+0000,2015-02-09T20:44:00.000+0000,2015-02-09T21:13:00.000+0000,2015-02-09T18:00:00.000+0000,2015-02-09T21:00:00.000+0000,72530094846,72202012839,2015-02-09T15:00:00.000+0000,72530094846.0,2015-02-09T15:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-09T15:00:00.000+0000,20.0,N,51.0,488.0,M,N,9656.0,N,-39.0,-61.0,10224.0,1.0,20.0,51.0,488.0,9656.0,-39.0,-61.0,10224.0,72202012839.0,2015-02-09T15:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-09T15:00:00.000+0000,140.0,N,41.0,2896.0,M,N,16093.0,N,211.0,172.0,10145.0,1.0,140.0,41.0,2896.0,16093.0,211.0,172.0,10145.0
2015,1,2,10,2,2015-02-10,AA,19805,N361AA,1044,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1205,1202,-3.0,0.0,0.0,-1,1200-1259,20.0,1222,1538,5.0,1613,1543,-30.0,0.0,0.0,-2,1600-1659,0.0,0.0,188.0,161.0,136.0,1.0,1197.0,5,,,,,,2015-02-10-1205-ORD-N361AA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-10T18:05:00.000+0000,2015-02-10T18:02:00.000+0000,2015-02-10T20:43:00.000+0000,2015-02-10T21:13:00.000+0000,2015-02-10T18:00:00.000+0000,2015-02-10T21:00:00.000+0000,72530094846,72202012839,2015-02-10T15:00:00.000+0000,72530094846.0,2015-02-10T15:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-10T15:00:00.000+0000,100.0,N,36.0,7620.0,M,N,16093.0,N,-22.0,-72.0,10249.0,1.0,100.0,36.0,7620.0,16093.0,-22.0,-72.0,10249.0,72202012839.0,2015-02-10T15:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-10T15:00:00.000+0000,280.0,N,62.0,22000.0,9,N,16093.0,N,211.0,144.0,10090.0,1.0,280.0,62.0,22000.0,16093.0,211.0,144.0,10090.0
2015,1,2,12,4,2015-02-12,AA,19805,N357AA,1044,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1205,1235,30.0,30.0,1.0,2,1200-1259,15.0,1250,1622,5.0,1613,1627,14.0,14.0,0.0,0,1600-1659,0.0,0.0,188.0,172.0,152.0,1.0,1197.0,5,,,,,,2015-02-12-1205-ORD-N357AA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-12T18:05:00.000+0000,2015-02-12T18:35:00.000+0000,2015-02-12T21:27:00.000+0000,2015-02-12T21:13:00.000+0000,2015-02-12T18:00:00.000+0000,2015-02-12T21:00:00.000+0000,72530094846,72202012839,2015-02-12T15:00:00.000+0000,72530094846.0,2015-02-12T15:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T15:00:00.000+0000,340.0,N,82.0,22000.0,9,N,16093.0,N,-122.0,-189.0,10313.0,1.0,340.0,82.0,22000.0,16093.0,-122.0,-189.0,10313.0,72202012839.0,2015-02-12T15:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-12T15:00:00.000+0000,300.0,N,41.0,22000.0,9,N,16093.0,N,211.0,100.0,10198.0,1.0,300.0,41.0,22000.0,16093.0,211.0,100.0,10198.0
2015,1,2,12,4,2015-02-12,AA,19805,N3FRAA,985,13930,1393003,ORD,"Chicago, IL",IL,13303,1330303,MIA,"Miami, FL",FL,1450,1458,8.0,8.0,0.0,0,1400-1459,15.0,1513,1851,7.0,1857,1858,1.0,1.0,0.0,0,1800-1859,0.0,0.0,187.0,180.0,158.0,1.0,1197.0,5,,,,,,2015-02-12-1450-ORD-N3FRAA,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,MIA,KMIA,25.79319953918457,-80.29060363769531,America/New_York,-5,2015-02-12T20:50:00.000+0000,2015-02-12T20:58:00.000+0000,2015-02-12T23:58:00.000+0000,2015-02-12T23:57:00.000+0000,2015-02-12T20:00:00.000+0000,2015-02-12T23:00:00.000+0000,72530094846,72202012839,2015-02-12T17:00:00.000+0000,72530094846.0,2015-02-12T17:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T17:00:00.000+0000,320.0,N,57.0,22000.0,9,N,16093.0,N,-106.0,-189.0,10322.0,1.0,320.0,57.0,22000.0,16093.0,-106.0,-189.0,10322.0,72202012839.0,2015-02-12T17:53:00.000+0000,25.7881,-80.3169,8.8,"MIAMI INTERNATIONAL AIRPORT, FL US",2015-02-12T17:00:00.000+0000,340.0,N,26.0,22000.0,9,N,16093.0,N,233.0,100.0,10181.0,1.0,340.0,26.0,22000.0,16093.0,233.0,100.0,10181.0


In [0]:
# flights_w_weather_temp.where(f.col('FL_DATE') == '2015-02-21').display()
# flights_w_weather_temp.where(f.col('ID') == '2015-02-21N4XDAAORD1805').display()
# flights_w_weather.where(f.col('ID') == '2015-02-21N4XDAAORD1805').display()

In [0]:
JOINED_2015_PROCESSED_PATH = blob_url + '/processed/joined_2015_data.parquet'

flights_w_weather.write.parquet(JOINED_2015_PROCESSED_PATH)

In [0]:

flights_in = spark.read.parquet(JOINED_2015_PROCESSED_PATH).cache()

In [0]:
flights_in.count()

In [0]:
# Null values
def null_values(df):
  '''Pass pandas df argument and return columns with null values and percentages.'''
  df = df.replace('', np.nan)
  null_values = df.isna().sum()
  null_values_percent = null_values/len(df) * 100
  null_table = pd.concat([null_values, null_values_percent], axis=1).rename(
    columns = {0:'Null Values', 1:'Percentage'})
  null_table = null_table[null_table.Percentage != 0]
  sorted_table = null_table.sort_values('Percentage', ascending=False)
  
  print(f'''Total Number of Columns: {df.shape[1]}\nNumber of Columns with Null Values: {sorted_table.shape[0]}''')
  
  return sorted_table

In [0]:
# flights_in.where(f.col('DEST_WEATHER_WND_SPEED_RATE-AVG').isNull()).groupBy(f.col('OP_UNIQUE_CARRIER')).count().display()
flights_in.where((f.col('ORIGIN_WEATHER_STATION').isNull()) | (f.col('DEST_WEATHER_STATION').isNull())).count()

In [0]:
flights_in.printSchema()

In [0]:
df = flights_in.toPandas()
null_df = null_values(df)

null_df

In [0]:
flights_in.display()

YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,FL_DATE,OP_UNIQUE_CARRIER,OP_CARRIER_AIRLINE_ID,TAIL_NUM,OP_CARRIER_FL_NUM,ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN,ORIGIN_CITY_NAME,ORIGIN_STATE_ABR,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST,DEST_CITY_NAME,DEST_STATE_ABR,CRS_DEP_TIME,DEP_TIME,DEP_DELAY,DEP_DELAY_NEW,DEP_DEL15,DEP_DELAY_GROUP,DEP_TIME_BLK,TAXI_OUT,WHEELS_OFF,WHEELS_ON,TAXI_IN,CRS_ARR_TIME,ARR_TIME,ARR_DELAY,ARR_DELAY_NEW,ARR_DEL15,ARR_DELAY_GROUP,ARR_TIME_BLK,CANCELLED,DIVERTED,CRS_ELAPSED_TIME,ACTUAL_ELAPSED_TIME,AIR_TIME,FLIGHTS,DISTANCE,DISTANCE_GROUP,CARRIER_DELAY,WEATHER_DELAY,NAS_DELAY,SECURITY_DELAY,LATE_AIRCRAFT_DELAY,ID,IATA_ORIGIN,ICAO_ORIGIN,AIRPORT_LAT_ORIGIN,AIRPORT_LONG_ORIGIN,AIRPORT_TIMEZONE_ORIGIN,AIRPORT_UTC_OFFSET_ORIGIN,IATA_DEST,ICAO_DEST,AIRPORT_LAT_DEST,AIRPORT_LONG_DEST,AIRPORT_TIMEZONE_DEST,AIRPORT_UTC_OFFSET_DEST,CRS_DEP_TIME_UTC,DEP_TIME_UTC,ARR_TIME_UTC,CRS_ARR_TIME_UTC,CRS_DEP_TIME_UTC_HOUR,CRS_ARR_TIME_UTC_HOUR,ORIGIN_WEATHER_STATION_ID,DEST_WEATHER_STATION_ID,CRS_DEP_TIME_UTC_LAG,ORIGIN_WEATHER_STATION,ORIGIN_WEATHER_DATE,ORIGIN_WEATHER_LATITUDE,ORIGIN_WEATHER_LONGITUDE,ORIGIN_WEATHER_ELEVATION,ORIGIN_WEATHER_NAME,ORIGIN_WEATHER_HOUR,ORIGIN_WEATHER_WND_DIRECTION_ANGLE,ORIGIN_WEATHER_WND_TYPE_CODE,ORIGIN_WEATHER_WND_SPEED_RATE,ORIGIN_WEATHER_CIG_CEILING_HEIGHT_DIMENSION,ORIGIN_WEATHER_CIG_CEILING_DETERMINATION_CODE,ORIGIN_WEATHER_CIG_CAVOK_CODE,ORIGIN_WEATHER_VIS_DISTANCE_DIMENSION,ORIGIN_WEATHER_VIS_VARIABILITY_CODE,ORIGIN_WEATHER_TMP_AIR_TEMP,ORIGIN_WEATHER_DEW_POINT_TEMP,ORIGIN_WEATHER_SLP_SEA_LEVEL_PRES,ORIGIN_WEATHER_VALID_WEATHER_DATA,ORIGIN_WEATHER_WND_DIRECTION_ANGLE-AVG,ORIGIN_WEATHER_WND_SPEED_RATE-AVG,ORIGIN_WEATHER_CIG_CEILING_HEIGHT_DIMENSION-AVG,ORIGIN_WEATHER_VIS_DISTANCE_DIMENSION-AVG,ORIGIN_WEATHER_TMP_AIR_TEMP-AVG,ORIGIN_WEATHER_DEW_POINT_TEMP-AVG,ORIGIN_WEATHER_SLP_SEA_LEVEL_PRES-AVG,DEST_WEATHER_STATION,DEST_WEATHER_DATE,DEST_WEATHER_LATITUDE,DEST_WEATHER_LONGITUDE,DEST_WEATHER_ELEVATION,DEST_WEATHER_NAME,DEST_WEATHER_HOUR,DEST_WEATHER_WND_DIRECTION_ANGLE,DEST_WEATHER_WND_TYPE_CODE,DEST_WEATHER_WND_SPEED_RATE,DEST_WEATHER_CIG_CEILING_HEIGHT_DIMENSION,DEST_WEATHER_CIG_CEILING_DETERMINATION_CODE,DEST_WEATHER_CIG_CAVOK_CODE,DEST_WEATHER_VIS_DISTANCE_DIMENSION,DEST_WEATHER_VIS_VARIABILITY_CODE,DEST_WEATHER_TMP_AIR_TEMP,DEST_WEATHER_DEW_POINT_TEMP,DEST_WEATHER_SLP_SEA_LEVEL_PRES,DEST_WEATHER_VALID_WEATHER_DATA,DEST_WEATHER_WND_DIRECTION_ANGLE-AVG,DEST_WEATHER_WND_SPEED_RATE-AVG,DEST_WEATHER_CIG_CEILING_HEIGHT_DIMENSION-AVG,DEST_WEATHER_VIS_DISTANCE_DIMENSION-AVG,DEST_WEATHER_TMP_AIR_TEMP-AVG,DEST_WEATHER_DEW_POINT_TEMP-AVG,DEST_WEATHER_SLP_SEA_LEVEL_PRES-AVG
2015,1,1,2,5,2015-01-02,AS,19930,N584AS,139,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,935,934,-1.0,0.0,0.0,-1,0900-0959,13.0,947,1329,4.0,1339,1333,-6.0,0.0,0.0,-1,1300-1359,0.0,0.0,424.0,419.0,402.0,1.0,2846.0,11,,,,,,2015-01-02-935-ORD-N584AS,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-01-02T15:35:00.000+0000,2015-01-02T15:34:00.000+0000,2015-01-02T22:33:00.000+0000,2015-01-02T22:39:00.000+0000,2015-01-02T15:00:00.000+0000,2015-01-02T22:00:00.000+0000,72530094846,70273026451,2015-01-02T12:00:00.000+0000,72530094846.0,2015-01-02T12:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-01-02T12:00:00.000+0000,,C,0.0,22000.0,9,N,16093.0,N,-78.0,-100.0,10246.0,0.0,244.24242424242425,0.0,22000.0,16093.0,-78.0,-100.0,10246.0,70273026451.0,2015-01-02T12:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-01-02T12:00:00.000+0000,,C,0.0,22000.0,9,N,16093.0,N,-33.0,-83.0,10184.0,0.0,226.08695652173915,0.0,22000.0,16093.0,-33.0,-83.0,10184.0
2015,1,1,4,7,2015-01-04,UA,19977,N87527,1130,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1807,1916,69.0,69.0,1.0,4,1800-1859,19.0,1935,2254,5.0,2212,2259,47.0,47.0,1.0,3,2200-2259,0.0,0.0,425.0,403.0,379.0,1.0,2846.0,11,20.0,0.0,0.0,0.0,27.0,2015-01-04-1807-ORD-N87527,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-01-05T00:07:00.000+0000,2015-01-05T01:16:00.000+0000,2015-01-05T07:59:00.000+0000,2015-01-05T07:12:00.000+0000,2015-01-05T00:00:00.000+0000,2015-01-05T07:00:00.000+0000,72530094846,70273026451,2015-01-04T21:00:00.000+0000,72530094846.0,2015-01-04T21:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-01-04T21:00:00.000+0000,310.0,N,98.0,671.0,M,N,16093.0,N,-83.0,-128.0,10235.0,1.0,310.0,98.0,671.0,16093.0,-83.0,-128.0,10235.0,70273026451.0,2015-01-04T21:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-01-04T21:00:00.000+0000,,C,0.0,22000.0,9,N,16093.0,N,-111.0,-139.0,10258.0,0.0,246.0,0.0,22000.0,16093.0,-111.0,-139.0,10258.0
2015,1,3,7,6,2015-03-07,UA,19977,N66831,1151,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1859,1929,30.0,30.0,1.0,2,1800-1859,15.0,1944,2325,5.0,2252,2330,38.0,38.0,1.0,2,2200-2259,0.0,0.0,413.0,421.0,401.0,1.0,2846.0,11,30.0,0.0,8.0,0.0,0.0,2015-03-07-1859-ORD-N66831,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-08T00:59:00.000+0000,2015-03-08T01:29:00.000+0000,2015-03-08T08:30:00.000+0000,2015-03-08T07:52:00.000+0000,2015-03-08T00:00:00.000+0000,2015-03-08T07:00:00.000+0000,72530094846,70273026451,2015-03-07T21:00:00.000+0000,72530094846.0,2015-03-07T21:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-07T21:00:00.000+0000,300.0,N,77.0,22000.0,9,N,16093.0,N,61.0,-17.0,10169.0,1.0,300.0,77.0,22000.0,16093.0,61.0,-17.0,10169.0,70273026451.0,2015-03-07T21:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-07T21:00:00.000+0000,180.0,N,46.0,1067.0,M,N,16093.0,N,44.0,1.0,10106.0,1.0,180.0,46.0,1067.0,16093.0,44.0,1.0,10106.0
2015,1,3,9,1,2015-03-09,UA,19977,N73299,1651,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1859,1923,24.0,24.0,1.0,1,1800-1859,16.0,1939,2323,4.0,2252,2327,35.0,35.0,1.0,2,2200-2259,0.0,0.0,413.0,424.0,404.0,1.0,2846.0,11,24.0,0.0,11.0,0.0,0.0,2015-03-09-1859-ORD-N73299,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-09T23:59:00.000+0000,2015-03-10T00:23:00.000+0000,2015-03-10T07:27:00.000+0000,2015-03-10T06:52:00.000+0000,2015-03-09T23:00:00.000+0000,2015-03-10T06:00:00.000+0000,72530094846,70273026451,2015-03-09T20:00:00.000+0000,72530094846.0,2015-03-09T20:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-09T20:00:00.000+0000,210.0,N,15.0,22000.0,9,N,16093.0,N,94.0,-17.0,10206.0,1.0,210.0,15.0,22000.0,16093.0,94.0,-17.0,10206.0,70273026451.0,2015-03-09T20:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-09T20:00:00.000+0000,350.0,N,57.0,22000.0,9,N,16093.0,N,-61.0,-183.0,10200.0,1.0,350.0,57.0,22000.0,16093.0,-61.0,-183.0,10200.0
2015,1,3,12,4,2015-03-12,UA,19977,N87512,1651,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1859,1943,44.0,44.0,1.0,2,1800-1859,29.0,2012,2324,6.0,2252,2330,38.0,38.0,1.0,2,2200-2259,0.0,0.0,413.0,407.0,372.0,1.0,2846.0,11,17.0,0.0,0.0,0.0,21.0,2015-03-12-1859-ORD-N87512,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-12T23:59:00.000+0000,2015-03-13T00:43:00.000+0000,2015-03-13T07:30:00.000+0000,2015-03-13T06:52:00.000+0000,2015-03-12T23:00:00.000+0000,2015-03-13T06:00:00.000+0000,72530094846,70273026451,2015-03-12T20:00:00.000+0000,72530094846.0,2015-03-12T20:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-12T20:00:00.000+0000,110.0,N,51.0,22000.0,9,N,16093.0,N,128.0,-11.0,10299.0,1.0,110.0,51.0,22000.0,16093.0,128.0,-11.0,10299.0,70273026451.0,2015-03-12T20:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-12T20:00:00.000+0000,360.0,N,21.0,22000.0,9,N,16093.0,N,-122.0,-239.0,9992.0,1.0,360.0,21.0,22000.0,16093.0,-122.0,-239.0,9992.0
2015,1,3,19,4,2015-03-19,AS,19930,N508AS,139,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,940,928,-12.0,0.0,0.0,-1,0900-0959,28.0,956,1315,4.0,1328,1319,-9.0,0.0,0.0,-1,1300-1359,0.0,0.0,408.0,411.0,379.0,1.0,2846.0,11,,,,,,2015-03-19-940-ORD-N508AS,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-19T14:40:00.000+0000,2015-03-19T14:28:00.000+0000,2015-03-19T21:19:00.000+0000,2015-03-19T21:28:00.000+0000,2015-03-19T14:00:00.000+0000,2015-03-19T21:00:00.000+0000,72530094846,70273026451,2015-03-19T11:00:00.000+0000,72530094846.0,2015-03-19T11:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-19T11:00:00.000+0000,120.0,N,15.0,3658.0,M,N,16093.0,N,39.0,-78.0,10250.0,1.0,120.0,15.0,3658.0,16093.0,39.0,-78.0,10250.0,70273026451.0,2015-03-19T11:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-19T11:00:00.000+0000,,C,0.0,4267.0,M,N,16093.0,N,0.0,-56.0,10076.0,0.0,234.17910447761196,0.0,4267.0,16093.0,0.0,-56.0,10076.0
2015,1,3,19,4,2015-03-19,UA,19977,N13248,1651,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1859,1932,33.0,33.0,1.0,2,1800-1859,17.0,1949,2256,4.0,2252,2300,8.0,8.0,0.0,0,2200-2259,0.0,0.0,413.0,388.0,367.0,1.0,2846.0,11,,,,,,2015-03-19-1859-ORD-N13248,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-19T23:59:00.000+0000,2015-03-20T00:32:00.000+0000,2015-03-20T07:00:00.000+0000,2015-03-20T06:52:00.000+0000,2015-03-19T23:00:00.000+0000,2015-03-20T06:00:00.000+0000,72530094846,70273026451,2015-03-19T20:00:00.000+0000,72530094846.0,2015-03-19T20:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-19T20:00:00.000+0000,80.0,N,41.0,3353.0,M,N,16093.0,N,72.0,-94.0,10213.0,1.0,80.0,41.0,3353.0,16093.0,72.0,-94.0,10213.0,70273026451.0,2015-03-19T20:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-19T20:00:00.000+0000,340.0,N,21.0,6096.0,M,N,16093.0,N,28.0,-67.0,10079.0,1.0,340.0,21.0,6096.0,16093.0,28.0,-67.0,10079.0
2015,1,3,22,7,2015-03-22,UA,19977,N37263,1539,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1859,1930,31.0,31.0,1.0,2,1800-1859,19.0,1949,2257,4.0,2252,2301,9.0,9.0,0.0,0,2200-2259,0.0,0.0,413.0,391.0,368.0,1.0,2846.0,11,,,,,,2015-03-22-1859-ORD-N37263,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-22T23:59:00.000+0000,2015-03-23T00:30:00.000+0000,2015-03-23T07:01:00.000+0000,2015-03-23T06:52:00.000+0000,2015-03-22T23:00:00.000+0000,2015-03-23T06:00:00.000+0000,72530094846,70273026451,2015-03-22T20:00:00.000+0000,72530094846.0,2015-03-22T20:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-22T20:00:00.000+0000,60.0,N,46.0,2134.0,M,N,16093.0,N,6.0,-89.0,10255.0,1.0,60.0,46.0,2134.0,16093.0,6.0,-89.0,10255.0,70273026451.0,2015-03-22T20:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-22T20:00:00.000+0000,330.0,N,15.0,22000.0,9,N,16093.0,N,50.0,-44.0,10161.0,1.0,330.0,15.0,22000.0,16093.0,50.0,-44.0,10161.0
2015,1,3,23,1,2015-03-23,AS,19930,N588AS,139,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,940,1226,166.0,166.0,1.0,11,0900-0959,62.0,1328,1638,4.0,1328,1642,194.0,194.0,1.0,12,1300-1359,0.0,0.0,408.0,436.0,370.0,1.0,2846.0,11,0.0,0.0,194.0,0.0,0.0,2015-03-23-940-ORD-N588AS,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-23T14:40:00.000+0000,2015-03-23T17:26:00.000+0000,2015-03-24T00:42:00.000+0000,2015-03-23T21:28:00.000+0000,2015-03-23T14:00:00.000+0000,2015-03-23T21:00:00.000+0000,72530094846,70273026451,2015-03-23T11:00:00.000+0000,72530094846.0,2015-03-23T11:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-23T11:00:00.000+0000,80.0,N,41.0,274.0,M,N,1609.0,N,-22.0,-33.0,10253.0,1.0,80.0,41.0,274.0,1609.0,-22.0,-33.0,10253.0,70273026451.0,2015-03-23T11:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-23T11:00:00.000+0000,360.0,N,15.0,22000.0,9,N,16093.0,N,-11.0,-67.0,10148.0,1.0,360.0,15.0,22000.0,16093.0,-11.0,-67.0,10148.0
2015,1,3,26,4,2015-03-26,UA,19977,N78524,1651,13930,1393003,ORD,"Chicago, IL",IL,10299,1029904,ANC,"Anchorage, AK",AK,1859,2011,72.0,72.0,1.0,4,1800-1859,22.0,2033,2347,5.0,2252,2352,60.0,60.0,1.0,4,2200-2259,0.0,0.0,413.0,401.0,374.0,1.0,2846.0,11,60.0,0.0,0.0,0.0,0.0,2015-03-26-1859-ORD-N78524,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ANC,PANC,61.17440032958984,-149.99600219726562,America/Anchorage,-9,2015-03-26T23:59:00.000+0000,2015-03-27T01:11:00.000+0000,2015-03-27T07:52:00.000+0000,2015-03-27T06:52:00.000+0000,2015-03-26T23:00:00.000+0000,2015-03-27T06:00:00.000+0000,72530094846,70273026451,2015-03-26T20:00:00.000+0000,72530094846.0,2015-03-26T20:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-03-26T20:00:00.000+0000,310.0,N,77.0,1372.0,M,N,16093.0,N,44.0,-44.0,10151.0,1.0,310.0,77.0,1372.0,16093.0,44.0,-44.0,10151.0,70273026451.0,2015-03-26T20:53:00.000+0000,61.169,-150.0278,36.6,"ANCHORAGE TED STEVENS INTERNATIONAL AIRPORT, AK US",2015-03-26T20:00:00.000+0000,330.0,N,26.0,3658.0,M,N,16093.0,N,39.0,-72.0,9933.0,1.0,330.0,26.0,3658.0,16093.0,39.0,-72.0,9933.0


In [0]:
weather.where(((f.col('DATE') > "2015-02-20T23:59:59.000") & (f.col('DATE') < "2015-02-22T00:00:00.000")) & (f.col('STATION') == '72211012842')).display() 

STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,HOUR,WND_DIRECTION_ANGLE,WND_TYPE_CODE,WND_SPEED_RATE,CIG_CEILING_HEIGHT_DIMENSION,CIG_CEILING_DETERMINATION_CODE,CIG_CAVOK_CODE,VIS_DISTANCE_DIMENSION,VIS_VARIABILITY_CODE,TMP_AIR_TEMP,DEW_POINT_TEMP,SLP_SEA_LEVEL_PRES,VALID_WEATHER_DATA,WND_DIRECTION_ANGLE-AVG,WND_SPEED_RATE-AVG,CIG_CEILING_HEIGHT_DIMENSION-AVG,VIS_DISTANCE_DIMENSION-AVG,TMP_AIR_TEMP-AVG,DEW_POINT_TEMP-AVG,SLP_SEA_LEVEL_PRES-AVG
72211012842,2015-02-21T00:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T00:00:00.000+0000,60.0,N,31.0,22000.0,9,N,16093.0,N,111.0,-78.0,10281.0,1,60.0,31.0,22000.0,16093.0,111.0,-78.0,10281.0
72211012842,2015-02-21T01:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T01:00:00.000+0000,40.0,N,26.0,22000.0,9,N,16093.0,N,100.0,-78.0,10287.0,1,40.0,26.0,22000.0,16093.0,100.0,-78.0,10287.0
72211012842,2015-02-21T02:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T02:00:00.000+0000,70.0,N,31.0,22000.0,9,N,16093.0,N,94.0,-61.0,10295.0,1,70.0,31.0,22000.0,16093.0,94.0,-61.0,10295.0
72211012842,2015-02-21T03:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T03:00:00.000+0000,70.0,N,26.0,22000.0,9,N,16093.0,N,83.0,-11.0,10295.0,1,70.0,26.0,22000.0,16093.0,83.0,-11.0,10295.0
72211012842,2015-02-21T04:59:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T04:00:00.000+0000,,,,,9,,,,,,,0,60.0,28.5,22000.0,16093.0,97.0,-57.0,10289.5
72211012842,2015-02-21T05:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T05:00:00.000+0000,70.0,N,41.0,22000.0,9,N,16093.0,N,72.0,11.0,10290.0,1,70.0,41.0,22000.0,16093.0,72.0,11.0,10290.0
72211012842,2015-02-21T06:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T06:00:00.000+0000,70.0,N,21.0,22000.0,9,N,16093.0,N,67.0,11.0,10283.0,1,70.0,21.0,22000.0,16093.0,67.0,11.0,10283.0
72211012842,2015-02-21T07:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T07:00:00.000+0000,,V,21.0,22000.0,9,N,16093.0,N,67.0,11.0,10275.0,0,63.333333333333336,21.0,22000.0,16093.0,67.0,11.0,10275.0
72211012842,2015-02-21T08:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T08:00:00.000+0000,60.0,N,21.0,22000.0,9,N,16093.0,N,67.0,17.0,10270.0,1,60.0,21.0,22000.0,16093.0,67.0,17.0,10270.0
72211012842,2015-02-21T09:53:00.000+0000,27.96194,-82.5403,5.8,"TAMPA INTERNATIONAL AIRPORT, FL US",2015-02-21T09:00:00.000+0000,70.0,N,26.0,22000.0,9,N,16093.0,N,67.0,17.0,10265.0,1,70.0,26.0,22000.0,16093.0,67.0,17.0,10265.0


In [0]:
weather.printSchema()

In [0]:
stations2 = stations
prefix = 'ORIGIN'
stations3 = stations2.select([col(c).alias(prefix+c) for c in stations2.columns])
stations3.display()

In [0]:
flights_w_weather_temp2 = flights_w_stations.join(weather, (flights_w_stations.ORIGIN_WEATHER_STATION_ID == weather.STATION) &\
                                                          (flights_w_stations.CRS_DEP_TIME_UTC_HOUR == weather.HOUR))

flights_w_weather_temp2.display()

YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,FL_DATE,OP_UNIQUE_CARRIER,OP_CARRIER_AIRLINE_ID,OP_CARRIER,TAIL_NUM,OP_CARRIER_FL_NUM,ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,ORIGIN,ORIGIN_CITY_NAME,ORIGIN_STATE_ABR,ORIGIN_STATE_FIPS,ORIGIN_STATE_NM,ORIGIN_WAC,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID,DEST,DEST_CITY_NAME,DEST_STATE_ABR,DEST_STATE_FIPS,DEST_STATE_NM,DEST_WAC,CRS_DEP_TIME,DEP_DEL15,DEP_DELAY_GROUP,DEP_TIME_BLK,TAXI_OUT,WHEELS_OFF,WHEELS_ON,TAXI_IN,CRS_ARR_TIME,CANCELLED,DIVERTED,CRS_ELAPSED_TIME,AIR_TIME,FLIGHTS,DISTANCE,DISTANCE_GROUP,ID,IATA_ORIGIN,ICAO_ORIGIN,AIRPORT_LAT_ORIGIN,AIRPORT_LONG_ORIGIN,AIRPORT_TIMEZONE_ORIGIN,AIRPORT_UTC_OFFSET_ORIGIN,IATA_DEST,ICAO_DEST,AIRPORT_LAT_DEST,AIRPORT_LONG_DEST,AIRPORT_TIMEZONE_DEST,AIRPORT_UTC_OFFSET_DEST,CRS_DEP_TIME_UTC,CRS_ARR_TIME_UTC,CRS_DEP_TIME_UTC_HOUR,CRS_ARR_TIME_UTC_HOUR,ORIGIN_WEATHER_STATION_ID,DEST_WEATHER_STATION_ID,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,HOUR,WND_DIRECTION_ANGLE,WND_TYPE_CODE,WND_SPEED_RATE,CIG_CEILING_HEIGHT_DIMENSION,CIG_CEILING_DETERMINATION_CODE,CIG_CAVOK_CODE,VIS_DISTANCE_DIMENSION,VIS_VARIABILITY_CODE,TMP_AIR_TEMP,DEW_POINT_TEMP,SLP_SEA_LEVEL_PRES,VALID_WEATHER_DATA,WND_DIRECTION_ANGLE-AVG,WND_SPEED_RATE-AVG,CIG_CEILING_HEIGHT_DIMENSION-AVG,VIS_DISTANCE_DIMENSION-AVG,TMP_AIR_TEMP-AVG,DEW_POINT_TEMP-AVG,SLP_SEA_LEVEL_PRES-AVG
2015,1,2,12,4,2015-02-12,AA,19805,AA,N4YRAA,1065,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,10721,1072102,30721,BOS,"Boston, MA",MA,25,Massachusetts,13,1300,0.0,0,1300-1359,16.0,1324,1616.0,6.0,1613,0.0,0.0,133.0,112.0,1.0,867.0,4,2015-02-12N4YRAAORD1300,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,BOS,KBOS,42.36429977,-71.00520325,America/New_York,-5,2015-02-12T19:00:00.000+0000,2015-02-12T21:13:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T21:00:00.000+0000,72530094846,72509014739,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,AA,19805,AA,N3MEAA,1644,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,14908,1490803,32575,SNA,"Santa Ana, CA",CA,6,California,91,1345,1.0,1,1300-1359,13.0,1421,1553.0,7.0,1608,0.0,0.0,263.0,212.0,1.0,1726.0,7,2015-02-12N3MEAAORD1345,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,SNA,KSNA,33.67570114,-117.8679962,America/Los_Angeles,-8,2015-02-12T19:45:00.000+0000,2015-02-13T00:08:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-13T00:00:00.000+0000,72530094846,72297793184,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,MQ,20398,MQ,N650MQ,2941,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11267,1126702,31267,DAY,"Dayton, OH",OH,39,Ohio,44,1305,0.0,-1,1300-1359,10.0,1310,1456.0,3.0,1509,0.0,0.0,64.0,46.0,1.0,240.0,1,2015-02-12N650MQORD1305,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,DAY,KDAY,39.90240097045898,-84.21939849853516,America/New_York,-5,2015-02-12T19:05:00.000+0000,2015-02-12T20:09:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T20:00:00.000+0000,72530094846,72429093815,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,MQ,20398,MQ,N528MQ,3020,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11042,1104203,30647,CLE,"Cleveland, OH",OH,39,Ohio,44,1323,1.0,4,1300-1359,18.0,1448,1639.0,5.0,1535,0.0,0.0,72.0,51.0,1.0,315.0,2,2015-02-12N528MQORD1323,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,CLE,KCLE,41.4117012024,-81.8498001099,America/New_York,-5,2015-02-12T19:23:00.000+0000,2015-02-12T20:35:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T20:00:00.000+0000,72530094846,72524014820,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,MQ,20398,MQ,N849MQ,3126,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11612,1161203,31612,EVV,"Evansville, IN",IN,18,Indiana,42,1310,0.0,0,1300-1359,10.0,1320,1355.0,3.0,1414,0.0,0.0,64.0,35.0,1.0,273.0,2,2015-02-12N849MQORD1310,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,EVV,KEVV,38.0369987488,-87.5324020386,America/Chicago,-6,2015-02-12T19:10:00.000+0000,2015-02-12T20:14:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T20:00:00.000+0000,72530094846,72432093817,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,MQ,20398,MQ,N833MQ,3321,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11203,1120302,30424,CWA,"Mosinee, WI",WI,55,Wisconsin,45,1345,0.0,0,1300-1359,17.0,1415,1456.0,4.0,1448,0.0,0.0,63.0,41.0,1.0,212.0,1,2015-02-12N833MQORD1345,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,CWA,KCWA,44.7775993347,-89.6668014526,America/Chicago,-6,2015-02-12T19:45:00.000+0000,2015-02-12T20:48:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T20:00:00.000+0000,72530094846,72646594890,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,MQ,20398,MQ,N507MQ,3339,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,15096,1509602,35096,SYR,"Syracuse, NY",NY,36,New York,22,1325,0.0,0,1300-1359,20.0,1347,1610.0,3.0,1603,0.0,0.0,98.0,83.0,1.0,607.0,3,2015-02-12N507MQORD1325,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,SYR,KSYR,43.11119842529297,-76.1063003540039,America/New_York,-5,2015-02-12T19:25:00.000+0000,2015-02-12T21:03:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T21:00:00.000+0000,72530094846,72519014771,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,MQ,20398,MQ,N942MQ,3357,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11977,1197702,31977,GRB,"Green Bay, WI",WI,55,Wisconsin,45,1315,0.0,-1,1300-1359,15.0,1324,1357.0,3.0,1407,0.0,0.0,52.0,33.0,1.0,173.0,1,2015-02-12N942MQORD1315,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,GRB,KGRB,44.48509979248047,-88.12960052490234,America/Chicago,-6,2015-02-12T19:15:00.000+0000,2015-02-12T20:07:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T20:00:00.000+0000,72530094846,72645014898,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,OO,20304,OO,N963SW,5344,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11337,1133703,31337,DLH,"Duluth, MN",MN,27,Minnesota,63,1310,1.0,2,1300-1359,20.0,1404,1508.0,6.0,1445,0.0,0.0,95.0,64.0,1.0,397.0,2,2015-02-12N963SWORD1310,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,DLH,KDLH,46.8420982361,-92.1936035156,America/Chicago,-6,2015-02-12T19:10:00.000+0000,2015-02-12T20:45:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T20:00:00.000+0000,72530094846,72745014913,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0
2015,1,2,12,4,2015-02-12,UA,19977,UA,N827UA,486,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,10529,1052904,30529,BDL,"Hartford, CT",CT,9,Connecticut,11,1310,0.0,0,1300-1359,16.0,1337,1613.0,5.0,1616,0.0,0.0,126.0,96.0,1.0,783.0,4,2015-02-12N827UAORD1310,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,BDL,KBDL,41.9388999939,-72.68319702149999,America/New_York,-5,2015-02-12T19:10:00.000+0000,2015-02-12T21:16:00.000+0000,2015-02-12T19:00:00.000+0000,2015-02-12T21:00:00.000+0000,72530094846,72508014740,72530094846,2015-02-12T19:51:00.000+0000,41.995,-87.9336,201.8,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2015-02-12T19:00:00.000+0000,350,N,46,22000,9,N,16093,N,-89,-183,10318.0,1,350.0,46.0,22000.0,16093.0,-89.0,-183.0,10318.0


In [0]:
weather.filter((col('DATE') > "2015-02-20T23:59:59.000") & (col('DATE') < "2015-02-22T00:00:00.000")).display()

STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,HOUR,WND_DIRECTION_ANGLE,WND_TYPE_CODE,WND_SPEED_RATE,CIG_CEILING_HEIGHT_DIMENSION,CIG_CEILING_DETERMINATION_CODE,CIG_CAVOK_CODE,VIS_DISTANCE_DIMENSION,VIS_VARIABILITY_CODE,TMP_AIR_TEMP,DEW_POINT_TEMP,SLP_SEA_LEVEL_PRES,VALID_WEATHER_DATA,WND_DIRECTION_ANGLE-AVG,WND_SPEED_RATE-AVG,CIG_CEILING_HEIGHT_DIMENSION-AVG,VIS_DISTANCE_DIMENSION-AVG,TMP_AIR_TEMP-AVG,DEW_POINT_TEMP-AVG,SLP_SEA_LEVEL_PRES-AVG
70000126492,2015-02-21T03:53:00.000+0000,60.785,-148.83889,31.4,"PORTAGE GLACIER VISITOR CENTER, AK US",2015-02-21T03:00:00.000+0000,100.0,N,82.0,1006.0,M,N,16093.0,N,56.0,17.0,10221.0,1,100.0,82.0,1006.0,16093.0,56.0,17.0,10221.0
70063899999,2015-02-21T09:01:00.000+0000,54.85,-163.4166666,6.0,"FALSE PASS, AK US",2015-02-21T09:00:00.000+0000,230.0,N,21.0,884.0,9,N,16093.0,,30.0,30.0,,0,230.0,21.0,884.0,16093.0,30.0,30.0,
70148626642,2015-02-21T10:01:00.000+0000,67.73167,-164.54833,3.0,"KIVALINA AIRPORT, AK US",2015-02-21T10:00:00.000+0000,70.0,N,72.0,1097.0,M,N,16093.0,N,-17.0,-33.0,,0,70.0,72.0,1097.0,16093.0,-17.0,-33.0,
70204026703,2015-02-21T10:56:00.000+0000,63.76667,-171.73278,8.5,"GAMBELL AIRPORT, AK US",2015-02-21T10:00:00.000+0000,30.0,N,144.0,335.0,M,N,1207.0,N,-130.0,-150.0,,0,30.0,144.0,335.0,1207.0,-130.0,-150.0,
70222326602,2015-02-21T13:36:00.000+0000,64.935,-161.155,33.5,"KOYUK ALFRED ADAMS AIRPORT, AK US",2015-02-21T13:00:00.000+0000,30.0,N,57.0,2591.0,M,N,16093.0,N,,,,0,30.0,57.0,2591.0,16093.0,,,
70333325518,2015-02-21T18:56:00.000+0000,56.31139,-158.37306,5.5,"CHIGNIK AIRPORT, AK US",2015-02-21T18:00:00.000+0000,110.0,N,46.0,884.0,M,N,16093.0,N,60.0,20.0,,0,110.0,46.0,884.0,16093.0,60.0,20.0,
70350025501,2015-02-21T05:51:00.000+0000,57.75111,-152.48556,24.4,"KODIAK AIRPORT, AK US",2015-02-21T05:00:00.000+0000,50.0,N,72.0,152.0,M,N,4023.0,N,60.0,60.0,,0,50.0,72.0,152.0,4023.0,60.0,60.0,
70360525604,2015-02-21T20:56:00.000+0000,59.01139,-161.81972,4.6,"PLATINUM AIRPORT, AK US",2015-02-21T20:00:00.000+0000,110.0,N,82.0,488.0,M,N,4023.0,N,38.0,27.0,9963.0,1,110.0,82.0,488.0,4023.0,38.0,27.0,9963.0
70381025309,2015-02-21T06:00:00.000+0000,58.3566,-134.564,4.9,"JUNEAU AIRPORT, AK US",2015-02-21T06:00:00.000+0000,,C,0.0,,9,N,9000.0,,0.0,-6.0,10308.0,0,70.0,0.0,1433.0,9000.0,0.0,-6.0,10308.0
70392500112,2015-02-21T18:56:00.000+0000,59.733,-157.267,82.3,"KOLIGANEK AIRPORT, AK US",2015-02-21T18:00:00.000+0000,130.0,N,154.0,1676.0,M,N,16093.0,N,38.0,5.0,10023.0,1,130.0,154.0,1676.0,16093.0,38.0,5.0,10023.0


In [0]:
flights.where(f.col('CRS_DEP_TIME_UTC_HOUR') == '2015-02-26T13:00:00.000+0000').distinct().display()

YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,FL_DATE,OP_UNIQUE_CARRIER,OP_CARRIER_AIRLINE_ID,OP_CARRIER,TAIL_NUM,OP_CARRIER_FL_NUM,ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,ORIGIN,ORIGIN_CITY_NAME,ORIGIN_STATE_ABR,ORIGIN_STATE_FIPS,ORIGIN_STATE_NM,ORIGIN_WAC,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID,DEST,DEST_CITY_NAME,DEST_STATE_ABR,DEST_STATE_FIPS,DEST_STATE_NM,DEST_WAC,CRS_DEP_TIME,DEP_DEL15,DEP_DELAY_GROUP,DEP_TIME_BLK,TAXI_OUT,WHEELS_OFF,WHEELS_ON,TAXI_IN,CRS_ARR_TIME,CANCELLED,DIVERTED,CRS_ELAPSED_TIME,AIR_TIME,FLIGHTS,DISTANCE,DISTANCE_GROUP,ID,IATA_ORIGIN,ICAO_ORIGIN,AIRPORT_LAT_ORIGIN,AIRPORT_LONG_ORIGIN,AIRPORT_TIMEZONE_ORIGIN,AIRPORT_UTC_OFFSET_ORIGIN,IATA_DEST,ICAO_DEST,AIRPORT_LAT_DEST,AIRPORT_LONG_DEST,AIRPORT_TIMEZONE_DEST,AIRPORT_UTC_OFFSET_DEST,CRS_DEP_TIME_UTC,CRS_ARR_TIME_UTC,CRS_DEP_TIME_UTC_HOUR,CRS_ARR_TIME_UTC_HOUR
2015,1,2,26,4,2015-02-26,OO,20304,OO,N809SK,4478,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,14869,1486903,34614,SLC,"Salt Lake City, UT",UT,49,Utah,87,750,1.0,1,0700-0759,47.0,905,1056,8.0,1029,0.0,0.0,219.0,171.0,1.0,1250.0,6,2015-02-26N809SKORD750,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,SLC,KSLC,40.78839874267578,-111.97799682617188,America/Denver,-7,2015-02-26T13:50:00.000+0000,2015-02-26T17:29:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T17:00:00.000+0000
2015,1,2,26,4,2015-02-26,MQ,20398,MQ,N611MQ,3392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,11042,1104203,30647,CLE,"Cleveland, OH",OH,39,Ohio,44,730,0.0,-1,0700-0759,52.0,814,1005,5.0,944,0.0,0.0,74.0,51.0,1.0,315.0,2,2015-02-26N611MQORD730,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,CLE,KCLE,41.4117012024,-81.8498001099,America/New_York,-5,2015-02-26T13:30:00.000+0000,2015-02-26T14:44:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T14:00:00.000+0000
2015,1,2,26,4,2015-02-26,WN,19393,WN,N791SW,513,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,15304,1530402,33195,TPA,"Tampa, FL",FL,12,Florida,33,850,0.0,-1,0800-0859,15.0,903,1007,6.0,1020,0.0,0.0,90.0,64.0,1.0,406.0,2,2015-02-26N791SWATL850,ATL,KATL,33.6367,-84.428101,America/New_York,-5,TPA,KTPA,27.975500106811523,-82.533203125,America/New_York,-5,2015-02-26T13:50:00.000+0000,2015-02-26T15:20:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T15:00:00.000+0000
2015,1,2,26,4,2015-02-26,UA,19977,UA,N38473,1168,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12892,1289203,32575,LAX,"Los Angeles, CA",CA,6,California,91,705,0.0,0,0700-0759,29.0,737,928,15.0,940,0.0,0.0,275.0,231.0,1.0,1744.0,7,2015-02-26N38473ORD705,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,LAX,KLAX,33.94250107,-118.4079971,America/Los_Angeles,-8,2015-02-26T13:05:00.000+0000,2015-02-26T17:40:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T17:00:00.000+0000
2015,1,2,26,4,2015-02-26,EV,20366,EV,N873AS,5162,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,11641,1164102,31641,FAY,"Fayetteville, NC",NC,37,North Carolina,36,820,0.0,-1,0800-0859,16.0,833,927,7.0,931,0.0,0.0,71.0,54.0,1.0,331.0,2,2015-02-26N873ASATL820,ATL,KATL,33.6367,-84.428101,America/New_York,-5,FAY,KFAY,34.9911994934082,-78.88030242919922,America/New_York,-5,2015-02-26T13:20:00.000+0000,2015-02-26T14:31:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T14:00:00.000+0000
2015,1,2,26,4,2015-02-26,WN,19393,WN,N631SW,1368,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,13198,1319801,33198,MCI,"Kansas City, MO",MO,29,Missouri,64,850,0.0,-1,0800-0859,14.0,901,953,5.0,1010,0.0,0.0,140.0,112.0,1.0,692.0,3,2015-02-26N631SWATL850,ATL,KATL,33.6367,-84.428101,America/New_York,-5,MCI,KMCI,39.2976,-94.713898,America/Chicago,-6,2015-02-26T13:50:00.000+0000,2015-02-26T16:10:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T16:00:00.000+0000
2015,1,2,26,4,2015-02-26,DL,19790,DL,N6714Q,1294,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,14492,1449202,34492,RDU,"Raleigh/Durham, NC",NC,37,North Carolina,36,859,1.0,4,0800-0859,15.0,1022,1110,8.0,1020,0.0,0.0,81.0,48.0,1.0,356.0,2,2015-02-26N6714QATL859,ATL,KATL,33.6367,-84.428101,America/New_York,-5,RDU,KRDU,35.87760162353516,-78.7874984741211,America/New_York,-5,2015-02-26T13:59:00.000+0000,2015-02-26T15:20:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T15:00:00.000+0000
2015,1,2,26,4,2015-02-26,DL,19790,DL,N945DN,1358,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,11057,1105703,31057,CLT,"Charlotte, NC",NC,37,North Carolina,36,850,0.0,0,0800-0859,12.0,911,949,6.0,1004,0.0,0.0,74.0,38.0,1.0,226.0,1,2015-02-26N945DNATL850,ATL,KATL,33.6367,-84.428101,America/New_York,-5,CLT,KCLT,35.2140007019043,-80.94309997558594,America/New_York,-5,2015-02-26T13:50:00.000+0000,2015-02-26T15:04:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T15:00:00.000+0000
2015,1,2,26,4,2015-02-26,WN,19393,WN,N652SW,319,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,10721,1072102,30721,BOS,"Boston, MA",MA,25,Massachusetts,13,850,1.0,1,0800-0859,20.0,935,1348,9.0,1125,0.0,1.0,155.0,,1.0,946.0,4,2015-02-26N652SWATL850,ATL,KATL,33.6367,-84.428101,America/New_York,-5,BOS,KBOS,42.36429977,-71.00520325,America/New_York,-5,2015-02-26T13:50:00.000+0000,2015-02-26T16:25:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T16:00:00.000+0000
2015,1,2,26,4,2015-02-26,F9,20436,F9,N216FR,1070,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,10397,1039705,30397,ATL,"Atlanta, GA",GA,13,Georgia,34,725,1.0,4,0700-0759,52.0,931,1202,8.0,1020,0.0,0.0,115.0,91.0,1.0,606.0,3,2015-02-26N216FRORD725,ORD,KORD,41.9786,-87.9048,America/Chicago,-6,ATL,KATL,33.6367,-84.428101,America/New_York,-5,2015-02-26T13:25:00.000+0000,2015-02-26T15:20:00.000+0000,2015-02-26T13:00:00.000+0000,2015-02-26T15:00:00.000+0000


In [0]:
flights_in = spark.read.parquet(JOINED_3M_PROCESSED_PATH).cache()flights_in = spark.read.parquet(JOINED_3M_PROCESSED_PATH).cache()