In [1]:
import os
import sys
import pandas as pd
import numpy as np
# Make the helper modules in ./Modules/ importable. The path is built from the
# current working directory, so the notebook must be started from its own folder.
sys.path.append(os.getcwd() + '/Modules/')
from utc_converter import utc_converter
from metar_decoder import metar_decoder
# Never truncate columns when a DataFrame is displayed (the frames here are wide).
pd.set_option('display.max_columns', None)

#### Data preprocessing
---

In [30]:
# Read the flight records into `rw` (the file name suggests airport details were
# already joined in an earlier step).
flights_csv = "..//Datasets/NEW/RW_with_airports.csv"
rw = pd.read_csv(flights_csv)

In [31]:
# add time with UTC format
# NOTE(review): utc_converter is a project helper from Modules/. The frame displayed after
# this call contains dep_UTC_time / arr_UTC_time, presumably added here - confirm in the helper.
rw = utc_converter(rw)

In [32]:
# preview the first rows after the UTC conversion
rw.head()

Unnamed: 0,tail_number,date,aircraft,origin_code,origin,destination_code,destination,departure,dep_UTC_time,arrival,arr_UTC_time,duration,distance_mi,Owner,origin_Latitude,origin_Longitude,destination_Latitude,destination_Longitude,airport_origin,city_origin,origin_state,country_origin,airport_destination,city_destination,destination_state,country_destination,departure_shift,arrival_shift
0,N640EC,3/11/21,EC35,KPGV,Pitt-Greenville (KPGV),KRWI,Rocky Mount-Wilson Rgnl (KRWI),04:29AM EST,09:29:00,04:46AM EST,09:46:00,16,32.296705,"METRO AVIATION INCSHREVEPORT, LA, US(Corporation)",35.635201,-77.3853,35.8563,-77.891899,Pitt Greenville Airport,Greenville,NC,US,Rocky Mount Wilson Regional Airport,Rocky Mount,NC,US,night,night
1,N640EC,3/11/21,EC35,KRWI,Rocky Mount-Wilson Rgnl (KRWI),KPGV,Pitt-Greenville (KPGV),02:17AM EST,07:17:00,03:47AM EST,08:47:00,89,32.296705,"METRO AVIATION INCSHREVEPORT, LA, US(Corporation)",35.8563,-77.891899,35.635201,-77.3853,Rocky Mount Wilson Regional Airport,Rocky Mount,NC,US,Pitt Greenville Airport,Greenville,NC,US,night,night
2,N640EC,3/11/21,EC35,KPGV,Pitt-Greenville (KPGV),KRWI,Rocky Mount-Wilson Rgnl (KRWI),01:15AM EST,06:15:00,01:30AM EST,06:30:00,15,32.296705,"METRO AVIATION INCSHREVEPORT, LA, US(Corporation)",35.635201,-77.3853,35.8563,-77.891899,Pitt Greenville Airport,Greenville,NC,US,Rocky Mount Wilson Regional Airport,Rocky Mount,NC,US,night,night
3,N640EC,3/10/21,EC35,KIXA,"Roanoke Rapids, NC",KPGV,Pitt-Greenville (KPGV),11:33PM EST,04:33:00,12:02AM EST,05:02:00,28,54.850847,"METRO AVIATION INCSHREVEPORT, LA, US(Corporation)",36.40384,-77.63786,35.635201,-77.3853,Halifax-Northampton Regional Airport,Roanoke Rapids,NC,US,Pitt Greenville Airport,Greenville,NC,US,night,night
4,N640EC,3/10/21,EC35,KRWI,Rocky Mount-Wilson Rgnl (KRWI),KIXA,"Roanoke Rapids, NC",08:50PM EST,01:50:00,09:31PM EST,02:31:00,41,41.163189,"METRO AVIATION INCSHREVEPORT, LA, US(Corporation)",35.8563,-77.891899,36.41391,-77.62894,Rocky Mount Wilson Regional Airport,Rocky Mount,NC,US,Halifax-Northampton Regional Airport,Roanoke Rapids,NC,US,night,night


In [33]:
# parse the 'date' strings (month/day/two-digit year) into datetime values
rw['date'] = pd.to_datetime(rw['date'], format='%m/%d/%y')

In [34]:
# Pull the code written in the trailing parentheses of the airport label,
# e.g. "Pitt-Greenville (KPGV)" -> "KPGV". Labels without parentheses
# (e.g. "Roanoke Rapids, NC") yield NaN.
#
# str.extract replaces rsplit("(", expand=True)[1]: when no label in the column contains
# "(", the expanded frame has no column 1 and the old expression raised KeyError.
# Pattern: the last "(" in the string, capturing up to the first ")" (or end of string).
paren_code = r'\(([^()]*)[^(]*$'
rw['ICAO_origin'] = rw['origin'].str.extract(paren_code, expand=False)
rw['ICAO_destination'] = rw['destination'].str.extract(paren_code, expand=False)

In [35]:
# Some labels carry two codes separated by " / "; keep only the last one.
# rsplit(...).str[-1] leaves single-code values and NaN unchanged, so no row mask is needed.
# Unlike split(..., expand=True)[1] it cannot raise KeyError when no value contains " / ",
# and it really returns the last code if more than two are listed.
for icao_col in ('ICAO_origin', 'ICAO_destination'):
    rw[icao_col] = rw[icao_col].str.rsplit(' / ', n=1).str[-1]

In [36]:
# where no code could be parsed from the label, fall back to the airport code column
rw['ICAO_origin'] = rw['ICAO_origin'].fillna(rw['origin_code'])
rw['ICAO_destination'] = rw['ICAO_destination'].fillna(rw['destination_code'])

In [37]:
# Turn IATA-style codes into ICAO-style codes by prefixing 'K' to every value
# that does not already start with 'K' (missing values stay missing).
# NOTE(review): the prefix is applied regardless of code length, so private-field
# identifiers such as "II29" become "KII29" - confirm this is what the METAR lookup expects.
for icao_col in ('ICAO_origin', 'ICAO_destination'):
    lacks_k = ~rw[icao_col].str.startswith('K', na=False)
    rw.loc[lacks_k, icao_col] = 'K' + rw.loc[lacks_k, icao_col]

In [38]:
# Extract the UTC hour of departure and arrival as string merge keys ('0'..'23').
#
# errors='coerce' turns unparseable times into NaT. A single NaT makes .dt.hour a float
# column, and a plain astype('str') would then render every hour as '1.0', '18.0', ...,
# which silently matches nothing in the METAR merge. Casting through nullable Int64 keeps
# the integer formatting; unparseable rows become the string '<NA>'.
for time_col, hour_col in (('dep_UTC_time', 'dep_UTC_hour'), ('arr_UTC_time', 'arr_UTC_hour')):
    parsed = pd.to_datetime(rw[time_col].astype('str'), format='%H:%M:%S', errors='coerce')
    rw[hour_col] = parsed.dt.hour.astype('Int64').astype('str')

In [39]:
# sort by 'tail_number', 'date' and departure hour
# 'dep_UTC_hour' holds strings, so sorting on it directly is lexicographic ('10' < '2').
# Sort on a temporary numeric copy to get chronological order; values that are not
# numeric sort last. The helper column is removed again and the index renumbered.
rw = (
    rw.assign(_dep_hour_num=pd.to_numeric(rw['dep_UTC_hour'], errors='coerce'))
      .sort_values(['tail_number', 'date', '_dep_hour_num'])
      .drop(columns='_dep_hour_num')
      .reset_index(drop=True)
)

In [40]:
# inspect data
# (a bare DataFrame expression renders pandas' truncated preview: first and last rows)
rw

Unnamed: 0,tail_number,date,aircraft,origin_code,origin,destination_code,destination,departure,dep_UTC_time,arrival,arr_UTC_time,duration,distance_mi,Owner,origin_Latitude,origin_Longitude,destination_Latitude,destination_Longitude,airport_origin,city_origin,origin_state,country_origin,airport_destination,city_destination,destination_state,country_destination,departure_shift,arrival_shift,ICAO_origin,ICAO_destination,dep_UTC_hour,arr_UTC_hour
0,N101AE,2020-12-04,B06,II29,Owens Fld (II29),0IN7,"Indianapolis, IN",08:04PM EST,01:04:00,08:58PM EST,01:58:00,54,21.173583,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.610901,-86.756104,39.716670,-86.383330,Owens Field,Greencastle,IN,US,Roto-Whirl/Holiday Heliport,Indianapolis,IN,US,night,night,KII29,K0IN7,1,1
1,N101AE,2020-12-04,B06,K2R2,Hendricks County-Gordon Graham Fld (2R2),8IN9,Marcidale (8IN9),10:44PM EST,03:44:00,11:44PM EST,04:44:00,60,8.086743,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.748100,-86.473801,39.685600,-86.602203,Hendricks County Gordon Graham Field,Indianapolis,IN,US,Marcidale Airport,Amo,IN,US,night,night,K2R2,K8IN9,3,4
2,N101AE,2020-12-05,B06,38II,"Greencastle, IN",0IN7,"Indianapolis, IN",01:12PM EST,18:12:00,04:44PM EST,21:44:00,212,18.618289,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.650000,-86.750000,39.722050,-86.413390,Hampton Field,Greencastle,IN,US,Roto-Whirl/Holiday Heliport,Indianapolis,IN,US,day,day,K38II,K0IN7,18,21
3,N101AE,2020-12-07,B06,07I,"Bloomington, IN",KEYE,Eagle Creek Airpark (KEYE),08:22PM EST,01:22:00,10:37PM EST,03:37:00,134,56.379143,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.060840,-86.648100,39.830700,-86.294403,Lake Monroe Seaplane Base,Bloomington,IN,US,Eagle Creek Airpark,Indianapolis,IN,US,night,night,K07I,KEYE,1,3
4,N101AE,2020-12-07,B06,38II,Hampton Fld (38II),38II,"Greencastle, IN",12:46PM EST,17:46:00,02:00PM EST,19:00:00,74,7.663635,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.728611,-86.823056,39.665580,-86.704650,Hampton Field,Greencastle,IN,US,Hampton Field,Greencastle,IN,US,day,day,K38II,K38II,17,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96069,N9NJ,2021-03-10,A139,KN81,Hammonton Muni (N81),KTTN,Trenton Mercer (KTTN),12:44PM EST,17:44:00,01:05PM EST,18:05:00,20,42.134984,"STATE OF NEW JERSEYTRENTON, NJ, US(Government)",39.667500,-74.757698,40.276699,-74.813499,Hammonton Municipal Airport,Hammonton,NJ,US,Trenton Mercer Airport,Trenton,NJ,US,day,day,KN81,KTTN,17,18
96070,N9SP,2021-02-26,,KECP,Northwest Florida Beaches Intl (KECP),27FD,"Panama City, FL",04:56PM CST,22:56:00,05:39PM CST,23:39:00,42,2.576154,"STATE OF NEW YORKALBANY, NY, US(Government)",30.357106,-85.795414,30.391940,-85.811110,Northwest Florida Beaches International Airport,Panama City,FL,US,Coastal Helicopters Inc Heliport,Panama City,FL,US,day,day,KECP,K27FD,22,23
96071,N9SP,2021-03-03,B430,KFWQ,Rostraver (KFWQ),KAGC,Allegheny County (KAGC),05:53PM EST,22:53:00,05:59PM EST,22:59:00,6,11.266494,"STATE OF NEW YORKALBANY, NY, US(Government)",40.209702,-79.831398,40.354401,-79.930199,Rostraver Airport,Monongahela,PA,US,Allegheny County Airport,Pittsburgh,PA,US,day,day,KFWQ,KAGC,22,22
96072,N9SP,2021-03-03,B430,KAGC,Allegheny County (KAGC),KALB,Albany Intl (KALB),06:30PM EST,23:30:00,08:40PM EST,01:40:00,129,357.967476,"STATE OF NEW YORKALBANY, NY, US(Government)",40.354401,-79.930199,42.748299,-73.801697,Allegheny County Airport,Pittsburgh,PA,US,Albany International Airport,Albany,NY,US,day,night,KAGC,KALB,23,1


#### METAR data merging for Origin Location
---

In [None]:
# # scrape METAR data for Origin
# from metar_scraper_origin import metar_scraper_origin
# metar_scraper_origin(rw)

In [41]:
# Read the previously saved METAR reports for the origin airports
metar_origin_csv = "..//Datasets/NEW/METAR/RW_METAR_origin.csv"
metar_origin = pd.read_csv(metar_origin_csv)

In [42]:
# Parse the METAR timestamp once (day/month/year hour:minute), then split it into
# an 'hour' string and a 'date' string (YYYY-MM-DD) that serve as merge keys.
origin_observed_at = pd.to_datetime(metar_origin['date'], format='%d/%m/%Y %H:%M')
metar_origin['hour'] = origin_observed_at.dt.hour.astype('str')
metar_origin['date'] = origin_observed_at.dt.date.astype('str')

In [43]:
# cast rw's 'date' column to text so it can be compared with the METAR 'date' strings
rw['date'] = rw['date'].astype('str')

In [44]:
# Order the reports by aircraft, date and hour, then keep the first report per
# (aircraft, origin airport, date, hour) so the merge key is unique.
origin_dedupe_keys = ['tail_number', 'ICAO_origin', 'date', 'hour']
metar_origin = metar_origin.sort_values(['tail_number', 'date', 'hour'])
metar_origin = metar_origin.drop_duplicates(subset=origin_dedupe_keys)
metar_origin = metar_origin.reset_index(drop=True)

In [45]:
# inspect data: first rows of the de-duplicated origin METAR table
metar_origin.head()

Unnamed: 0,tail_number,ICAO_origin,date,METAR,hour
0,N101AE,KGPC,2020-12-09,METAR KGPC 090355Z AUTO 21009KT 10SM CLR 01/M0...,3
1,N101AE,KGPC,2020-12-09,METAR KGPC 090455Z AUTO 22008KT 10SM CLR 01/M0...,4
2,N101AE,KIND,2020-12-18,METAR KIND 182354Z 15007KT 10SM SCT220 OVC250 ...,23
3,N101AE,KIND,2020-12-19,METAR KIND 190054Z 16010KT 10SM OVC230 01/M04 ...,0
4,N101AE,KIND,2020-12-29,METAR KIND 292154Z 11013KT 10SM BKN090 OVC140 ...,21


In [46]:
# Left-join the origin METAR onto each flight, matching on aircraft, date, origin ICAO
# code and UTC departure hour. The key columns of both sides are dropped afterwards,
# so this cell cannot be re-run without re-running the preprocessing cells first.
# NOTE(review): the hour key is UTC and the METAR dates look like UTC dates
# (e.g. "KGPC 090355Z" is stored under 2020-12-09), but nothing visible here converts
# rw's 'date'. If it is the local calendar date, flights whose UTC time falls on the
# next day will be paired with the wrong day's report or with none - confirm.
origin_weather = metar_origin.add_suffix('_origin')
rw = rw.merge(
    origin_weather,
    how='left',
    left_on=['tail_number', 'date', 'ICAO_origin', 'dep_UTC_hour'],
    right_on=['tail_number_origin', 'date_origin', 'ICAO_origin_origin', 'hour_origin'],
)
rw = rw.drop(
    ['ICAO_origin', 'dep_UTC_hour', 'tail_number_origin', 'ICAO_origin_origin', 'date_origin', 'hour_origin'],
    axis=1,
)

In [54]:
# number of flights left without an origin METAR after the merge
rw['METAR_origin'].isna().sum()

41750

In [53]:
# share of flights (in percent) that received an origin METAR
n_origin_matched = int(rw['METAR_origin'].notnull().sum())
print(round(n_origin_matched / len(rw) * 100, 2), "%")

56.54 %


In [55]:
# flights for which no origin METAR was found
rw[rw['METAR_origin'].isna()]

Unnamed: 0,tail_number,date,aircraft,origin_code,origin,destination_code,destination,departure,dep_UTC_time,arrival,arr_UTC_time,duration,distance_mi,Owner,origin_Latitude,origin_Longitude,destination_Latitude,destination_Longitude,airport_origin,city_origin,origin_state,country_origin,airport_destination,city_destination,destination_state,country_destination,departure_shift,arrival_shift,ICAO_destination,arr_UTC_hour,METAR_origin
0,N101AE,2020-12-04,B06,II29,Owens Fld (II29),0IN7,"Indianapolis, IN",08:04PM EST,01:04:00,08:58PM EST,01:58:00,54,21.173583,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.610901,-86.756104,39.716670,-86.383330,Owens Field,Greencastle,IN,US,Roto-Whirl/Holiday Heliport,Indianapolis,IN,US,night,night,K0IN7,1,
1,N101AE,2020-12-04,B06,K2R2,Hendricks County-Gordon Graham Fld (2R2),8IN9,Marcidale (8IN9),10:44PM EST,03:44:00,11:44PM EST,04:44:00,60,8.086743,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.748100,-86.473801,39.685600,-86.602203,Hendricks County Gordon Graham Field,Indianapolis,IN,US,Marcidale Airport,Amo,IN,US,night,night,K8IN9,4,
2,N101AE,2020-12-05,B06,38II,"Greencastle, IN",0IN7,"Indianapolis, IN",01:12PM EST,18:12:00,04:44PM EST,21:44:00,212,18.618289,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.650000,-86.750000,39.722050,-86.413390,Hampton Field,Greencastle,IN,US,Roto-Whirl/Holiday Heliport,Indianapolis,IN,US,day,day,K0IN7,21,
3,N101AE,2020-12-07,B06,07I,"Bloomington, IN",KEYE,Eagle Creek Airpark (KEYE),08:22PM EST,01:22:00,10:37PM EST,03:37:00,134,56.379143,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.060840,-86.648100,39.830700,-86.294403,Lake Monroe Seaplane Base,Bloomington,IN,US,Eagle Creek Airpark,Indianapolis,IN,US,night,night,KEYE,3,
4,N101AE,2020-12-07,B06,38II,Hampton Fld (38II),38II,"Greencastle, IN",12:46PM EST,17:46:00,02:00PM EST,19:00:00,74,7.663635,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.728611,-86.823056,39.665580,-86.704650,Hampton Field,Greencastle,IN,US,Hampton Field,Greencastle,IN,US,day,day,K38II,19,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96057,N9NJ,2021-02-15,A139,KN81,Hammonton Muni (N81),0NJ0,"Atlantic City, NJ",03:21PM EST,20:21:00,03:57PM EST,20:57:00,36,27.405643,"STATE OF NEW JERSEYTRENTON, NJ, US(Government)",39.667500,-74.757698,39.358970,-74.434630,Hammonton Municipal Airport,Hammonton,NJ,US,Atlantic City Medical Center Heliport,Atlantic City,NJ,US,day,day,K0NJ0,20,
96059,N9NJ,2021-02-17,A139,KN81,Hammonton Muni (N81),KMJX,Ocean County (KMJX),04:40PM EST,21:40:00,04:53PM EST,21:53:00,12,30.576678,"STATE OF NEW JERSEYTRENTON, NJ, US(Government)",39.667500,-74.757698,39.927502,-74.292397,Hammonton Municipal Airport,Hammonton,NJ,US,Ocean County Airport,Toms River,NJ,US,day,day,KMJX,21,
96061,N9NJ,2021-02-23,A139,KN81,Hammonton Muni (N81),KTTN,Trenton Mercer (KTTN),03:19PM EST,20:19:00,03:37PM EST,20:37:00,17,42.134984,"STATE OF NEW JERSEYTRENTON, NJ, US(Government)",39.667500,-74.757698,40.276699,-74.813499,Hammonton Municipal Airport,Hammonton,NJ,US,Trenton Mercer Airport,Trenton,NJ,US,day,day,KTTN,20,
96069,N9NJ,2021-03-10,A139,KN81,Hammonton Muni (N81),KTTN,Trenton Mercer (KTTN),12:44PM EST,17:44:00,01:05PM EST,18:05:00,20,42.134984,"STATE OF NEW JERSEYTRENTON, NJ, US(Government)",39.667500,-74.757698,40.276699,-74.813499,Hammonton Municipal Airport,Hammonton,NJ,US,Trenton Mercer Airport,Trenton,NJ,US,day,day,KTTN,18,


#### METAR data merging for Destination Location
---

In [None]:
# # scrape METAR data for Destination
# from metar_scraper_destination import metar_scraper_destination
# metar_scraper_destination(rw)

In [58]:
# Read the previously saved METAR reports for the destination airports
metar_destination_csv = "../Datasets/NEW/METAR/RW_METAR_destination.csv"
metar_destination = pd.read_csv(metar_destination_csv)

In [59]:
# Parse the METAR timestamp once (day/month/year hour:minute), then split it into
# an 'hour' string and a 'date' string (YYYY-MM-DD) that serve as merge keys.
destination_observed_at = pd.to_datetime(metar_destination['date'], format='%d/%m/%Y %H:%M')
metar_destination['hour'] = destination_observed_at.dt.hour.astype('str')
metar_destination['date'] = destination_observed_at.dt.date.astype('str')

In [60]:
# Order the reports by aircraft, date and hour, then keep the first report per
# (aircraft, destination airport, date, hour) so the merge key is unique.
destination_dedupe_keys = ['tail_number', 'ICAO_destination', 'date', 'hour']
metar_destination = metar_destination.sort_values(['tail_number', 'date', 'hour'])
metar_destination = metar_destination.drop_duplicates(subset=destination_dedupe_keys)
metar_destination = metar_destination.reset_index(drop=True)

In [61]:
# inspect data: first rows of the de-duplicated destination METAR table
metar_destination.head()

Unnamed: 0,tail_number,ICAO_destination,date,METAR,hour
0,N101AE,KEYE,2020-12-07,METAR KEYE 070353Z AUTO 36005KT 10SM OVC027 M0...,3
1,N101AE,KEYE,2020-12-07,METAR KEYE 070453Z AUTO 35005KT 10SM OVC030 M0...,4
2,N101AE,KIND,2020-12-18,METAR KIND 180054Z 27007KT 9SM FEW014 BKN020 O...,0
3,N101AE,KIND,2020-12-18,METAR KIND 180154Z 28007KT 9SM FEW026 OVC040 M...,1
4,N101AE,KIND,2020-12-29,METAR KIND 291954Z 11013G19KT 10SM FEW085 BKN1...,19


In [62]:
# Left-join the destination METAR onto each flight, matching on aircraft, date,
# ICAO_destination and UTC arrival hour. The key columns of both sides are dropped
# afterwards, so this cell cannot be re-run without rebuilding those columns first.
# NOTE(review): the hour key is UTC and the METAR dates look like UTC dates
# (e.g. "KEYE 070353Z" is stored under 2020-12-07), but nothing visible here converts
# rw's 'date'. If it is the local calendar date, arrivals whose UTC time falls on the
# next day will be paired with the wrong day's report or with none - confirm.
destination_weather = metar_destination.add_suffix('_destination')
rw = rw.merge(
    destination_weather,
    how='left',
    left_on=['tail_number', 'date', 'ICAO_destination', 'arr_UTC_hour'],
    right_on=['tail_number_destination', 'date_destination', 'ICAO_destination_destination', 'hour_destination'],
)
rw = rw.drop(
    ['ICAO_destination', 'arr_UTC_hour', 'tail_number_destination', 'ICAO_destination_destination',
     'date_destination', 'hour_destination'],
    axis=1,
)

In [63]:
# number of flights left without a destination METAR after the merge
rw['METAR_destination'].isna().sum()

54028

In [64]:
# share of flights (in percent) that received a destination METAR
n_destination_matched = int(rw['METAR_destination'].notnull().sum())
print(round(n_destination_matched / len(rw) * 100, 2), "%")

43.76 %


#### METAR Decoding
---

In [65]:
# decode metar data
# NOTE(review): metar_decoder is a project helper from Modules/. The frame displayed after
# this call has decoded columns (e.g. temperature_origin, wind_destination) next to the raw
# METAR_origin / METAR_destination text, presumably added here - confirm in the helper.
rw = metar_decoder(rw)

In [66]:
# inspect data: first rows with the decoded METAR columns
rw.head()

Unnamed: 0,tail_number,date,aircraft,origin_code,origin,destination_code,destination,departure,dep_UTC_time,arrival,arr_UTC_time,duration,distance_mi,Owner,origin_Latitude,origin_Longitude,destination_Latitude,destination_Longitude,airport_origin,city_origin,origin_state,country_origin,airport_destination,city_destination,destination_state,country_destination,departure_shift,arrival_shift,METAR_origin,METAR_time_origin,report_type_origin,temperature_origin,dew_point_origin,wind_origin,wind_peak_origin,visibility_origin,pressure_origin,press_sea_level_origin,sky_origin,remarks_origin,METAR_destination,METAR_time_destination,report_type_destination,temperature_destination,dew_point_destination,wind_destination,wind_peak_destination,visibility_destination,pressure_destination,press_sea_level_destination,sky_destination,remarks_destination
0,N101AE,2020-12-04,B06,II29,Owens Fld (II29),0IN7,"Indianapolis, IN",08:04PM EST,01:04:00,08:58PM EST,01:58:00,54,21.173583,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.610901,-86.756104,39.71667,-86.38333,Owens Field,Greencastle,IN,US,Roto-Whirl/Holiday Heliport,Indianapolis,IN,US,night,night,,,,,,,,,,,,,,,,,,,,,,,,
1,N101AE,2020-12-04,B06,K2R2,Hendricks County-Gordon Graham Fld (2R2),8IN9,Marcidale (8IN9),10:44PM EST,03:44:00,11:44PM EST,04:44:00,60,8.086743,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.7481,-86.473801,39.6856,-86.602203,Hendricks County Gordon Graham Field,Indianapolis,IN,US,Marcidale Airport,Amo,IN,US,night,night,,,,,,,,,,,,,,,,,,,,,,,,
2,N101AE,2020-12-05,B06,38II,"Greencastle, IN",0IN7,"Indianapolis, IN",01:12PM EST,18:12:00,04:44PM EST,21:44:00,212,18.618289,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.65,-86.75,39.72205,-86.41339,Hampton Field,Greencastle,IN,US,Roto-Whirl/Holiday Heliport,Indianapolis,IN,US,day,day,,,,,,,,,,,,,,,,,,,,,,,,
3,N101AE,2020-12-07,B06,07I,"Bloomington, IN",KEYE,Eagle Creek Airpark (KEYE),08:22PM EST,01:22:00,10:37PM EST,03:37:00,134,56.379143,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.06084,-86.6481,39.8307,-86.294403,Lake Monroe Seaplane Base,Bloomington,IN,US,Eagle Creek Airpark,Indianapolis,IN,US,night,night,,,,,,,,,,,,,METAR KEYE 070353Z AUTO 36005KT 10SM OVC027 M0...,03:53:00,"routine report, cycle 4 (automatic report)",30.9 F,23.0 F,N at 5 knots,missing,10 miles,1015.6 mb,1016.4 mb,overcast at 2700 feet,Automated station (type 2)
4,N101AE,2020-12-07,B06,38II,Hampton Fld (38II),38II,"Greencastle, IN",12:46PM EST,17:46:00,02:00PM EST,19:00:00,74,7.663635,"AIR EVAC EMS INCO FALLON, MO, US(Corporation)",39.728611,-86.823056,39.66558,-86.70465,Hampton Field,Greencastle,IN,US,Hampton Field,Greencastle,IN,US,day,day,,,,,,,,,,,,,,,,,,,,,,,,


In [67]:
# column dtypes and non-null counts of the final frame
rw.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96074 entries, 0 to 96073
Data columns (total 52 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   tail_number                  96074 non-null  object 
 1   date                         96074 non-null  object 
 2   aircraft                     90671 non-null  object 
 3   origin_code                  96074 non-null  object 
 4   origin                       96074 non-null  object 
 5   destination_code             96074 non-null  object 
 6   destination                  96074 non-null  object 
 7   departure                    96074 non-null  object 
 8   dep_UTC_time                 96074 non-null  object 
 9   arrival                      96074 non-null  object 
 10  arr_UTC_time                 96074 non-null  object 
 11  duration                     96074 non-null  int64  
 12  distance_mi                  96074 non-null  float64
 13  Owner           

In [68]:
# missing values per column in the final frame
rw.isna().sum()

tail_number                        0
date                               0
aircraft                        5403
origin_code                        0
origin                             0
destination_code                   0
destination                        0
departure                          0
dep_UTC_time                       0
arrival                            0
arr_UTC_time                       0
duration                           0
distance_mi                        0
Owner                              0
origin_Latitude                    0
origin_Longitude                   0
destination_Latitude               0
destination_Longitude              0
airport_origin                     0
city_origin                       63
origin_state                      40
country_origin                     0
airport_destination                0
city_destination                   0
destination_state                 23
country_destination                0
departure_shift                    0
a

In [69]:
# write the flights with their METAR columns to disk, without the row index
output_csv = '../Datasets/NEW/RW_with_metar.csv'
rw.to_csv(output_csv, index=False)