In [4]:
import numpy as np
import pandas as pd
import os

In [3]:
from math import radians, cos, sin, asin, sqrt
def haversine(p1, p2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    p1, p2 : sequence of two numbers
        Each point is ``(latitude, longitude)`` in decimal degrees. This is
        the order of the tuples stored in ``stations['Coordinates']`` and of
        every coordinate pair passed to this function in the notebook.

    Returns
    -------
    float
        Distance in kilometers on a sphere of radius 6371 km.
    """
    # Points arrive as (latitude, longitude). Reading them as (lon, lat)
    # would apply the cosine weighting below to the longitudes and distort
    # every distance, so unpack latitude first.
    lat1, lon1 = p1[0], p1[1]
    lat2, lon2 = p2[0], p2[1]
    # convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` a hair above 1 for near-antipodal points, which
    # would make asin raise a math domain error; clamp it.
    a = min(1.0, a)
    c = 2 * asin(sqrt(a))
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    return km

def row_haversine(p1, p2):
    """
    Return a list with the haversine distance from ``p1`` to each point
    in the iterable ``p2``, in iteration order.
    """
    distances = []
    for candidate in p2:
        distances.append(haversine(p1, candidate))
    return distances

In [5]:
#Preprocess the station data from the raw file
# The file is fixed-width; each field below is cut out by character position.
header = ['ID', 'LATITUDE', 'LONGITUDE','STATE', 'NAME','HCN/CRN FLAG']
s = []
# The context manager closes the file even if a malformed line raises below.
with open("Historical Weather data/ghcnd-stations.txt", 'r') as f:
    # Iterate the handle directly instead of loading every line with readlines().
    for line in f:
        # A blank (e.g. trailing) line has no numeric fields to parse.
        if not line.strip():
            continue
        s.append([str(line[0:11]).strip(), float(line[12:20].strip()), float(line[21:30].strip()),
                  str(line[38:40]).strip(), str(line[41:71]).strip(), str(line[76:79]).strip()])
stations = pd.DataFrame(s, columns = header)
# (latitude, longitude) tuples, built column-wise rather than with a
# row-by-row apply, which is slow on a table with one row per station.
stations['Coordinates'] = list(zip(stations['LATITUDE'], stations['LONGITUDE']))
print(stations[0:10])

            ID  LATITUDE  LONGITUDE STATE                   NAME HCN/CRN FLAG  \
0  ACW00011604   17.1167   -61.7833        ST JOHNS COOLIDGE FLD                
1  ACW00011647   17.1333   -61.7833                     ST JOHNS                
2  AE000041196   25.3330    55.5170          SHARJAH INTER. AIRP                
3  AEM00041194   25.2550    55.3640                   DUBAI INTL                
4  AEM00041217   24.4330    54.6510               ABU DHABI INTL                
5  AEM00041218   24.2620    55.6090                  AL AIN INTL                
6  AF000040930   35.3170    69.0170                 NORTH-SALANG                
7  AFM00040938   34.2100    62.2280                        HERAT                
8  AFM00040948   34.5660    69.2120                   KABUL INTL                
9  AFM00040990   31.5000    65.8500             KANDAHAR AIRPORT                

           Coordinates  
0  (17.1167, -61.7833)  
1  (17.1333, -61.7833)  
2     (25.333, 55.517)  
3     (2

In [8]:
# Three-letter airport codes of interest; the airport coordinate table is
# filtered down to these codes before the nearest station is looked up.
# NOTE(review): 'AUS' is listed twice; harmless for a membership test.
Airports = ['ABR', 'ABI', 'ADK', 'BQN', 'CAK', 'ABY', 'ALB', 'ABQ', 'AEX', 'ABE', 'APN', 'AMA', 'ANC', 'ANI', 'ATW', 'ACV', 
            'AVL', 'HTS', 'ASE', 'ATL', 'ACY', 'AGS', 'AUS', 'AUS', 'BFL', 'BWI', 'BGR', 'BRW', 'BTR', 'BPT', 'BLI', 'BJI', 
            'RDM', 'BET', 'BIL', 'BGM', 'BHM', 'BIS', 'BMI', 'BOI', 'BOS', 'BZN', 'BRD', 'BKG', 'TRI', 'BRO', 'BQK', 'BUF', 
            'IFP', 'BUR', 'BTV', 'BTM', 'CLD', 'CPR', 'CDC', 'CID', 'CMI', 'CHS', 'CRW', 'STT', 'CLT', 'CHO', 'CHA', 'CYS', 
            'MDW', 'ORD', 'CIC', 'STX', 'CVG', 'CKB', 'CLE', 'COD', 'CLL', 'COS', 'COU', 'CAE', 'CSG', 'CBM', 'GTR', 'CMH', 
            'CCR', 'CDV', 'CRP', 'CEC', 'DAL', 'DFW', 'DAY', 'DAB', 'SCC', 'DRT', 'DEN', 'DSM', 'DET', 'DTW', 'DVL', 'DIK', 
            'DLG', 'DHN', 'DBQ', 'DLH', 'DRO', 'EGE', 'EAU', 'IPL', 'ELP', 'EKO', 'ELM', 'ERI', 'ESC', 'EUG', 'EVV', 'FAI', 
            'FAR', 'FMN', 'XNA', 'FAY', 'FLG', 'FNT', 'FLO', 'FLL', 'RSW', 'FSM', 'FWA', 'FAT', 'GNV', 'GCK', 'GCC', 'GCN', 
            'GFK', 'GRI', 'GJT', 'GRR', 'GTF', 'GRB', 'GSO', 'GLH', 'GSP', 'GUM', 'GPT', 'GUC', 'GST', 'CMX', 'HRL', 'MDT', 
            'BDL', 'PIB', 'HDN', 'HYS', 'HLN', 'HIB', 'HKY', 'ITO', 'HHH', 'HOB', 'HNL', 'MKK', 'EFD', 'IAH', 'HOU', 'HSV', 
            'HYA', 'IDA', 'IND', 'INL', 'IYK', 'IMT', 'ISP', 'ITH', 'JAC', 'JAN', 'JAX', 'OAJ', 'JMS', 'JLN', 'JNU', 'OGG', 
            'AZO', 'FCA', 'MKC', 'MCI', 'KTN', 'EYW', 'GRK', 'ILE', 'AKN', 'ISO', 'LMT', 'TYS', 'ADQ', 'KOA', 'ROR', 'OTZ', 
            'LSE', 'LFT', 'LCH', 'TVL', 'LNY', 'LAN', 'LAR', 'LRD', 'LAS', 'LBE', 'LAW', 'LWB', 'LWS', 'LEX', 'LIH', 'LNK', 
            'LIT', 'LGB', 'GGG', 'LAX', 'SDF', 'LBB', 'LYH', 'MCN', 'MSN', 'MMH', 'MHT', 'MHK', 'MTH', 'MQT', 'MVY', 'MAZ', 
            'MFR', 'MLB', 'MEM', 'MEI', 'MIA', 'MAF', 'MKE', 'MSP', 'MIB', 'MOT', 'MFE', 'MSO', 'CNY', 'MOB', 'MOD', 'MLI', 
            'MLU', 'MRY', 'MGM', 'MTJ', 'MWH', 'CWA', 'MKG', 'MYR', 'ACK', 'APF', 'BNA', 'EWN', 'HVN', 'MSY', 'JFK', 'LGA', 
            'EWR', 'SWF', 'PHF', 'IAG', 'OME', 'ORF', 'OTH', 'LBF', 'OAK', 'OGD', 'OKC', 'OMA', 'ONT', 'MCO', 'OXR', 'PAH', 
            'PPG', 'PSP', 'PMD', 'PFN', 'ECP', 'PSC', 'PLN', 'PNS', 'PIA', 'PSG', 'PHL', 'AZA', 'PHX', 'PIR', 'SOP', 'PIT', 
            'PBG', 'PIH', 'PSE', 'PWM', 'PDX', 'PVD', 'PVU', 'PUB', 'PGD', 'RDU', 'RCA', 'RAP', 'RDR', 'RDD', 'RNO', 'RHI', 
            'RIC', 'ROA', 'RST', 'ROC', 'RKS', 'RFD', 'ROW', 'ROP', 'SMF', 'MBS', 'SPN', 'SLE', 'SLC', 'SJT', 'SAT', 'SAN', 
            'SFO', 'SJC', 'SJU', 'SBP', 'SNA', 'SBA', 'SAF', 'SMX', 'STS', 'SRQ', 'CIU', 'SAV', 'BFF', 'AVP', 'BFI', 'SEA', 
            'SHV', 'SUX', 'FSD', 'SIT', 'SBN', 'SKA', 'GEG', 'SPI', 'SGF', 'UST', 'STC', 'SGU', 'STL', 'KSM', 'PIE', 'SCE', 
            'SHD', 'SCK', 'SUN', 'SYR', 'TLH', 'TPA', 'TEX', 'TXK', 'TKI', 'TOL', 'FOE', 'TVC', 'TTN', 'TUS', 'TUL', 'UTM', 
            'TUP', 'TWF', 'TYR', 'DUT', 'UCA', 'VLD', 'VPS', 'VEL', 'VCT', 'VIS', 'ACT', 'DCA', 'IAD', 'ALO', 'ART', 'ENV', 
            'PBI', 'WYS', 'HPN', 'SPS', 'ICT', 'ISN', 'ILG', 'ILM', 'ORH', 'WRG', 'YKM', 'YAK', 'YAP', 'YUM', 'GPI']
    

In [9]:
def closest_station(Coord):
    """
    Return the ID of the weather station nearest to ``Coord``.

    ``Coord`` is compared, via ``haversine``, against every tuple in
    ``stations['Coordinates']`` (built as ``(LATITUDE, LONGITUDE)``), so it
    must be given in that same order. The result is the ``ID`` value of the
    row with the smallest distance.
    """
    distances = stations['Coordinates'].map(lambda x: haversine(Coord, x))
    # A Series has a single axis, so idxmin is called without axis=1:
    # that value is not a valid Series axis and pandas rejects it.
    minindex = distances.idxmin()
    stationID = stations['ID'].loc[minindex]

    return stationID


In [10]:
def _signed_degrees(value, negative_hemisphere):
    """
    Convert a string such as ``'123456.7N'`` to signed decimal degrees.

    The numeric part (everything but the last character) is divided by 3600,
    presumably because it is expressed in arc-seconds (TODO confirm against
    the data source). The result is negated when the trailing hemisphere
    letter equals ``negative_hemisphere``.
    """
    degrees = float(value[:-1]) / 3600
    return -degrees if value[-1] == negative_hemisphere else degrees


zipcodes = pd.read_csv("AIrport Zip Codes (for weatherscraping).csv")
# Drop the first character of every LocationID so the codes can be matched
# against the three-letter entries in Airports.
zipcodes['LocationID'] = [x[1:] for x in zipcodes['LocationID']]
# .copy() makes the filtered table an independent frame, so the column
# assignments below do not write into a slice of the original
# (SettingWithCopyWarning).
zipcodes = zipcodes.loc[zipcodes['LocationID'].isin(Airports)].copy()
# South latitudes and west longitudes are negative.
zipcodes['ARPLatitudeS'] = zipcodes['ARPLatitudeS'].map(lambda x: _signed_degrees(x, "S"))
zipcodes['ARPLongitudeS'] = zipcodes['ARPLongitudeS'].map(lambda x: _signed_degrees(x, "W"))
# Coordinates are passed as (latitude, longitude), the same order as stations['Coordinates'].
zipcodes['Nearest_Station'] = zipcodes.apply(lambda row: closest_station((row.ARPLatitudeS, row.ARPLongitudeS)), axis = 1)


In [14]:
# Show the row(s) of the filtered airport table whose LocationID is 'YAP'.
print(zipcodes.loc[zipcodes['LocationID'].eq('YAP')])

Empty DataFrame
Columns: [LocationID, OwnerCSZ, ManagerCSZ, ZIp1, ZIp2, ARPLatitude, ARPLatitudeS, ARPLongitude, ARPLongitudeS, Nearest_Station]
Index: []


In [11]:
# Write the airport -> nearest-station mapping as a comma-separated file
# containing only the two mapping columns, without the index.
zipcodes.to_csv(
    "Airport_to_Weather_Stations.txt",
    columns=['LocationID', 'Nearest_Station'],
    sep=",",
    index=False,
)

In [16]:
# Missed airports
# Each entry is [airport code, latitude, longitude] in decimal degrees.

missing_airports = [
    ["BKG", 36.5385, -93.1991],
    ["CLD", 33.1279, -117.2792],
    ["HDN", 40.4814, -107.2166],
    ["HHH", 32.2251, -80.6974],
    ["FCA", 48.3099, -114.2546],
    ["GPI", 48.3099, -114.2546],
    ["MQT", 46.3497, -87.3873],
    ["AZA", 33.3074, -111.6689],
    ["ROP", 14.1720, 145.2438],
    ["SPN", 15.1197, 145.7283],
    ["UST", 29.9547, -81.3433],
    ["SCE", 40.8500, -77.8487],
    ["UTM", 34.685000, -90.347778],
    ["YAP", 9.498889, 138.082500],
    ["YUM", 32.6686, -114.5990],
]

# Map every airport code to the ID of its nearest weather station.
airports_dict = {
    code: closest_station((latitude, longitude))
    for code, latitude, longitude in missing_airports
}

print(airports_dict)

{'BKG': 'USC00230887', 'CLD': 'USW00003177', 'HDN': 'USC00053867', 'HHH': 'US1SCBF0052', 'FCA': 'USC00244560', 'GPI': 'USC00244560', 'MQT': 'US1MIMQ0016', 'AZA': 'USW00023104', 'ROP': 'CQC00914801', 'SPN': 'CQC00914855', 'UST': 'US1FLSJ0023', 'SCE': 'US1PACN0002', 'UTM': 'USC00229155', 'YAP': 'FMC00914429', 'YUM': 'USW00023195'}
