# Historic Weather from Weather Underground
The purpose of this project is to use Weather Underground's API to obtain historic data for a specified geographical location. The intention is to use this as the input for the existing AirDensity project and to improve that project by taking account of altitude.

In [1]:
import pandas as pd


def WUStats1(y=2015, m=6, d=9, lat=51.4775, lon=-0.461389, API_key = '4519864dc92d8fdb'):
    """Return one day's weather statistics for a date and a geographic location.

    The Weather Underground ``history_YYYYMMDD`` endpoint is queried for the
    given latitude/longitude. The daily summary is preferred; if it cannot be
    read, the first individual observation of the day is used instead.

    Parameters
    ----------
    y, m, d : int
        Year, month and day of the requested date.
    lat, lon : float
        Latitude and longitude of the location.
    API_key : str
        Weather Underground API key.
        NOTE(review): a key is hard-coded as the default; prefer passing one
        in (e.g. read from an environment variable) over relying on it.

    Returns
    -------
    list
        ``[temperature (C), pressure (mBar), humidity, dew point (C),
        wind speed (kph), wind direction (degrees)]`` exactly as delivered by
        the API. Missing readings come back as sentinels such as ``'-9999'``
        or ``'N/A'`` rather than raising.

    Raises
    ------
    Exception
        Whatever ``pd.read_json`` raises for the request itself, or the
        lookup error from the observation fallback when neither the daily
        summary nor any observation is available.
    """
    address = 'http://api.wunderground.com/api/{0}/history_{1}{2:02d}{3:02d}/q/{4},{5}.json'.format(API_key, y, m, d, lat, lon)
    obs = pd.read_json(address)
    try:
        o = obs.history.dailysummary[0]
        stats = [o['meantempm'], o['meanpressurem'], o['humidity'], o['meandewptm'], o['meanwindspdm'], o['meanwdird']]
    except Exception:
        # Only ordinary failures (missing/empty daily summary) trigger the
        # fallback; KeyboardInterrupt and SystemExit are allowed to propagate.
        o = obs.history.observations[0]
        stats = [o['tempm'], o['pressurem'], o['hum'], o['dewptm'], o['wspdm'], o['wdird']]
    return stats



In [2]:
# Smoke test: 31 Dec 2016 at the function's default coordinates.
WUStats1(2016, 12, 31)

['5', '1030.50', '97', '6', '9', '220']

# Dealing with missing data
A bit of experimenting reveals that sometimes you don't get any data. For example, Bracknell airport doesn't seem to record any weather data.

In [3]:
# Same date at explicit coordinates; the output below shows sentinel values
# ('-9999', 'N/A') instead of real readings.
WUStats1(2016, 12, 31,51.38000107, -0.77999997)

['-9999', '-9999', 'N/A', '-9999', '-9999.0', '0']

In an attempt to ensure that something comes back, we can try looping down the first three on the list of nearby weather stations until we get something. I experimented with this in the following function, but it just ended up exceeding the permitted number of API calls within one minute and still failed to produce a single observation from any of the nearby weather stations. So the conclusion is that you can't rely on WU to deliver historic weather stats from a latitude and longitude.

In [4]:
import pandas as pd

def WUStats2(y=2015, m=6, d=9, lat=51.4775, lon=-0.461389, API_key = '4519864dc92d8fdb'):
    """Return one day's weather statistics, trying nearby weather stations in turn.

    A ``geolookup`` request lists the airport and personal weather stations
    near the given coordinates. Up to five of them (the first four plus the
    last one listed) are then queried for the daily summary, stopping at the
    first station that delivers one.

    Parameters
    ----------
    y, m, d : int
        Year, month and day of the requested date.
    lat, lon : float
        Latitude and longitude used for the station lookup.
    API_key : str
        Weather Underground API key.
        NOTE(review): a key is hard-coded as the default, and each history
        URL printed below contains the key.

    Returns
    -------
    list
        ``[mean temperature (C), pressure (mBar), humidity, dew point (C),
        wind speed (kph), wind direction (degrees)]`` as delivered by the API,
        or an empty list when no queried station returned a daily summary.

    Raises
    ------
    Exception
        Whatever the geolookup request or the parsing of its response raises;
        only the per-station history requests are best-effort.
    """
    # Obtain the closest local weather stations and drop entries without coordinates.
    address = 'http://api.wunderground.com/api/{0}/geolookup/q/{1},{2}.json'.format(API_key, lat, lon)
    loc = pd.read_json(address)
    nearby = loc.location['nearby_weather_stations']
    stations = nearby['airport']['station'] + nearby['pws']['station']
    stations = [s for s in stations if s['lon'] != '']

    # Keep the first four and the last. Slicing tolerates short lists (fixed
    # indices would raise IndexError), and the last station is only added when
    # it is not already among the first four.
    candidates = stations[:4]
    if len(stations) > 4:
        candidates.append(stations[-1])

    stats = []
    for nws in candidates:
        try:
            address = 'http://api.wunderground.com/api/{0}/history_{1}{2:02d}{3:02d}/q/{4},{5}.json'.format(API_key, y, m, d, nws['lat'], nws['lon'])
            print(address)
            obs = pd.read_json(address)
            o = obs.history.dailysummary[0]
            stats = [o['meantempm'], o['meanpressurem'], o['humidity'], o['meandewptm'], o['meanwindspdm'], o['meanwdird']]
        except Exception:
            # Best effort: a station without usable data is skipped and the
            # next candidate is tried. Interrupts are not swallowed.
            continue
        else:
            break
    return stats



In [5]:
# Retry the coordinates that returned sentinel values above, this time via the
# nearby-station lookup; every history URL that is tried gets printed.
WUStats2(2016, 12, 31,51.38000107, -0.77999997)

http://api.wunderground.com/api/4519864dc92d8fdb/history_20161231/q/51.38000107,-0.77999997.json
http://api.wunderground.com/api/4519864dc92d8fdb/history_20161231/q/51.27999878,-0.77999997.json


['5', '1030.88', '94', '6', '6', '241']

# Final solution
I'm thinking that the only way around this is to use the closest large airport on this list: https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat
I have stored a local copy (`airports.csv`), which could be put back onto the net somewhere.


In [30]:
import pandas as pd


def getNearestBigAirportCoords(lat= 51.38000107, lon=-0.77999997, airports_file='airports.csv'):
    """Return the coordinates of the airport closest to the given point.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the point of interest, in degrees.
    airports_file : str
        Path of the header-less, 14-column airports CSV. Defaults to
        ``'airports.csv'`` in the working directory.

    Returns
    -------
    numpy.ndarray
        ``[latitude, longitude]`` of the nearest airport, in degrees. When
        several airports are equally close, the first one in the file wins.

    Raises
    ------
    ValueError
        If the file contains no airports.
    """
    # Local import: this cell only imports pandas at the top.
    import numpy as np

    airports = pd.read_csv(airports_file, header=None)
    airports.columns = ['Number', 'Airport', 'Town', 'Country', 'IATA', 'ICAO',
                        'Latitude', 'Longitude','N1', 'N2', 'TZ','Region','A1','A2']
    coords = airports[['Latitude', 'Longitude']].values.astype(float)

    # Rank airports by the haversine term of the great-circle distance. Plain
    # degree differences would over-weight longitude away from the equator and
    # treat points either side of the +/-180 degree meridian as far apart.
    airport_lat = np.radians(coords[:, 0])
    airport_lon = np.radians(coords[:, 1])
    here_lat = np.radians(lat)
    here_lon = np.radians(lon)
    haversine = (np.sin((airport_lat - here_lat) / 2) ** 2
                 + np.cos(here_lat) * np.cos(airport_lat) * np.sin((airport_lon - here_lon) / 2) ** 2)

    # The distance itself is monotonic in this term, so argmin is sufficient.
    return coords[np.argmin(haversine)]


def _fetch_daily_summary(API_key, y, m, d, lat, lon):
    """Request one day's history for a location and return its daily-summary statistics."""
    address = 'http://api.wunderground.com/api/{0}/history_{1}{2:02d}{3:02d}/q/{4},{5}.json'.format(API_key, y, m, d, lat, lon)
    obs = pd.read_json(address)
    o = obs.history.dailysummary[0]
    return [o['meantempm'], o['meanpressurem'], o['humidity'], o['meandewptm'], o['meanwindspdm'], o['meanwdird']]


def WUStats(y=2015, m=6, d=9, lat=51.4775, lon=-0.461389, API_key = '4519864dc92d8fdb'):
    """Return one day's weather statistics, falling back to the nearest big airport.

    The daily summary is first requested for the given coordinates. If that
    request fails, or the result starts with the ``'-9999'`` sentinel or
    contains an empty string, the request is repeated at the coordinates
    returned by ``getNearestBigAirportCoords``.

    Parameters
    ----------
    y, m, d : int
        Year, month and day of the requested date.
    lat, lon : float
        Latitude and longitude of the location.
    API_key : str
        Weather Underground API key.
        NOTE(review): a key is hard-coded as the default; prefer passing one in.

    Returns
    -------
    list
        ``[mean temperature (C), pressure (mBar), humidity, dew point (C),
        wind speed (kph), wind direction (degrees)]`` as delivered by the API.
        If both attempts fail, the result of the first attempt is returned,
        which is ``['-9999']`` when that attempt raised.
    """
    stats = ['-9999']
    try:
        stats = _fetch_daily_summary(API_key, y, m, d, lat, lon)
    except Exception:
        # Best effort: keep the sentinel so the airport fallback below runs.
        pass

    if stats[0] == '-9999' or min(len(s) for s in stats) == 0:
        try:
            lat, lon = getNearestBigAirportCoords(lat, lon)
            stats = _fetch_daily_summary(API_key, y, m, d, lat, lon)
        except Exception:
            # Best effort: if the fallback fails too, return what we already have.
            pass
    return stats





In [7]:
print("In spite of such tricks, there still seem to be locations that don't work")
# First call relies on every default argument of WUStats.
print(WUStats())
# Second call passes an explicit date and coordinates.
x=WUStats(2017, 1, 21, 45.4480, 6.9802)
print(x)

In spite of such tricks, there still seem to be locations that don't work
['11', '1032.75', '57', '4', '15', '37']
['-1', '1028.14', '76', '-4', '8', '1']


## Checking exactly how figures are reported on WU
Just to check how weather reports are recorded, we compare Rodriguez Ballon airport at 2560m against Lima Collique airport at sea level. Admittedly these aren't very close, but they work OK for this test. 

In [8]:
# First location (presumably the high-altitude airport named in the text above; confirm).
lat,lon = -16.30, -71.5
# NOTE(review): the return value of this call is discarded; it is neither
# assigned nor the cell's last expression, so nothing is displayed.
getNearestBigAirportCoords(lat, lon)
print(WUStats(2017,2,15,lat,lon))
# Second location (presumably the sea-level airport named in the text above; confirm).
lat,lon = -12.0, -77.0
# NOTE(review): return value discarded here as well.
getNearestBigAirportCoords(lat, lon)
print(WUStats(2017,2,15,lat,lon))

['14', '1028.08', '88', '10', '7', '262']
['26', '1013.43', '63', '20', '7', '-1']


At 2560m, local pressure would be significantly lower than the reported 1027.88, so this must be a sea-level-adjusted figure. A temperature variation of 6.5 degrees per 1000m would make the sea-level-equivalent temperature of Rodriguez 30C, which is comparable with the temperature in Lima.

In [9]:
# Inputs: local temperature (C), reported pressure (mBar) and elevation (m).
Temp,Pressure,Elevation = 13,1027.88,2560
g =  9.80665   # acceleration due to gravity, m/s^2
M = 28.9644/1000    # molar mass of dry air, kg/mol (28.9644 g/mol divided by 1000)
L =  6.5/1000  # temperature lapse rate, K/m (6.5 K per 1000 m)
R = 8.31432    # universal gas constant, J/(mol*K)
TempK = Temp + 273.15
# Barometric formula: TempK + L * Elevation is the equivalent sea-level
# temperature, and the ratio is raised to the power g*M/(R*L).
localP = Pressure * 100 * (TempK / (TempK + L * Elevation)) ** (g*M/(R*L)) # x100 converts mBar to Pa; the ratio term adjusts for elevation
print('Elevation adjusted pressure {:.2f} mBar'.format(localP/100))
print('Sea level temperature {:.0f} C'.format(Temp + L * Elevation))


Elevation adjusted pressure 763.70 mBar
Sea level temperature 30 C


In [10]:
# Another test location, stored in the notebook-level lat/lon variables.
(lat, lon) = (-25.967936772035277, 27.548840262591703)

In [11]:
# Query 28 Jan 2018 at whatever lat/lon currently hold (set in the previous cell).
print(WUStats(2018,1,28,lat,lon))


['22', '1019.63', '55', '12', '10', '110']


In [12]:
import numpy as np

In [39]:
# Spot-check WUStats at random locations: three random longitudes, each paired
# with three freshly drawn random latitudes (nine queries in total).
# Longitude is drawn from [-180, 180) and latitude from [-90, 90); scaling
# longitude by 180 would only ever sample the band between -90 and 90.
for lon in np.random.random_sample(3)*360-180:
    for lat in np.random.random_sample(3)*180-90:
        print(lat,lon,WUStats(2018,1,28,lat,lon))

-11.944186734181727 10.37539408149027 ['26', '1009.57', '73', '22', '5', '288']
80.5204672690499 10.37539408149027 ['-7', '1007.38', '65', '-11', '10', '-1']
58.16743391812511 10.37539408149027 ['6', '1003.15', '79', '3', '32', '263']
28.718144392194773 27.73883613574904 ['12', '1026.12', '48', '3', '4', '-1']
-69.85828849171358 27.73883613574904 ['-9999']
50.72338403774597 27.73883613574904 ['-3', '1027.63', '88', '-4', '5', '-1']
-46.706832491958295 2.7395139783585734 ['23', '1012.50', '61', '15', '21', '208']
24.410489385728468 2.7395139783585734 ['19', '1017.57', '27', '-4', '29', '68']
-84.5657283260025 2.7395139783585734 ['-9999']


In [40]:
# Inspect which airport coordinates the fallback lookup picks for one sample point.
getNearestBigAirportCoords(16.3388,18.0079)

array([17.91710091, 19.11109924])

In [32]:
# Load the airports file into a notebook-level frame for inspection, using the
# same file name and column names as getNearestBigAirportCoords.
airports = pd.read_csv('airports.csv', header=None)
airports.columns = ['Number', 'Airport', 'Town', 'Country', 'IATA', 'ICAO', 'Latitude', 'Longitude','N1', 'N2', 'TZ','Region','A1','A2']

In [38]:
# Show every airport whose longitude is exactly 0.
airports[(airports.Longitude==0)]

Unnamed: 0,Number,Airport,Town,Country,IATA,ICAO,Latitude,Longitude,N1,N2,TZ,Region,A1,A2
1963,2033,South Pole Station Airport,Stephen's Island,Antarctica,\N,NZSP,-90.0,0.0,9300,12,U,Antarctica/South_Pole,airport,OurAirports


In [35]:
# Direct query for 28 Jan 2018 at a fixed pair of coordinates.
WUStats(2018,1,28,-33.90019989,  18.49830055)

['23', '1012.50', '61', '15', '21', '208']