## Weather API: IEM ASOS download service (https://mesonet.agron.iastate.edu/request/download.phtml)

In [1]:
"""
Example script that scrapes data from the IEM ASOS download service
"""
from __future__ import print_function
import json
import time
import datetime

# Python 2 and 3: alternative 4
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# Number of attempts to download data
MAX_ATTEMPTS = 6
# HTTPS here can be problematic for installs that don't have Lets Encrypt CA
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"


def download_data(uri):
    """Fetch the data from the IEM.

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function retries a failed download up
    to MAX_ATTEMPTS times, pausing a fixed 5 seconds between attempts so that
    a failed request is never re-sent immediately.  A response whose body
    starts with "ERROR" is treated as a failed attempt.

    Args:
      uri (string): URL to fetch
    Returns:
      string data, or an empty string when every attempt failed
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = urlopen(uri, timeout=300)
            try:
                data = response.read().decode("utf-8")
            finally:
                # Release the connection even when read/decode raises.
                response.close()
            if not data.startswith("ERROR"):
                return data
            print("download_data(%s) returned an error response" % (uri,))
        except Exception as exp:
            # Deliberately broad: any failure just triggers another attempt.
            print("download_data(%s) failed with %s" % (uri, exp))
        # Pause before the next attempt; waiting after the last one is useless.
        if attempt < MAX_ATTEMPTS:
            time.sleep(5)

    print("Exhausted attempts to download, returning empty data")
    return ""


def get_stations_from_filelist(filename):
    """Build a listing of stations from a simple file listing the stations.

    The file should simply have one station per line.  Whitespace around each
    entry is stripped and blank lines are skipped, so a trailing empty line
    does not turn into an empty station identifier.

    Args:
      filename (string): path of the station listing file
    Returns:
      list of station identifier strings, in file order
    """
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as fh:
        stripped = (line.strip() for line in fh)
        return [station for station in stripped if station]


def get_stations_from_networks():
    """Build a station list by using a bunch of IEM networks.

    One GeoJSON metadata request is made per network: the "AWOS" network plus
    one "<state>_ASOS" network for every state code listed below.

    Returns:
      list of the "sid" property of every feature returned, in request order
    """
    states = """AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME
     MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT
     WA WI WV WY"""
    # IEM quirk to have Iowa AWOS sites in its own labeled network
    networks = ["AWOS"] + ["%s_ASOS" % (state,) for state in states.split()]

    stations = []
    for network in networks:
        # Get metadata
        uri = (
            "https://mesonet.agron.iastate.edu/geojson/network/%s.geojson"
        ) % (network,)
        # The timeout keeps a single stalled request from hanging the run.
        response = urlopen(uri, timeout=300)
        try:
            jdict = json.load(response)
        finally:
            # Always release the connection, even if the JSON is malformed.
            response.close()
        stations.extend(site["properties"]["sid"] for site in jdict["features"])
    return stations


def download_alldata(startts=None, endts=None):
    """An alternative method that fetches all available data.

    Service supports up to 24 hours worth of data at a time, so the requested
    range is walked in 24 hour steps and every step is written to its own
    "YYYYMMDD.txt" file in the current working directory.

    Args:
      startts (datetime.datetime): start of the first 24 hour window (UTC);
        defaults to 2020-01-01
      endts (datetime.datetime): exclusive end of the range (UTC); defaults
        to 2020-01-08
    """
    # timestamps in UTC to request data for
    if startts is None:
        startts = datetime.datetime(2020, 1, 1)
    if endts is None:
        endts = datetime.datetime(2020, 1, 8)
    interval = datetime.timedelta(hours=24)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    now = startts
    while now < endts:
        window_end = now + interval
        thisurl = (
            service
            + now.strftime("year1=%Y&month1=%m&day1=%d&")
            + window_end.strftime("year2=%Y&month2=%m&day2=%d&")
        )
        print("Downloading: %s" % (now,))
        data = download_data(thisurl)
        outfn = "%s.txt" % (now.strftime("%Y%m%d"),)
        if not data:
            # download_data returns "" once its retries are exhausted; make
            # the resulting empty file visible instead of silent.
            print("Warning: nothing was downloaded, %s will be empty" % (outfn,))
        with open(outfn, "w") as fh:
            fh.write(data)
        now = window_end


def main():
    """Our main method.

    Requests the whole start/end range once per station and writes each
    response to "<station>_<start>_<end>.txt" in the current directory.
    """
    # timestamps in UTC to request data for
    startts = datetime.datetime(2020, 1, 1)
    endts = datetime.datetime(2020, 1, 8)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    service += startts.strftime("year1=%Y&month1=%m&day1=%d&")
    service += endts.strftime("year2=%Y&month2=%m&day2=%d&")

    # The range is the same for every station, so format it only once.
    start_label = startts.strftime("%Y%m%d%H%M")
    end_label = endts.strftime("%Y%m%d%H%M")

    # Two examples of how to specify a list of stations
    stations = get_stations_from_networks()
    # stations = get_stations_from_filelist("mystations.txt")
    for station in stations:
        # service already ends with "&", so no extra separator is added.
        uri = "%sstation=%s" % (service, station)
        print("Downloading: %s" % (station,))
        data = download_data(uri)
        outfn = "%s_%s_%s.txt" % (station, start_label, end_label)
        # The context manager closes the file even if the write fails.
        with open(outfn, "w") as out:
            out.write(data)


if __name__ == "__main__":
    # Run the day-by-day download; the commented-out call below is the
    # per-station alternative.
    download_alldata()
    # main()

Downloading: 2020-01-01 00:00:00
Downloading: 2020-01-02 00:00:00
Downloading: 2020-01-03 00:00:00
Downloading: 2020-01-04 00:00:00
Downloading: 2020-01-05 00:00:00
Downloading: 2020-01-06 00:00:00
Downloading: 2020-01-07 00:00:00


## Combine with Flights data

In [4]:
import pandas as pd

In [5]:
# Load the station list; the first CSV column is used as the row index.
df_stations = pd.read_csv('stations.csv',index_col=0)

In [10]:
# Display the station table.
df_stations

Unnamed: 0,station
0,CLT
1,DFW
2,DCA
3,BDL
4,GSP
...,...
355,CDV
356,ADK
357,OGS
358,PPG


In [3]:
import warnings
# Silence every warning from this point on.
# NOTE(review): this also hides warnings raised by pandas — confirm that is intended.
warnings.filterwarnings('ignore')

In [14]:
# Append the daily files 20200102.txt .. 20200107.txt to df_com_total.
# df_com_total must already exist before this cell runs (the cell that
# processes 20200101.txt creates it).
keep_cols = ['station','valid','tmpf','sknt','vsby','gust','skyl1','ice_accretion_3hr']
numeric_cols = ['tmpf','sknt','vsby','gust','skyl1','ice_accretion_3hr']

for i in range(2,8):
    ## import data from .txt
    file = '202001%02d.txt' % i
    print(file)
    df_weather = pd.read_csv(file,header=5)

    ## preparing fields of weather
    # .copy() makes an independent frame, so the assignments below do not
    # write into a slice of df_weather.
    df_weather1 = df_weather[keep_cols].copy()

    # 'M' is replaced by 0 so that each column can be cast to float.
    # NOTE(review): 'M' is presumably the service's missing-value marker;
    # after this step a missing reading cannot be told apart from a real 0.
    for col in numeric_cols:
        df_weather1[col] = df_weather1[col].replace(to_replace='M',value='0').astype('float')

    # NOTE(review): assumes 'valid' looks like "YYYY-MM-DD HH:MM" — confirm.
    df_weather1['hour'] = df_weather1['valid'].str[11:13]
    df_weather1['date'] = df_weather1['valid'].str[:10]

    df_weather1 = df_weather1.drop(columns=['valid'])

    ## Group by station/date/hour, keep stations listed in df_stations, concatenate
    df_com = df_weather1.groupby(['station','date','hour'],as_index=False).max()
    df_com = df_stations.merge(df_com,on='station')

    df_com_total = pd.concat([df_com_total,df_com],ignore_index=True)
    
    
    
    

20200102.txt
20200103.txt
20200104.txt
20200105.txt
20200106.txt
20200107.txt


In [11]:
    # Process the first daily file and use it to start df_com_total.
    df_weather = pd.read_csv('20200101.txt',header=5)

    ## preparing fields of weather
    # .copy() makes an independent frame, so the assignments below do not
    # write into a slice of df_weather.
    df_weather1 = df_weather[['station','valid','tmpf','sknt','vsby','gust','skyl1','ice_accretion_3hr']].copy()

    # 'M' is replaced by 0 so that each column can be cast to float.
    # NOTE(review): 'M' is presumably the service's missing-value marker;
    # after this step a missing reading cannot be told apart from a real 0.
    for col in ['tmpf','sknt','vsby','gust','skyl1','ice_accretion_3hr']:
        df_weather1[col] = df_weather1[col].replace(to_replace='M',value='0').astype('float')

    # NOTE(review): assumes 'valid' looks like "YYYY-MM-DD HH:MM" — confirm.
    df_weather1['hour'] = df_weather1['valid'].str[11:13]
    df_weather1['date'] = df_weather1['valid'].str[:10]

    df_weather1 = df_weather1.drop(columns=['valid'])

    ## Group by station/date/hour and keep stations listed in df_stations
    df_com = df_weather1.groupby(['station','date','hour'],as_index=False).max()
    df_com = df_stations.merge(df_com,on='station')

    df_com_total = df_com

In [17]:
# Print a summary of the combined frame (columns, non-null counts, dtypes).
df_com_total.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53458 entries, 0 to 53457
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   station            53458 non-null  object 
 1   date               53458 non-null  object 
 2   hour               53458 non-null  object 
 3   tmpf               53458 non-null  float64
 4   sknt               53458 non-null  float64
 5   vsby               53458 non-null  float64
 6   gust               53458 non-null  float64
 7   skyl1              53458 non-null  float64
 8   ice_accretion_3hr  53458 non-null  float64
dtypes: float64(6), object(3)
memory usage: 3.7+ MB


In [18]:
# Save the combined weather table; with the default settings the row index
# is written as the first column of the CSV.
df_com_total.to_csv('weather2020.csv')