In [2]:
!pip install reverse_geocoder

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting reverse_geocoder
  Downloading reverse_geocoder-1.5.1.tar.gz (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: reverse_geocoder
  Building wheel for reverse_geocoder (setup.py) ... [?25l[?25hdone
  Created wheel for reverse_geocoder: filename=reverse_geocoder-1.5.1-py3-none-any.whl size=2268067 sha256=cb54cbd91b339d2a21004be8e125bacf197febdc73e2fa2b2655d9bd67ac94dd
  Stored in directory: /root/.cache/pip/wheels/bd/e5/88/eb139b6d6a26b8022d370ab991f7a836802fed9871975ec6d9
Successfully built reverse_geocoder
Installing collected packages: reverse_geocoder
Successfully installed reverse_geocoder-1.5.1


In [3]:
import reverse_geocoder as rg 
import datetime
import time 
from urllib.request import urlopen
import os
import pandas as pd

In [4]:
def reverseGeocode(coordinates):
    """Map a batch of coordinates to country codes.

    Args:
      coordinates (list of tuple): (latitude, longitude) pairs

    Returns:
      list: the 'cc' field of every match returned by rg.search, in the
      order rg.search returned them; an empty list for an empty batch
    """
    # rg.search inspects the first element of its input, so an empty batch
    # has to be answered here instead of being passed through.
    if len(coordinates) == 0:
        return []
    result = rg.search(coordinates)
    return [r['cc'] for r in result]

# Upper bound on how many times download_data tries a single URL before giving up
MAX_ATTEMPTS = 6
# Base URL of the IEM ASOS request endpoint; query parameters are appended to it.
# Plain HTTP is used because HTTPS can be problematic for installs that don't
# have the Let's Encrypt CA
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"

def download_data(uri, max_attempts=None, backoff=5):
    """Fetch the data from the IEM

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function implements an exponential
    backoff to keep individual downloads from erroring: after every failed
    attempt (an exception, or a response body starting with "ERROR") it waits
    ``backoff * 2 ** attempt`` seconds before trying again.

    Args:
      uri (string): URL to fetch
      max_attempts (int, optional): number of tries before giving up;
        defaults to the module-level MAX_ATTEMPTS
      backoff (number, optional): base delay in seconds for the first retry

    Returns:
      string data, or "" when every attempt failed
    """
    if max_attempts is None:
        max_attempts = MAX_ATTEMPTS

    for attempt in range(max_attempts):
        try:
            # The context manager closes the response even if read/decode raises.
            with urlopen(uri, timeout=300) as response:
                data = response.read().decode("utf-8")
            if not data.startswith("ERROR"):
                return data
            print(f"download_data({uri}) got an error response: {data[:200].strip()}")
        except Exception as exp:
            # Best effort: any failure is reported and retried.
            print(f"download_data({uri}) failed with {exp}")
        # No point in waiting after the final attempt.
        if attempt < max_attempts - 1:
            time.sleep(backoff * 2 ** attempt)

    print("Exhausted attempts to download, returning empty data")
    return ""

def fetch_data_for_year(year_start, year_end):
    """Download one file per day for every day from year_start through year_end.

    For each day a request URL is built from SERVICE, fetched with
    download_data, and the response is written to ``YYYYMMDD.txt`` in the
    current working directory.  Days whose download came back empty are
    reported and no file is written for them.

    Args:
      year_start (int): first year to download (from 1 January)
      year_end (int): last year to download (through 31 December)
    """
    startts = datetime.datetime(year_start, 1, 1)
    endts = datetime.datetime(year_end + 1, 1, 1)
    interval = datetime.timedelta(hours=24)

    service = SERVICE + "data=all&tz=Etc/UTC&format=onlycomma&latlon=yes&elev=no&missing=empty&trace=T&direct=no&report_type=3&report_type=4&"

    now = startts
    while now < endts:
        # Each request covers the window [now, now + interval).
        thisurl = (
            service
            + now.strftime("year1=%Y&month1=%m&day1=%d&")
            + (now + interval).strftime("year2=%Y&month2=%m&day2=%d&")
        )
        print(f"Downloading: {now}")
        data = download_data(thisurl)
        outfn = f"{now:%Y%m%d}.txt"
        if data:
            # The payload was decoded as UTF-8, so write it back as UTF-8;
            # an ASCII writer would raise on any non-ASCII character.
            with open(outfn, "w", encoding="utf-8") as fh:
                fh.write(data)
        else:
            print(f"No data received for {now:%Y-%m-%d}, not writing {outfn}")
        now += interval

# Per-file frames collected by the loop below and concatenated afterwards.
all_df = []
# Columns read from each downloaded file; everything else is dropped at read time.
cols_to_keep = set(["valid", "lat","lon", "tmpf", "dwpf", "relh", "sknt", "p01i", "feel", "mslp", "ice_accretion_6hr"])
# Measurement columns that are coerced to float and averaged per country and month.
numeric_cols = ['tmpf', 'dwpf', 'mslp', 'sknt', 'feel', 'relh', 'ice_accretion_6hr']

year = 1966
fetch_data_for_year(year, year)  # first and last year to download (same year here)


for dfile in sorted(os.listdir()):
    if not dfile.endswith(".txt"):
        continue
    try:
        df = pd.read_csv(dfile, engine='pyarrow', usecols=cols_to_keep)
        if df.empty:
            # Header-only file: nothing to geocode or aggregate.
            continue
        lat_lon = list(zip(df['lat'].tolist(), df['lon'].tolist()))
        df["CountryCode"] = reverseGeocode(lat_lon)
        # Anything that is not a number (e.g. an 'M' marker) becomes NaN so a
        # single odd value does not discard the whole file.
        df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
        df['Month'] = pd.to_datetime(df['valid']).dt.month
        all_df.append(df)
    except Exception as e:
        # Best effort: report which file was skipped and carry on with the rest.
        print(f"Skipping {dfile}: {e}")

if not all_df:
    raise ValueError(f"No usable data files were found for {year}; nothing to aggregate")

df_concatenated = pd.concat(all_df, ignore_index=True)
# Mean of every measurement column per (country, month).
df_grouped = (
    df_concatenated
    .groupby(['CountryCode', 'Month'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped

Downloading: 1966-01-01 00:00:00
Downloading: 1966-01-02 00:00:00
Downloading: 1966-01-03 00:00:00
Downloading: 1966-01-04 00:00:00
Downloading: 1966-01-05 00:00:00
Downloading: 1966-01-06 00:00:00
Downloading: 1966-01-07 00:00:00
Downloading: 1966-01-08 00:00:00
Downloading: 1966-01-09 00:00:00
Downloading: 1966-01-10 00:00:00
Downloading: 1966-01-11 00:00:00
Downloading: 1966-01-12 00:00:00
Downloading: 1966-01-13 00:00:00
Downloading: 1966-01-14 00:00:00
Downloading: 1966-01-15 00:00:00
Downloading: 1966-01-16 00:00:00
Downloading: 1966-01-17 00:00:00
Downloading: 1966-01-18 00:00:00
Downloading: 1966-01-19 00:00:00
Downloading: 1966-01-20 00:00:00
Downloading: 1966-01-21 00:00:00
Downloading: 1966-01-22 00:00:00
Downloading: 1966-01-23 00:00:00
Downloading: 1966-01-24 00:00:00
Downloading: 1966-01-25 00:00:00
Downloading: 1966-01-26 00:00:00
Downloading: 1966-01-27 00:00:00
Downloading: 1966-01-28 00:00:00
Downloading: 1966-01-29 00:00:00
Downloading: 1966-01-30 00:00:00
Downloadin

Unnamed: 0,CountryCode,Month,tmpf,dwpf,mslp,sknt,feel,relh,ice_accretion_6hr
0,AE,1,68.140943,58.966038,1017.872381,7.094340,68.106698,74.666604,
1,AE,2,67.573906,58.721562,1015.458594,8.296875,67.578672,74.540859,
2,AE,3,70.298750,57.673750,1014.046575,9.568493,70.308681,66.197292,
3,AE,4,74.938609,61.937913,1010.150862,8.457627,75.407130,66.594870,
4,AE,5,83.785000,66.832500,1006.490411,9.537415,88.850347,59.565417,
...,...,...,...,...,...,...,...,...,...
871,ZW,8,66.383098,43.565232,,5.932874,66.012728,47.952478,
872,ZW,9,71.892540,46.495560,,7.639535,71.212901,45.725837,
873,ZW,10,76.078997,47.553028,,7.928325,75.006202,42.148209,
874,ZW,11,76.979507,54.426071,,7.191201,76.390391,50.980077,


In [5]:
# Write the aggregated table to a CSV file named after the processed year.
fileName = f"df_{year}.csv"
df_grouped.to_csv(path_or_buf=fileName)

In [6]:
# Delete the downloaded .txt files from the working directory.
# Match on the suffix (as the processing loop does) and only touch regular
# files, so names that merely contain ".txt" and directories are left alone.
for f in os.listdir():
  if f.endswith('.txt') and os.path.isfile(f):
    os.remove(f)