In [1]:
import requests
import os
import json
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

from dotenv import load_dotenv

In [2]:
 # Load environment variables from .env file
load_dotenv()

# Get the API key
api_key = os.getenv('NOAA_CLIMATE_DATA')
token = api_key
token

'alFfGMhomKzczEqYgTcxpPQLSFbuDtxY'

In [22]:
def get_weather_data(token, location_id, start_date, end_date, data_type):
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
    headers = {
        'token': token
    }
    params = {
        'datasetid': 'GSOY',  # Global Historical Climatology Network - GSOM is monthly, GSOY is yearly
        'locationid': f'FIPS:{location_id}',  # Location ID for country
        'startdate': start_date,
        'enddate': end_date,
        'datatypeid': data_type,
        'units': 'standard',  # Use 'standard' for Fahrenheit, inches, etc.
        'limit': 1000  # Adjust as needed
    }

    response = requests.get(url, headers=headers, params=params)
    
    return response



In [4]:
# list of dates
start_dates = [
    '1995-01-01', '2000-01-01', '2005-01-01', '2010-01-01',
    '2015-01-01', '2020-01-01', '2023-01-01'
]



end_dates = [  
    '1999-12-31', '2004-12-31', '2009-12-31', '2014-12-31',
    '2019-12-31', '2023-12-31'
]

# Date types to get
data_types = ["DP10", "DP1X", "DT32", "DX70", "DX90", "PRCP", "RHAV", "TAVG", 'TMAX', 'EMXT', 'EMNT', 'HTDD', 'EMXP', 'MNPN','MXPN', 'HXyz', 'HNyz']

In [5]:
# Load country codes to data frame
codes_path = 'Resources/ghcnd_country_codes.csv'
code_country_df = pd.read_csv(codes_path, sep=',', header=0)
location_ids = code_country_df['Code'].tolist()
print(code_country_df.tail())

    Code
213   WI
214   WQ
215   WZ
216   ZA
217   ZI


In [24]:
# Loop through the country codes
import time

weather_data_df = pd.DataFrame()
for country in code_country_df['Code']:
    print(country)

    # For each country, call NOAA API
    weather_data = get_weather_data(token, country, '1995-01-01', '1999-12-31', ['TAVG', 'TMAX', 'PRCP'])
    if weather_data.status_code == 200:
        weather_response = weather_data.json()
        if 'results' in weather_response:
            weather_data = pd.DataFrame(weather_data.json()['results'])
            weather_data['country'] = country
            weather_data_df = pd.concat([weather_data_df, weather_data])

weather_data_df

AE
<Response [200]>
AF
<Response [200]>
AG
<Response [200]>
AJ
<Response [200]>


KeyError: 'results'

In [7]:
# Loop through each country and date range
for location_id in location_ids:
    for start_date, end_date in zip(start_dates, end_dates):

        for data_type in data_types:
            weather_data = get_weather_data(token, location_id, start_date, end_date, data_type)
            
            # Check if the response is successful or not
            if isinstance(weather_data, tuple):
                # Error case
                print("Error occurred: Status Code:", weather_data[0], "Message:", weather_data[1])
        else:
            # Success case
            # Check if the response contains any data
            if weather_data.get("results"):
                # Parse the JSON strings into dictionaries
                results = weather_data["results"]

                # Create an empty DataFrame
                columns = ["station", "date"] + data_types
                df = pd.DataFrame(columns=columns)

                # Iterate over each result
                for result in results:
                    date = result["date"][:10]  # Extract date portion
                    datatype = result["datatype"]
                    value = result["value"]
                    station = result["station"]
        
                    # Check if the date already exists in the DataFrame
                    if date not in df.index:
                        df.loc[date, "station"] = station
        
                    # Add the value to the corresponding cell
                    df.loc[date, datatype] = value

                # Reset index to make date a column
                df.reset_index(inplace=True)
                # Rename the 'index' column to 'date'
                df.rename(columns={"index": "date"}, inplace=True)

                print(df)
    
            # Constructing filename
            filename = f"ClimateData/{location_id}_{start_date}_{end_date}_weather_data.csv"
        
            # Export the final dataframe to a CSV file with the constructed filename
            df.to_csv(filename, index=False)

<Response [200]>
Error occurred: Status Code: 200 Message: {}
<Response [200]>
Error occurred: Status Code: 200 Message: {}
<Response [200]>
Error occurred: Status Code: 200 Message: {}
<Response [200]>
Error occurred: Status Code: 200 Message: {'metadata': {'resultset': {'offset': 1, 'count': 1, 'limit': 1000}}, 'results': [{'date': '1996-01-01T00:00:00', 'datatype': 'DX70', 'station': 'GHCND:AEM00041218', 'attributes': 'S', 'value': 339}]}
<Response [200]>
Error occurred: Status Code: 200 Message: {'metadata': {'resultset': {'offset': 1, 'count': 1, 'limit': 1000}}, 'results': [{'date': '1996-01-01T00:00:00', 'datatype': 'DX90', 'station': 'GHCND:AEM00041218', 'attributes': 'S', 'value': 218}]}


KeyboardInterrupt: 