In [None]:
import requests
import os
import json
import pandas as pd
import numpy as np

from dotenv import load_dotenv

# Load environment variables from a local .env file so that secrets (such as
# the NOAA API token read with os.getenv in the next cell) stay out of the
# notebook itself.
load_dotenv()

In [None]:
# Read the NOAA API token from the environment. `api_key` and `token` are two
# names bound to the same value (None when NOAA_CLIMATE_DATA is not set).
token = api_key = os.getenv('NOAA_CLIMATE_DATA')

In [None]:
# Country lookup table: read the CSV and keep its 'Code' column as a plain list
codes_path = 'Resources/ghcnd_country_codes_copy.csv'
code_country_df = pd.read_csv(codes_path, sep=',', header=0)
country_codes = code_country_df['Code'].tolist()

# Request windows: start_dates[i] is paired with end_dates[i] by the export loop
start_dates = ['1995-01-01', '2005-01-01', '2015-01-01']
end_dates = ['2004-12-31', '2014-12-31', '2023-12-31']

# Data type ids to request; they also become the value columns of each CSV.
# NOTE(review): "HXyz" / "HNyz" look like placeholder patterns rather than
# concrete ids - confirm against the dataset documentation.
data_types = [
    "DP10", "DP01", "DP1X", "DT32", "DT00",
    "DX70", "DX90", "PRCP", "RHAV", "TAVG",
    "TMAX", "TMIN", "EMXT", "EMNT", "HTDD",
    "EMXP", "MNPN", "MXPN", "HXyz", "HNyz",
]


In [None]:
# Function to get station IDs for a country
def get_stations_for_country(token, country_code, timeout=30):
    """Return every station record the NOAA CDO API lists for one country.

    The endpoint returns at most 1000 results per request, so the function
    keeps requesting further pages (via ``offset``) until the total reported
    in the response metadata has been collected.

    Args:
        token: API token, sent in the ``token`` request header.
        country_code: Country code; queried as ``FIPS:<country_code>``.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list with the ``results`` entries of every page fetched. Empty when
        the first request does not answer HTTP 200 or carries no results; if
        a later page fails, the stations gathered so far are returned.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
    headers = {'token': token}
    page_size = 1000
    params = {'locationid': f'FIPS:{country_code}', 'limit': page_size}

    stations = []
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            break

        payload = response.json()
        page = payload.get("results", [])
        stations.extend(page)

        # Only keep paging while the API tells us how many records exist and
        # we have not collected them all; this also guarantees termination.
        total = payload.get("metadata", {}).get("resultset", {}).get("count")
        if not page or not isinstance(total, int) or len(stations) >= total:
            break

        # NOTE(review): assumes 1-based offsets, as reported in the response
        # metadata - confirm against the API documentation.
        params['offset'] = len(stations) + 1

    return stations

In [None]:
# Function to get weather data for a station
def get_weather_data(token, station_id, start_date, end_date, data_types, timeout=30):
    """Fetch GSOY observations for one station over one date range.

    Results are requested 1000 at a time; further pages are fetched (via
    ``offset``) until the total reported in the response metadata is reached,
    so large result sets are not silently cut off at the first page.

    Args:
        token: API token, sent in the ``token`` request header.
        station_id: Station identifier passed as ``stationid``.
        start_date: Start of the range, passed as ``startdate``.
        end_date: End of the range, passed as ``enddate``.
        data_types: Iterable of data type id strings to request.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list with the ``results`` entries of every page fetched. Empty when
        the first request does not answer HTTP 200 or carries no results; if
        a later page fails, the observations gathered so far are returned.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
    headers = {'token': token}
    page_size = 1000
    params = {
        'datasetid': 'GSOY',
        'stationid': station_id,
        'startdate': start_date,
        'enddate': end_date,
        # NOTE(review): ids are sent comma-joined in a single parameter, as
        # before - confirm the API accepts this form for multiple ids.
        'datatypeid': ','.join(data_types),
        'units': 'standard',
        'limit': page_size
    }

    observations = []
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            break

        payload = response.json()
        page = payload.get("results", [])
        observations.extend(page)

        # Only keep paging while the API tells us how many records exist and
        # we have not collected them all; this also guarantees termination.
        total = payload.get("metadata", {}).get("resultset", {}).get("count")
        if not page or not isinstance(total, int) or len(observations) >= total:
            break

        # NOTE(review): assumes 1-based offsets, as reported in the response
        # metadata - confirm against the API documentation.
        params['offset'] = len(observations) + 1

    return observations

In [None]:
# Ensure output directory exists
os.makedirs('ClimateData', exist_ok=True)

# Column layout shared by every per-country CSV
columns = ["station", "date"] + data_types

# Iterate over each country code
for country_code in country_codes:
    # Get station IDs for the country; countries without stations get no file
    stations = get_stations_for_country(token, country_code)
    if not stations:
        continue

    # One record per observation returned by the API. Records are collected in
    # a plain list and turned into a DataFrame once, instead of concatenating a
    # one-row frame per observation (quadratic in the number of rows).
    # NOTE(review): each observation becomes its own row with a single
    # data-type column filled; rows for the same station/date are not merged -
    # confirm this sparse layout is what downstream code expects.
    records = []

    # Iterate over each station
    for station in stations:
        station_id = station["id"]

        # Iterate over each date range
        for start_date, end_date in zip(start_dates, end_dates):
            # Get weather data for the station and date range
            weather_data = get_weather_data(token, station_id, start_date, end_date, data_types)

            for result in weather_data:
                records.append({
                    "station": result["station"],
                    "date": result["date"][:10],  # keep only the YYYY-MM-DD part
                    result["datatype"]: result["value"],
                })

    # Build the frame once. Requested columns come first in their fixed order;
    # any data type the API returned that was not requested is kept at the end.
    df = pd.DataFrame(records)
    extra_columns = [name for name in df.columns if name not in columns]
    df = df.reindex(columns=columns + extra_columns)

    # Export DataFrame to CSV file
    output_path = f'ClimateData/{country_code}_climate_data.csv'
    df.to_csv(output_path, index=False)