In [1]:
# Install necessary libraries
# !pip install cdsapi xarray matplotlib netCDF4

import cdsapi
from pathlib import Path
import xarray as xr
import matplotlib.pyplot as plt

In [None]:
# Initialize the CDS API client
c = cdsapi.Client()

# Bounding box passed as the request's 'area', ordered [North, West, South, East]
# bbox = [60, -120, 50, -110]  # Alberta, Canada
bbox = [90, -170, 24, -50]  # North America
pressure_levels = ['250', '500', '850', '1000']
start_year = 1940
end_year = 2023

# Path to save the downloaded NetCDF file.
# The year range is part of the file name so that changing start_year/end_year
# triggers a fresh download instead of silently reusing a file for other years.
# NOTE: `dir` shadows the built-in of the same name; the name is kept because
# other cells may already rely on it.
dir = Path(r'D:\UCalgary_Lectures\GEOG_683\Data_workspace')
output_file = dir / f'NAmerica_geopotential_{start_year}_{end_year}.nc'

# Step 1: Download the data using the CDS API (skipped when the file is cached)
if not output_file.exists():
    print("Downloading ERA5 data from {0} to {1}...".format(start_year, end_year))
    dir.mkdir(parents=True, exist_ok=True)

    # Download to a temporary name first: an interrupted transfer must not leave
    # a truncated file that the exists() check above would accept next time.
    partial_file = output_file.with_name(output_file.name + '.part')
    c.retrieve(
        'reanalysis-era5-pressure-levels-monthly-means',
        {
            # NOTE(review): the monthly-means dataset presumably expects
            # 'monthly_averaged_reanalysis' (with 'time': '00:00') - confirm
            # against the CDS request form before changing.
            'product_type': 'reanalysis',
            'format': 'netcdf',
            'variable': ['u_component_of_wind', 'v_component_of_wind', 'geopotential', 'temperature'],
            'pressure_level': pressure_levels,
            'year': [str(year) for year in range(start_year, end_year + 1)],
            'month': [f'{month:02d}' for month in range(1, 13)],
            'time': ['12:00'],
            'area': bbox,
        },
        str(partial_file)
    )
    partial_file.replace(output_file)
    print("Download complete.")
else:
    print(f"File {output_file} already exists. Skipping download.")

In [None]:
import numpy as np

# Step 2: Load and process the data using xarray
if output_file.exists():
    # Load the downloaded ERA5 file
    data = xr.open_dataset(output_file)

    # Standard gravity (m s^-2) used to convert geopotential (m^2 s^-2) to height (m)
    STANDARD_GRAVITY = 9.80665

    # Extract the relevant variables at 250mb
    u_250mb = data['u'].sel(pressure_level=250)  # Zonal wind (east-west)
    v_250mb = data['v'].sel(pressure_level=250)  # Meridional wind (north-south)
    geopotential_250mb = data['z'].sel(pressure_level=250) / STANDARD_GRAVITY  # Geopotential height (m)

    # Average every field over the 'date' dimension (all months in the file)
    u_mean = u_250mb.mean(dim='date')
    v_mean = v_250mb.mean(dim='date')
    geo_height_mean = geopotential_250mb.mean(dim='date')

    # Magnitude of the time-mean wind vector (u_mean, v_mean).
    # Note: this is not the time-mean of the monthly wind speeds.
    wind_speed = np.sqrt(u_mean**2 + v_mean**2)

    # Contour levels: the height range is widened by two standard deviations so
    # the mean field only uses the middle of the colour map (a flatter look).
    geo_height_std = geopotential_250mb.std(dim='date')
    levels_geo = np.linspace(
        float((geo_height_mean - 2 * geo_height_std).min()),
        float((geo_height_mean + 2 * geo_height_std).max()),
        100,
    )
    levels_wind = np.linspace(float(wind_speed.min()), float(wind_speed.max()), 40)

    # Step 3: Plot the geopotential height map and wind speed as colored contours
    plt.figure(figsize=(10, 6))

    # Geopotential height as a filled contour map
    contour_geo = plt.contourf(geo_height_mean.longitude, geo_height_mean.latitude, geo_height_mean, levels=levels_geo, cmap='BuPu')
    plt.colorbar(contour_geo, label='Geopotential Height (meters)')

    # Wind speed as semi-transparent filled contours on top.
    # Run plt.colormaps() to list every available colour map name.
    contour_wind = plt.contourf(u_mean.longitude, u_mean.latitude, wind_speed, levels=levels_wind, cmap='jet', alpha=0.6)
    plt.colorbar(contour_wind, label='Wind Speed (m/s)')

    # The title reports the period that was actually requested and averaged
    plt.title(f'Monthly Averaged Wind Flow Patterns at 250mb ({start_year}-{end_year})')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')

    # Show the plot
    plt.grid(True)
    plt.show()
else:
    print(f"File {output_file} does not exist. Please check the path or download step.")


#### Download weather station data

In [None]:
import requests
from bs4 import BeautifulSoup
from pathlib import Path

def download_csv_files(url, folder_path, timeout=60):
    """Download every CSV file linked from an HTML directory listing.

    Parameters:
    - url: Address of the page whose <a> links are scanned.
    - folder_path: Local directory for the downloads (created if missing).
    - timeout: Seconds to wait for each HTTP request before giving up.

    Returns None. Progress and failures are reported with print().
    """
    # Local import keeps the function self-contained within this cell
    from pathlib import PurePosixPath
    from urllib.parse import unquote, urljoin, urlparse

    # Local directory where files will be saved
    save_dir = Path(folder_path)
    save_dir.mkdir(parents=True, exist_ok=True)

    # Get the webpage content
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to connect to {url}")
        return

    # Resolve relative links against the final URL (after any redirect)
    base_url = getattr(response, 'url', None) or url

    # Parse the webpage content
    soup = BeautifulSoup(response.text, 'html.parser')

    downloaded = 0
    failed = 0
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            # Anchors without an href (e.g. named anchors) carry no target
            continue

        # urljoin handles relative and absolute hrefs alike
        file_url = urljoin(base_url, href)
        # Only the last path component is used as the local name, so an href
        # containing directories or a query string cannot escape save_dir
        file_name = PurePosixPath(unquote(urlparse(file_url).path)).name
        if not file_name.lower().endswith('.csv'):
            continue

        # Download the CSV file
        print(f"Downloading {file_name}...")
        csv_response = requests.get(file_url, timeout=timeout)
        if csv_response.status_code != 200:
            # Do not save an error page under a .csv name
            print(f"Failed to download {file_url} (HTTP {csv_response.status_code})")
            failed += 1
            continue

        (save_dir / file_name).write_bytes(csv_response.content)
        downloaded += 1

    if failed:
        print(f"Downloaded {downloaded} file(s); {failed} failed.")
    else:
        print("All files downloaded!")

# Example usage: fetch the hourly climate observation CSVs listed for Alberta
url = "https://dd.weather.gc.ca/climate/observations/hourly/csv/AB/"
download_folder = r"C:\Users\Sunbeam\Downloads\csv_files"
download_csv_files(url=url, folder_path=download_folder)


In [4]:
import arcpy
import re
import pandas as pd
from pathlib import Path

arcpy.env.overwriteOutput = True

def _read_station_csv(csv_file):
    """Read one station CSV (UTF-8, retried as ISO-8859-1) with stripped, lower-cased column names."""
    try:
        frame = pd.read_csv(csv_file, encoding='utf-8', on_bad_lines='skip', engine='python')
    except UnicodeDecodeError:
        frame = pd.read_csv(csv_file, encoding='ISO-8859-1', on_bad_lines='skip', engine='python')
    frame.columns = frame.columns.str.strip().str.lower()
    return frame


def filter_weather_data(csv_dir, months, shp_polygon, weather_param=None):
    """
    Filters weather station CSV records by month and by location, then writes a point
    shapefile of the matching stations and one merged CSV of the matching records.

    Records are kept when their 'date/time (lst)' month is in `months` and their
    coordinates lie within the first feature of `shp_polygon`. Both outputs are written
    next to `shp_polygon`:
    - filtered_stations_<polygon stem>_<months>.shp: one point per unique
      (longitude, latitude, climate id), with 'station_n' and 'climate_id' filled in.
      A TEXT field is also created for each weather parameter, but those fields are
      left empty (a single point cannot hold per-record values).
    - merged_weather_data_<polygon stem>_<months>.csv: all kept records, with weather
      parameter columns renamed to their cleaned (letters only, max 10 chars) names.

    Parameters:
    - csv_dir: Directory containing the CSV files.
    - months: List of months to filter data (e.g., [1, 2, 3] for Jan, Feb, Mar).
    - shp_polygon: Shapefile whose first feature is the bounding polygon.
    - weather_param: List of weather parameters to include (e.g., ['Temp (°C)', 'Wind Dir (10s deg)']).
      A parameter missing from a given CSV is filled with NaN for that file.

    Raises:
    - ValueError: if `shp_polygon` contains no features.
    """
    default_columns = ['longitude (x)', 'latitude (y)', 'station name', 'climate id', 'date/time (lst)', 'year', 'month', 'day', 'time (lst)']
    required_columns = ['station name', 'climate id', 'date/time (lst)']
    weather_param = weather_param or []
    original_weather_param = [param.lower() for param in weather_param]
    # Shapefile field names: letters only, at most 10 characters
    cleaned_weather_param = [re.sub(r'[^a-zA-Z]', '', param)[:10] for param in original_weather_param]
    selected_columns = default_columns + original_weather_param

    with arcpy.da.SearchCursor(shp_polygon, ["SHAPE@"]) as cursor:
        first_feature = next(cursor, None)
    if first_feature is None:
        raise ValueError(f"No features found in {shp_polygon}")
    bbox_polygon = first_feature[0]

    # NOTE(review): station points are built in WGS84 (EPSG:4326); this presumably
    # matches the polygon's spatial reference - confirm for new bounding boxes.
    wgs84 = arcpy.SpatialReference(4326)

    stations = {}     # (longitude, latitude, climate id) -> station attributes, first-seen order
    kept_frames = []  # one filtered frame per CSV that contributed records

    # Sorted so station order (and therefore shapefile row order) is deterministic
    for csv_file in sorted(Path(csv_dir).glob("*.csv")):
        weather_data = _read_station_csv(csv_file)

        longitude_cols = [col for col in weather_data.columns if 'longitude' in col]
        latitude_cols = [col for col in weather_data.columns if 'latitude' in col]
        missing = [col for col in required_columns if col not in weather_data.columns]
        if missing or not longitude_cols or not latitude_cols:
            print(f"Skipping {csv_file.name}: required station/date/coordinate columns not found")
            continue
        longitude_col, latitude_col = longitude_cols[0], latitude_cols[0]

        # Month filter; unparseable timestamps become NaT and are dropped by isin()
        timestamps = pd.to_datetime(weather_data['date/time (lst)'], errors='coerce')
        weather_data = weather_data.assign(**{
            'date/time (lst)': timestamps,
            # Ids are normalised to text so the same station matches across files
            'climate id': weather_data['climate id'].astype(str).str.strip(),
        })
        weather_data = weather_data[timestamps.dt.month.isin(months)]
        if weather_data.empty:
            continue

        # Spatial filter: test each distinct coordinate once instead of every row
        coords = weather_data[[longitude_col, latitude_col]].dropna().drop_duplicates()
        points_inside = {}
        for lon, lat in coords.itertuples(index=False, name=None):
            point = arcpy.PointGeometry(arcpy.Point(lon, lat), wgs84)
            if point.within(bbox_polygon):
                points_inside[(lon, lat)] = point
        in_bbox = pd.Series(
            [pair in points_inside for pair in zip(weather_data[longitude_col], weather_data[latitude_col])],
            index=weather_data.index,
            dtype=bool,
        )
        weather_data = weather_data[in_bbox]
        if weather_data.empty:
            continue

        # Register each station the first time its (lon, lat, id) combination is seen
        first_rows = weather_data.drop_duplicates(subset=[longitude_col, latitude_col, 'climate id'])
        for lon, lat, station_name, climate_id in zip(
            first_rows[longitude_col], first_rows[latitude_col],
            first_rows['station name'], first_rows['climate id'],
        ):
            if (lon, lat, climate_id) not in stations:
                stations[(lon, lat, climate_id)] = {
                    'station_name': station_name,
                    'climate_id': climate_id,
                    'location': points_inside[(lon, lat)],
                }

        # reindex (not plain selection) so a column absent from this CSV becomes NaN
        kept_frames.append(weather_data.reindex(columns=selected_columns))

    print(f"Total unique stations within the bounding box: {len(stations)} stations")

    if not kept_frames:
        print("No weather records matched the requested months and bounding box; nothing was written.")
        return

    merged_data = pd.concat(kept_frames, ignore_index=True)

    # Print year-wise record counts, using only the rows that belong to each station
    for (station_name, climate_id), station_rows in merged_data.groupby(['station name', 'climate id'], sort=False):
        year_counts = station_rows['date/time (lst)'].dt.year.value_counts().sort_index()
        year_info = ', '.join(f"{year} ({count})" for year, count in year_counts.items())
        print(f"{station_name} (ID: {climate_id}) - {year_info}")

    output_dir = Path(shp_polygon).parent
    output_suffix = f"{Path(shp_polygon).stem}_{'_'.join(map(str, months))}"

    shapefile_name = output_dir / f"filtered_stations_{output_suffix}.shp"
    station_records = list(stations.values())
    arcpy.CopyFeatures_management([record['location'] for record in station_records], str(shapefile_name))

    # Duplicate or empty cleaned names cannot be added as separate fields
    weather_fields = [name for name in dict.fromkeys(cleaned_weather_param) if name]
    arcpy.management.AddFields(
        str(shapefile_name),
        [["station_n", "TEXT"], ["climate_id", "TEXT"]] + [[field, "TEXT"] for field in weather_fields],
    )

    # Relies on the copied features keeping the order of station_records
    with arcpy.da.UpdateCursor(str(shapefile_name), ["station_n", "climate_id"]) as cursor:
        for row, record in zip(cursor, station_records):
            # Both fields are TEXT, so values are written as strings
            row[0], row[1] = str(record['station_name']), str(record['climate_id'])
            cursor.updateRow(row)

    print(f"Filtered shapefile saved at: {shapefile_name}")

    merged_csv_path = output_dir / f"merged_weather_data_{output_suffix}.csv"
    merged_data = merged_data.rename(columns=dict(zip(original_weather_param, cleaned_weather_param)))
    merged_data.to_csv(merged_csv_path, index=False)
    print(f"Merged CSV file saved at: {merged_csv_path}")

# Inputs for the station filter: CSV folder, months to keep, bounding polygon, parameters
csv_dir = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\Station_Data_csv"
months = [1]
# Alternative (smaller) bounding box:
# shp_polygon = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\BBox\BBox_small.shp"
shp_polygon = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\BBox\BBox_large.shp"
weather_param = ['Temp (°C)', 'Wind Dir (10s deg)']

filter_weather_data(
    csv_dir=csv_dir,
    months=months,
    shp_polygon=shp_polygon,
    weather_param=weather_param,
)


Total unique stations within the bounding box: 19 stations
CORONATION A (ID: 3011880) - 1953 (744), 1954 (744), 1955 (744), 1956 (744), 1957 (744), 1958 (744), 1959 (744), 1960 (744), 1961 (744), 1962 (744), 1963 (744), 1964 (744), 1965 (744), 1966 (744), 1967 (744), 1968 (744), 1969 (744), 1970 (744), 1971 (744), 1972 (744), 1973 (744), 1974 (744), 1975 (744), 1976 (744), 1977 (744), 1978 (739), 1979 (739), 1980 (739), 1981 (739), 1982 (739), 1983 (744), 1984 (744), 1985 (744), 1986 (744), 1987 (744), 1988 (744), 1989 (744), 1990 (744), 1991 (744), 1992 (744), 1993 (744), 1994 (744)
CORONATION (AUT) (ID: 3011885) - 1995 (744), 1996 (744), 1997 (744), 1998 (744), 1999 (744), 2000 (744), 2001 (744), 2002 (744), 2003 (744)
CORONATION CLIMATE (ID: 3011887) - 2004 (744), 2005 (744), 2006 (744), 2007 (744), 2008 (744), 2009 (744), 2010 (744), 2011 (744), 2012 (744), 2013 (744), 2014 (744), 2015 (744), 2016 (744), 2017 (744), 2018 (744), 2019 (744), 2020 (744), 2021 (744), 2022 (744), 2023 (