### Process missing weather station data

#### Fill missing ECCC wind-direction data with nearest-neighbor interpolation

In [None]:
import pandas as pd
from pathlib import Path

def eccc_fill_wind_dir_gaps(input_csv, *, float_format='%.2f'):
    """
    Fill gaps in the 'winddirsde' column of an ECCC station CSV and save the result.

    Missing values are filled with nearest-neighbor interpolation, computed separately for every
    ('climate id', 'year') group. Existing non-missing values are never modified. The filled table is
    written next to the input file as '<input stem>_filled.csv'.

    "Nearest" is measured along the row index produced by read_csv, so rows are presumably expected to
    be in chronological order within each station/year (TODO confirm against the data export).
    No gap-length limit is applied: a gap of any size inside a group is filled.

    Parameters:
    input_csv (str or Path): Path to the input CSV file containing 'climate id', 'year', and 'winddirsde' columns.
    float_format (str or None): Format applied by DataFrame.to_csv to EVERY float column of the output
        (not only 'winddirsde'). Defaults to '%.2f'; pass None to keep full precision.

    Raises:
    ValueError: If any of the required columns is absent from the CSV file.
    """
    data = pd.read_csv(input_csv, low_memory=False)

    required_columns = ['climate id', 'year', 'winddirsde']
    if any(col not in data.columns for col in required_columns):
        raise ValueError("The input CSV file must contain 'climate id', 'year', and 'winddirsde' columns.")

    def fill_nearest(series):
        # With fewer than two observations there is nothing to interpolate between;
        # propagate the single value (or leave an all-missing group untouched).
        if series.notna().sum() < 2:
            return series.ffill().bfill()
        interior = series.interpolate(method='nearest', limit_direction='both')
        # The SciPy-backed 'nearest' method does not extrapolate, so gaps before the first
        # and after the last observation may still be missing; copy the closest edge value.
        return interior.ffill().bfill()

    observed = data['winddirsde']
    filled = data.groupby(['climate id', 'year'])['winddirsde'].transform(fill_nearest)
    # Rows whose group key is missing are excluded by groupby and come back as NaN from
    # transform; taking only the gaps from `filled` keeps every original observation intact.
    data['winddirsde'] = observed.where(observed.notna(), filled)

    source = Path(input_csv)
    output_file = source.with_name(f"{source.stem}_filled.csv")
    data.to_csv(output_file, index=False, float_format=float_format)
    print(f"Filled data saved to {output_file}")

# Example run: fill the ECCC bounding-box station export; the result is written next to the input file.
csv_file = (
    r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\BBox\ECCC_station_data_BBox_small_1_2_3_4_5_6_7_8_9_10_11_12.csv"
)
eccc_fill_wind_dir_gaps(input_csv=csv_file)


Filled data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\BBox\ECCC_station_data_BBox_small_1_2_3_4_5_6_7_8_9_10_11_12_filled.csv


#### Fill missing ACIS station data with interpolation

In [None]:
import pandas as pd
from pathlib import Path

def acis_fill_wind_dir_gaps(input_csv):
    """
    Fill gaps in the 'winddir' column of an ACIS station CSV and save the result.

    Missing values are filled with nearest-neighbor interpolation, computed separately for every
    ('station name', calendar year) group, where the year is derived from the 'datetime' column.
    Existing non-missing values are never modified, and no column of the input (including a
    pre-existing 'year' column, if any) is added or removed. The filled table is written next to the
    input file as '<input stem>_filled.csv'.

    "Nearest" is measured along the row index produced by read_csv, so rows are presumably expected to
    be in chronological order within each station/year (TODO confirm against the data export).
    No gap-length limit is applied: a gap of any size inside a group is filled.

    Parameters:
    input_csv (str or Path): Path to the input CSV file containing 'station name', 'datetime', and 'winddir' columns.

    Raises:
    ValueError: If any of the required columns is absent from the CSV file.
    """
    data = pd.read_csv(input_csv, low_memory=False)

    required_columns = ['station name', 'datetime', 'winddir']
    if any(col not in data.columns for col in required_columns):
        raise ValueError("The input CSV file must contain 'station name', 'datetime', and 'winddir' columns.")

    data['datetime'] = pd.to_datetime(data['datetime'])
    # Group on a standalone Series instead of a temporary 'year' column so that a 'year'
    # column already present in the input is neither overwritten nor dropped.
    year_key = data['datetime'].dt.year.rename('_fill_year')

    def fill_nearest(series):
        # With fewer than two observations there is nothing to interpolate between;
        # propagate the single value (or leave an all-missing group untouched).
        if series.notna().sum() < 2:
            return series.ffill().bfill()
        interior = series.interpolate(method='nearest', limit_direction='both')
        # The SciPy-backed 'nearest' method does not extrapolate, so gaps before the first
        # and after the last observation may still be missing; copy the closest edge value.
        return interior.ffill().bfill()

    observed = data['winddir']
    filled = data.groupby([data['station name'], year_key])['winddir'].transform(fill_nearest)
    # Rows with a missing station name or unparseable datetime are excluded by groupby and come
    # back as NaN from transform; taking only the gaps from `filled` keeps original observations.
    data['winddir'] = observed.where(observed.notna(), filled)

    source = Path(input_csv)
    output_file = source.with_name(f"{source.stem}_filled.csv")
    data.to_csv(output_file, index=False)
    print(f"Filled data saved to {output_file}")

# Example run: fill the ACIS station export; the result is written next to the input file.
csv_file = (
    r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\ACIS_Data\ACIS_station_data.csv"
)
acis_fill_wind_dir_gaps(input_csv=csv_file)


Filled data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\ACIS_Data\ACIS_station_data_filled.csv
