In [1]:
# test_weather_filter.ipynb

import xarray as xr
import pandas as pd
import numpy as np
import sys
import os


sys.path.append(os.path.abspath(".."))
from scripts.weather_filter import load_weather_types_csv, filter_by_weather_types

from scripts.ensemble_loader import load_ensemble_files
from scripts.subsetting import subset_by_lat_lon, subset_time
from scripts.utils import prepare_ensemble_grid, prepare_reference_grid
import xarray as xr

# --- Example file pattern ---
# Adjust the file pattern to match your ensemble file locations.
ensemble_pattern = os.path.join("../data", "total_precipitation_2017*.nc")

# Example chunking for large files.
# NOTE(review): this dict is not applied below -- the loader is called with
# chunks=None. Presumably passing `chunks=chunks` would enable it; confirm
# against scripts.ensemble_loader before switching.
chunks = {'time': 1, 'lat': 100, 'lon': 100}

# --- Load the ensemble files ---
ds_ensemble = load_ensemble_files(ensemble_pattern, chunks=None)

# --- Load the weather-type CSV as an xarray DataArray ---
wt_da = load_weather_types_csv("../data/ERA-5_historical.csv", date_col="date", wt_col="slwt")
print("Weather Type DataArray:\n", wt_da)

# --- Filter the ensemble by weather type ---
# The requested types are named once so the printed label below always
# reports the types actually passed to the filter.
include_types = [6]
ds_filtered = filter_by_weather_types(ds_ensemble, wt_da, include_types=include_types)
print(f"Filtered Dataset (wt in {include_types}):\n", ds_filtered)

# --- Check the time dimension of the filtered result ---
print("Times in filtered dataset:", ds_filtered.time.values)


Weather Type DataArray:
 <xarray.DataArray 'weather_type' (time: 23741)> Size: 190kB
array([6, 3, 2, ..., 8, 8, 8], shape=(23741,))
Coordinates:
  * time     (time) datetime64[ns] 190kB 1958-01-01T11:00:00 ... 2022-12-31T1...
Filtered Dataset (wt in [1,3]):
 <xarray.Dataset> Size: 2GB
Dimensions:        (time: 48, lat: 492, lon: 594, member: 10)
Coordinates:
  * member         (member) object 80B '00' '01' '02' '03' ... '07' '08' '09'
  * time           (time) datetime64[ns] 384B 2017-10-01 ... 2017-10-02T23:00:00
Dimensions without coordinates: lat, lon
Data variables:
    latitude       (time, lat, lon, member) float32 561MB 42.98 42.98 ... 51.82
    longitude      (time, lat, lon, member) float32 561MB 5.498 5.498 ... 22.1
    precipitation  (time, lat, lon, member) float64 1GB 0.0 0.0 0.0 ... 0.0 0.0
Times in filtered dataset: ['2017-10-01T00:00:00.000000000' '2017-10-01T01:00:00.000000000'
 '2017-10-01T02:00:00.000000000' '2017-10-01T03:00:00.000000000'
 '2017-10-01T04:00:00.00000

In [3]:
from scripts.weather_filter import load_weather_types_csv, filter_by_weather_types
from scripts.ensemble_loader import load_ensemble_files

# Glob pattern for the ensemble NetCDF files; adjust to your data location.
ensemble_pattern = "../data/total_precipitation_2017*.nc"
ds_ensemble = load_ensemble_files(ensemble_pattern)

wt_da = load_weather_types_csv(
    "../data/ERA-5_historical.csv",
    date_col="date",  # The column name in CSV
    wt_col="slwt"     # The column with numeric weather-type ID
)
print("Loaded weather types:\n", wt_da)

# Filter by the weather types listed in `include_types`.
# The list is named once so this comment and the call cannot drift apart.
include_types = [3]
ds_filtered = filter_by_weather_types(ds_ensemble, wt_da, include_types=include_types)
print("Filtered dataset:\n", ds_filtered)
# An empty time axis here means no loaded time step matched the requested types.
print("Times in filtered dataset:", ds_filtered.time.values)


Loaded weather types:
 <xarray.DataArray 'weather_type' (time: 23741)> Size: 190kB
array([6, 3, 2, ..., 8, 8, 8], shape=(23741,))
Coordinates:
  * time     (time) datetime64[ns] 190kB 1958-01-01T11:00:00 ... 2022-12-31T1...
Filtered dataset:
 <xarray.Dataset> Size: 80B
Dimensions:        (time: 0, lat: 492, lon: 594, member: 10)
Coordinates:
  * member         (member) object 80B '00' '01' '02' '03' ... '07' '08' '09'
  * time           (time) datetime64[ns] 0B 
Dimensions without coordinates: lat, lon
Data variables:
    latitude       (time, lat, lon, member) float32 0B 
    longitude      (time, lat, lon, member) float32 0B 
    precipitation  (time, lat, lon, member) float64 0B 
Times in filtered dataset: []
