### Importing Libraries

In [1]:
import xarray as xr
import pandas as pd
from tqdm import tqdm

### Loading, Subsetting, and Saving Data

In [2]:
# Define paths and spatial/temporal bounds
wind_data_path = "Data/Wind/data.nc"
OUTPUT_FILE = "Data/Processed_Wind_Data.nc"
LON_MIN = 13.9
LON_MAX = 14.81
LAT_MIN = 35.6
LAT_MAX = 36.3
START_DATE = '2021-01-01'
END_DATE = '2023-11-12'

# Open the wind data file
wind_ds = xr.open_dataset(wind_data_path)

# Ensure latitude is correctly oriented (ascending order for selection)
if wind_ds.latitude[0] > wind_ds.latitude[-1]:
    wind_ds = wind_ds.reindex(latitude=list(reversed(wind_ds.latitude)))

# Create a date range for the expected time points
date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='12H')

# Initialize an empty list to store interpolated datasets
interpolated_datasets = []

for target_time in tqdm(date_range, desc="Interpolating Wind Data"):
    # Find the closest time in the dataset to the target_time
    closest_time = min(wind_ds.time, key=lambda x: abs(pd.to_datetime(x.values) - target_time))
    
    # Select the data slice closest in time
    ds_slice = wind_ds.sel(time=closest_time)
    
    # Assign the target_time to this slice to align with the interpolation grid
    ds_slice = ds_slice.assign_coords(time=target_time)
    
    interpolated_datasets.append(ds_slice)

# Concatenate all the time slices into a single dataset
interpolated_ds = xr.concat(interpolated_datasets, dim='time')

# Subset the dataset to the specified spatial bounds
wind_ds_subset = interpolated_ds.sel(
    longitude=slice(LON_MIN, LON_MAX),
    latitude=slice(LAT_MIN, LAT_MAX),
)

# Interpolate to fill any missing time points in the final dataset
wind_ds_final = wind_ds_subset.interp(time=date_range)

# Save the final interpolated and subset dataset to a new file
wind_ds_final.to_netcdf(OUTPUT_FILE)
print(f"Processed and interpolated wind data saved to {OUTPUT_FILE}")
print("="*125)
print(f"Wind data from {START_DATE} to {END_DATE} within the specified spatial bounds has been processed and interpolated.")
print("="*125)

# Close the original dataset to free resources
wind_ds.close()

Interpolating Wind Data: 100%|██████████| 2091/2091 [29:33<00:00,  1.18it/s]


Processed and interpolated wind data saved to Data/Processed_Wind_Data.nc
Wind data from 2021-01-01 to 2023-11-12 within the specified spatial bounds has been processed and interpolated.


### Ensuring that the merged file is correct

In [9]:
# Open the processed wind dataset
ds = xr.open_dataset(OUTPUT_FILE)

# Calculate the expected number of time points
date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='12H')
expected_time_points = len(date_range)

# Print section separator
print("=" * 125)
print("Wind Dataset Information")
print("=" * 125)

# Now, print the structure and check the dataset
print("\nDataset Dimensions:")
print(ds.dims)
print("\nDataset Coordinates:")
print(ds.coords)
print("\nData Variables in the Dataset:")
print(ds.data_vars)
print("\nAttributes (Metadata) in the Dataset:")
print(ds.attrs)

# Check that the time dimension is as expected
actual_time_points = ds.sizes['time']
print("\nExpected Time Points:", expected_time_points)
print("Actual Time Points:", actual_time_points)

# Check that the lat/lon are within the specified bounds
lat_min, lat_max = ds['latitude'].min().values, ds['latitude'].max().values
lon_min, lon_max = ds['longitude'].min().values, ds['longitude'].max().values
print("\nLatitude Range in the Dataset:", lat_min, "to", lat_max)
print("Longitude Range in the Dataset:", lon_min, "to", lon_max)

# Check for expected variables
assert 'u10' in ds.variables, "u10 variable is missing from the dataset"
assert 'v10' in ds.variables, "v10 variable is missing from the dataset"

print("")
print("=" * 125)

# Close the dataset after inspection
ds.close()

Wind Dataset Information

Dataset Dimensions:

Dataset Coordinates:
Coordinates:
  * longitude  (longitude) float32 14.1 14.5
  * latitude   (latitude) float32 35.6 36.0
  * time       (time) datetime64[ns] 2021-01-01 ... 2023-11-12

Data Variables in the Dataset:
Data variables:
    v10      (time, latitude, longitude) float64 ...
    u10      (time, latitude, longitude) float64 ...

Attributes (Metadata) in the Dataset:
{'Conventions': 'CF-1.6', 'history': '2023-12-17 08:43:38 GMT by grib_to_netcdf-2.25.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf.bin -S param -o /cache/tmp/77d2d03f-e095-4d90-b83e-999c43b3595c-adaptor.mars_constrained.external-1702802610.7212312-29905-15-tmp.nc /cache/tmp/77d2d03f-e095-4d90-b83e-999c43b3595c-adaptor.mars_constrained.external-1702802608.214588-29905-14-tmp.grib'}

Expected Time Points: 2091
Actual Time Points: 2091

Latitude Range in the Dataset: 35.599998474121094 to 36.0
Longitude Range in the Dataset: 14.100000381469727 to 14.5

