In [2]:
import xarray as xr
import glob
import datetime

import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import numpy as np

from google.cloud import storage
import gcsfs
import zarr

import xskillscore as xs
import pandas as pd

import os

import re

In [7]:
print(meso_0_24['time'])
print(meso_24_48['time'])
print(meso_48_72['time'])
print(meso_72_96)

<xarray.DataArray 'time' (time: 52560)> Size: 420kB
array(['2022-01-01T00:10:00.000000000', '2022-01-01T00:20:00.000000000',
       '2022-01-01T00:30:00.000000000', ..., '2022-12-31T23:40:00.000000000',
       '2022-12-31T23:50:00.000000000', '2023-01-01T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 420kB 2022-01-01T00:10:00 ... 2023-01-01
<xarray.DataArray 'time' (time: 52560)> Size: 420kB
array(['2022-01-02T00:10:00.000000000', '2022-01-02T00:20:00.000000000',
       '2022-01-02T00:30:00.000000000', ..., '2023-01-01T23:40:00.000000000',
       '2023-01-01T23:50:00.000000000', '2023-01-02T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 420kB 2022-01-02T00:10:00 ... 2023-01-02
<xarray.DataArray 'time' (time: 52560)> Size: 420kB
array(['2022-01-03T00:10:00.000000000', '2022-01-03T00:20:00.000000000',
       '2022-01-03T00:30:00.000000000', ..., '2023-01-02T23:40:00.000000000',
     

### 10 min intervals

In [206]:
def transform_dataset(meso, start_hour, end_hour):
    # Convert 10 minutes to hours (since rain intensity is in m/s)
    time_delta_hours = 10 / 60  # 10 minutes = 1/6 hours

    # Define the rolling window size, which corresponds to 6 hours
    rolling_window_size = int(6 / time_delta_hours)  # 36 time steps

    # Calculate the rolling sum over the last 6 hours and convert to mm
    total_precipitation_6hr_mm = (
        meso['prate']
        .rolling(time=rolling_window_size, center=True)
        .sum() * time_delta_hours * 3600 * 1000  # m/s to mm
    )

    # Add this as a new variable in the dataset
    meso['total_precipitation_6hr_meso'] = total_precipitation_6hr_mm

    # Define the date range based on your dataset's time dimension
    date_range = pd.date_range(start='2022-01-01', end='2023-01-01', freq='D')[:-1]

    # Extract the time values for each day (144 points per day, 10-minute intervals)
    time_points = pd.date_range('2022-01-01 00:00', periods=144, freq='10min')

    # Transform each variable with a time dimension
    data_vars = {
        var_name: (
            ('date', 'time') + var_data.dims[1:], 
            var_data.values.reshape((len(date_range), len(time_points)) + var_data.shape[1:])
        )
        if 'time' in var_data.dims else var_data
        for var_name, var_data in meso.data_vars.items()
    }

    # Create the new dataset with date and time dimensions
    meso = xr.Dataset(
        data_vars=data_vars,
        coords={
            'date': date_range,
            'time': time_points,
            'height_above_ground_level': meso.coords['height_above_ground_level'],
            'station': meso['id'].data,
        }
    )

    # Drop unnecessary variables and coordinates
    meso = meso.drop_vars(['id', 'xf', 'yf', 'i', 'j', 'height_above_base_level'])

    # Slice for hourly values (every hour)
    meso = meso.sel(time=meso['time'].dt.minute == 0)

    # Rename 'time' to 'prediction_timedelta' and convert to timedelta64
    meso = meso.rename({'time': 'prediction_timedelta'})
    prediction_timedelta = (
        meso['prediction_timedelta'].astype('datetime64[ns]')
        .astype('timedelta64[ns]') % np.timedelta64(1, 'D')
    )

    # Adjust prediction_timedelta for start and end hours
    prediction_timedelta += np.timedelta64(start_hour, 'h')

    # Assign the adjusted prediction_timedelta back to the dataset
    meso = meso.assign_coords(prediction_timedelta=prediction_timedelta)

    # Rename 'date' to 'time'
    meso = meso.rename({'date': 'time'})

    # Slice prediction_timedelta for every 6 hours within the specified range
    meso = meso.sel(prediction_timedelta=meso['prediction_timedelta'].isin(
        [np.timedelta64(i, 'h') for i in range(start_hour, end_hour + 1, 6)]
    ))

    # Slice for start date 2022-01-12
    meso = meso.sel(time=slice('2022-01-12', None))

    # Extract 2m temperature and 10m wind components
    meso['2m_temperature_meso'] = meso['TC'].sel(height_above_ground_level=2)
    meso['10m_u_component_of_wind_meso'] = meso['u'].sel(height_above_ground_level=10)
    meso['10m_v_component_of_wind_meso'] = meso['v'].sel(height_above_ground_level=10)

#     # Convert surface level pressure to mean sea level pressure and convert to hPa
#     meso['mean_sea_level_pressure_meso'] = (
#     (meso['ps'] / 100) + 
#     meso['height_above_sea_level'].sel(height_above_ground_level=2) - 
#     2 * 9.81 / 1000 * meso['height_above_sea_level'].sel(height_above_ground_level=2)
# )

    meso['mean_sea_level_pressure_meso'] = meso['ps']

    # Drop original variables and unnecessary coordinates
    meso = meso.drop_vars(['TC', 'u', 'v', 'ps', 'M', 'phi', 'prate', 'height_above_ground_level'])

    # Slice for all prediction_timedelta values larger than 0
    meso = meso.sel(prediction_timedelta=meso['prediction_timedelta'] > np.timedelta64(0, 'h'))

    return meso

def add_96_hour_lead_time(meso):
    # Identify the last time step (T23:50) in the dataset
    last_time_step = meso.isel(prediction_timedelta=-1)

    # Duplicate the last time step to create a new time step for T24:00
    new_time_step = last_time_step.copy(deep=True)
    new_time_step = new_time_step.assign_coords(prediction_timedelta=np.timedelta64(96, 'h'))

    # Concatenate the new time step to the existing dataset
    meso_extended = xr.concat([meso, new_time_step], dim='prediction_timedelta')

    return meso_extended

# Load the datasets
meso_0_24 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-00-24.nc')
meso_24_48 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-24-48.nc')
meso_48_72 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-48-72.nc')
meso_72_96 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-72-96.nc')

# Apply transformations
meso_0_24_transformed = transform_dataset(meso_0_24, start_hour=0, end_hour=24)
meso_24_48_transformed = transform_dataset(meso_24_48, start_hour=24, end_hour=48)
meso_48_72_transformed = transform_dataset(meso_48_72, start_hour=48, end_hour=72)
meso_72_96_transformed = transform_dataset(meso_72_96, start_hour=72, end_hour=96)

# Add the missing 96-hour lead time to the 72-96 dataset
meso_72_96_extended = add_96_hour_lead_time(meso_72_96_transformed)

# Concatenate the datasets along the prediction_timedelta dimension
meso_combined = xr.concat([
    meso_0_24_transformed, 
    meso_24_48_transformed, 
    meso_48_72_transformed, 
    meso_72_96_extended
], dim='prediction_timedelta')

# Ensure 'station' is correctly aligned with 'index'
if 'index' in meso_combined.coords and 'station' in meso_combined.coords:
    meso_combined = meso_combined.assign_coords(station=('index', meso_combined['station'].values))

# Now swap 'index' with 'station' and drop 'index'
meso_combined = meso_combined.swap_dims({'index': 'station'}).drop_vars('index')

meso_combined = meso_combined.drop_sel(station='06252')

In [207]:
meso_combined

### hourly averaged

In [3]:
def transform_dataset(meso, start_hour, end_hour):
    # Convert 10 minutes to hours (since rain intensity is in m/s)
    time_delta_hours = 10 / 60  # 10 minutes = 1/6 hours

    # Define the rolling window size, which corresponds to 6 hours
    rolling_window_size = int(6 / time_delta_hours)  # 36 time steps

    # Calculate the rolling sum over the last 6 hours and convert to mm
    total_precipitation_6hr_mm = (
        meso['prate']
        .rolling(time=rolling_window_size, center=True)
        .sum() * time_delta_hours * 3600 * 1000  # m/s to mm
    )

    # Add this as a new variable in the dataset
    meso['total_precipitation_6hr_meso'] = total_precipitation_6hr_mm

    # Define the rolling window size for the last hour (6 time steps)
    rolling_window_size_hour = 6

    # Compute rolling mean over the last hour for 2m temperature
    meso['2m_temperature_meso'] = (
        meso['TC']
        .sel(height_above_ground_level=2)
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Compute rolling mean over the last hour for 10m u component of wind
    meso['10m_u_component_of_wind_meso'] = (
        meso['u']
        .sel(height_above_ground_level=10)
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Compute rolling mean over the last hour for 10m v component of wind
    meso['10m_v_component_of_wind_meso'] = (
        meso['v']
        .sel(height_above_ground_level=10)
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Compute rolling mean over the last hour for mean sea level pressure
    meso['mean_sea_level_pressure_meso'] = (
        meso['ps']
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Define the date range based on your dataset's time dimension
    date_range = pd.date_range(start='2022-01-01', end='2023-01-01', freq='D')[:-1]

    # Extract the time values for each day (144 points per day, 10-minute intervals)
    time_points = pd.date_range('2022-01-01 00:00', periods=144, freq='10min')

    # Transform each variable with a time dimension
    data_vars = {
        var_name: (
            ('date', 'time') + var_data.dims[1:],
            var_data.values.reshape((len(date_range), len(time_points)) + var_data.shape[1:])
        )
        if 'time' in var_data.dims else var_data
        for var_name, var_data in meso.data_vars.items()
    }

    # Create the new dataset with date and time dimensions
    meso = xr.Dataset(
        data_vars=data_vars,
        coords={
            'date': date_range,
            'time': time_points,
            'height_above_ground_level': meso.coords['height_above_ground_level'],
            'station': meso['id'].data,
        }
    )

    # Drop unnecessary variables and coordinates
    meso = meso.drop_vars([
        'id', 'xf', 'yf', 'i', 'j', 'height_above_base_level',
        'TC', 'u', 'v', 'ps', 'M', 'phi', 'prate', 'height_above_ground_level'
    ])

    # Slice for hourly values (every hour)
    meso = meso.sel(time=meso['time'].dt.minute == 0)

    # Rename 'time' to 'prediction_timedelta' and convert to timedelta64
    meso = meso.rename({'time': 'prediction_timedelta'})
    prediction_timedelta = (
        meso['prediction_timedelta'].astype('datetime64[ns]')
        .astype('timedelta64[ns]') % np.timedelta64(1, 'D')
    )

    # Adjust prediction_timedelta for start and end hours
    prediction_timedelta += np.timedelta64(start_hour, 'h')

    # Assign the adjusted prediction_timedelta back to the dataset
    meso = meso.assign_coords(prediction_timedelta=prediction_timedelta)

    # Rename 'date' to 'time'
    meso = meso.rename({'date': 'time'})

    # Slice prediction_timedelta for every 6 hours within the specified range
    meso = meso.sel(prediction_timedelta=meso['prediction_timedelta'].isin(
        [np.timedelta64(i, 'h') for i in range(start_hour, end_hour + 1, 6)]
    ))

    # Slice for start date 2022-01-12
    meso = meso.sel(time=slice('2022-01-12', None))

    # Slice for all prediction_timedelta values larger than 0
    meso = meso.sel(prediction_timedelta=meso['prediction_timedelta'] > np.timedelta64(0, 'h'))

    return meso

# Load the datasets
meso_0_24 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-00-24.nc')
meso_24_48 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-24-48.nc')
meso_48_72 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-48-72.nc')
meso_72_96 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-72-96.nc')

# Apply transformations
meso_0_24_transformed = transform_dataset(meso_0_24, start_hour=0, end_hour=24)
meso_24_48_transformed = transform_dataset(meso_24_48, start_hour=24, end_hour=48)
meso_48_72_transformed = transform_dataset(meso_48_72, start_hour=48, end_hour=72)
meso_72_96_transformed = transform_dataset(meso_72_96, start_hour=72, end_hour=96)

# Concatenate the datasets along the prediction_timedelta dimension
meso_combined = xr.concat([
    meso_0_24_transformed, 
    meso_24_48_transformed, 
    meso_48_72_transformed, 
    meso_72_96_transformed
], dim='prediction_timedelta')

# Ensure 'station' is correctly aligned with 'index'
if 'index' in meso_combined.coords and 'station' in meso_combined.coords:
    meso_combined = meso_combined.assign_coords(station=('index', meso_combined['station'].values))

# Now swap 'index' with 'station' and drop 'index'
meso_combined = meso_combined.swap_dims({'index': 'station'}).drop_vars('index')

# Drop station '06252'
meso_combined = meso_combined.drop_sel(station='06252')

meso_combined

In [99]:
def add_96_hour_prediction(meso_72_96, meso_combined):
    # Define the rolling window size for the last hour (6 time steps = 1 hour)
    rolling_window_size_hour = 6
    
    # Extract the last hour of each day from meso_72_96
    last_hour = meso_72_96.sel(time=meso_72_96['time'].dt.hour == 23)
    
    # Apply time-averaging for relevant variables over the last hour of each day
    last_hour_transformed = xr.Dataset({
        'total_precipitation_6hr_meso': last_hour['total_precipitation_6hr_meso']
            .rolling(time=rolling_window_size_hour, center=True).mean(),
        '2m_temperature_meso': last_hour['TC']
            .sel(height_above_ground_level=2).rolling(time=rolling_window_size_hour, center=True).mean(),
        '10m_u_component_of_wind_meso': last_hour['u']
            .sel(height_above_ground_level=10).rolling(time=rolling_window_size_hour, center=True).mean(),
        '10m_v_component_of_wind_meso': last_hour['v']
            .sel(height_above_ground_level=10).rolling(time=rolling_window_size_hour, center=True).mean(),
        'mean_sea_level_pressure_meso': last_hour['ps']
            .rolling(time=rolling_window_size_hour, center=True).mean(),
    })

    # Create a new prediction_timedelta for the last hour (96 hours)
    prediction_timedelta_96 = np.timedelta64(96, 'h')
    
    # Assign the new prediction_timedelta coordinate to the transformed last hour data
    last_hour_transformed = last_hour_transformed.expand_dims(dim={'prediction_timedelta': [prediction_timedelta_96]})
    
    # Align and ensure dimensions/coordinates match before concatenation
    if 'height_above_ground_level' in meso_combined.dims and 'height_above_ground_level' in last_hour_transformed.dims:
        last_hour_transformed = last_hour_transformed.drop_vars('height_above_ground_level')
    
    # Concatenate the transformed last hour to meso_combined along the prediction_timedelta dimension
    meso_combined_updated = xr.concat([meso_combined, last_hour_transformed], dim='prediction_timedelta', compat='override')
    
    return meso_combined_updated

# Now use the function to add the 96-hour prediction to meso_combined
meso_combined_final = add_96_hour_prediction(meso_72_96, meso_combined)

print(meso_combined_final)


MergeError: conflicting values for variable 'height_above_ground_level' on objects to be combined. You can skip this check by specifying compat='override'.

In [69]:
# take the last to values from prediction_timedelta
meso_combined = meso_combined.isel(prediction_timedelta=slice(-2, None))
meso_combined

In [4]:
# Constants
g = 9.80665  # gravitational acceleration in m/s^2
R = 287.05   # specific gas constant for dry air in J/(kg·K)

# Extract surface pressure
P_surface = meso_combined['mean_sea_level_pressure_meso']

# Convert temperature from Celsius to Kelvin
T_kelvin = meso_combined['2m_temperature_meso'] + 273.15

# Extract the surface height above sea level (assuming height_above_ground_level == 0)
surface_height = meso_combined['height_above_sea_level'].sel(height_above_ground_level=0)

# Calculate mean sea level pressure using the barometric formula
mean_sea_level_pressure = P_surface * np.exp((g * surface_height) / (R * T_kelvin))

# Add the new variable to the dataset
meso_combined['mean_sea_level_pressure_meso'] = mean_sea_level_pressure


# drop height_above_sea_level
meso_combined = meso_combined.drop_vars('height_above_sea_level')

# meso['P0_approx'] = P0_approx
# Final combined dataset

print(meso_combined)

<xarray.Dataset> Size: 5MB
Dimensions:                       (time: 354, prediction_timedelta: 15,
                                   station: 47)
Coordinates:
  * time                          (time) datetime64[ns] 3kB 2022-01-12 ... 20...
  * prediction_timedelta          (prediction_timedelta) timedelta64[ns] 120B ...
  * station                       (station) <U5 940B '06201' '06203' ... '06212'
Data variables:
    total_precipitation_6hr_meso  (time, prediction_timedelta, station) float32 998kB ...
    2m_temperature_meso           (time, prediction_timedelta, station) float32 998kB ...
    10m_u_component_of_wind_meso  (time, prediction_timedelta, station) float32 998kB ...
    10m_v_component_of_wind_meso  (time, prediction_timedelta, station) float32 998kB ...
    mean_sea_level_pressure_meso  (time, prediction_timedelta, station) float32 998kB ...


In [5]:
meso_combined.to_netcdf('meso_2022_centered.nc')

### hourly fucking around

In [94]:
def transform_dataset(meso, start_hour, end_hour):
    # Convert 10 minutes to hours (since rain intensity is in m/s)
    time_delta_hours = 10 / 60  # 10 minutes = 1/6 hours

    # Define the rolling window size, which corresponds to 6 hours
    rolling_window_size = int(6 / time_delta_hours)  # 36 time steps

    # Calculate the rolling sum over the last 6 hours and convert to mm
    total_precipitation_6hr_mm = (
        meso['prate']
        .rolling(time=rolling_window_size, center=True)
        .sum() * time_delta_hours * 3600 * 1000  # m/s to mm
    )

    # Add this as a new variable in the dataset
    meso['total_precipitation_6hr_meso'] = total_precipitation_6hr_mm

    # Define the rolling window size for the last hour (6 time steps)
    rolling_window_size_hour = 6

    # Compute rolling mean over the last hour for 2m temperature
    meso['2m_temperature_meso'] = (
        meso['TC']
        .sel(height_above_ground_level=2)
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Compute rolling mean over the last hour for 10m u component of wind
    meso['10m_u_component_of_wind_meso'] = (
        meso['u']
        .sel(height_above_ground_level=10)
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Compute rolling mean over the last hour for 10m v component of wind
    meso['10m_v_component_of_wind_meso'] = (
        meso['v']
        .sel(height_above_ground_level=10)
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Compute rolling mean over the last hour for mean sea level pressure
    meso['mean_sea_level_pressure_meso'] = (
        meso['ps']
        .rolling(time=rolling_window_size_hour, center=True)
        .mean()
    )

    # Define the date range based on your dataset's time dimension
    date_range = pd.date_range(start='2022-01-01', end='2023-01-01', freq='D')[:-1]

    # Define time points within a day as time deltas (from midnight)
    time_points = pd.timedelta_range(start='0H', periods=144, freq='10min')

    # Transform each variable with a time dimension
    data_vars = {
        var_name: (
            ('date', 'time') + var_data.dims[1:],
            var_data.values.reshape((len(date_range), len(time_points)) + var_data.shape[1:])
        )
        if 'time' in var_data.dims else var_data
        for var_name, var_data in meso.data_vars.items()
    }

    # Create the new dataset with date and time dimensions
    meso = xr.Dataset(
        data_vars=data_vars,
        coords={
            'date': date_range,
            'time': time_points,
            'height_above_ground_level': meso.coords['height_above_ground_level'],
            'station': meso['id'].data,
        }
    )

    # Drop unnecessary variables and coordinates
    meso = meso.drop_vars([
        'id', 'xf', 'yf', 'i', 'j', 'height_above_base_level',
        'TC', 'u', 'v', 'ps', 'M', 'phi', 'prate', 'height_above_ground_level'
    ])

    # Slice for hourly values (every hour)
    meso = meso.sel(time=meso['time'].dt.components.minutes == 0)

    # Create 'datetime' coordinate by combining 'date' and 'time'
    meso = meso.assign_coords(
        datetime=(('date', 'time'),
                  meso['date'].values[:, np.newaxis] + meso['time'].values)
    )

    # Compute 'prediction_timedelta' as time since initial datetime plus 'start_hour'
    initial_datetime = meso['datetime'][0, 0]
    meso = meso.assign_coords(
        prediction_timedelta=(meso['datetime'] - initial_datetime + np.timedelta64(start_hour, 'h'))
    )

    # Flatten the dimensions
    meso = meso.stack(datetime_index=('date', 'time'))

    # Slice prediction_timedelta for every 6 hours within the specified range
    desired_prediction_timedeltas = [np.timedelta64(i, 'h') for i in range(start_hour, end_hour + 1, 6)]
    meso = meso.sel(prediction_timedelta=meso['prediction_timedelta'].isin(desired_prediction_timedeltas))

    # Slice for start date 2022-01-12
    meso = meso.sel(datetime_index=meso['datetime'] >= np.datetime64('2022-01-12'))

    # Drop any prediction_timedelta values less than or equal to 0
    meso = meso.sel(prediction_timedelta=meso['prediction_timedelta'] > np.timedelta64(0, 'h'))

    return meso

# Load the datasets
meso_0_24 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-00-24.nc')
meso_24_48 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-24-48.nc')
meso_48_72 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-48-72.nc')
meso_72_96 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-72-96.nc')

# Apply transformations using the corrected function
meso_0_24_transformed = transform_dataset(meso_0_24, start_hour=0, end_hour=24)
meso_24_48_transformed = transform_dataset(meso_24_48, start_hour=24, end_hour=48)
meso_48_72_transformed = transform_dataset(meso_48_72, start_hour=48, end_hour=72)
meso_72_96_transformed = transform_dataset(meso_72_96, start_hour=72, end_hour=96)

# Concatenate the datasets along the prediction_timedelta dimension
meso_combined = xr.concat([
    meso_0_24_transformed, 
    meso_24_48_transformed, 
    meso_48_72_transformed, 
    meso_72_96_transformed
], dim='prediction_timedelta')

# Ensure 'station' is correctly aligned with 'index'
if 'index' in meso_combined.coords and 'station' in meso_combined.coords:
    meso_combined = meso_combined.assign_coords(station=('index', meso_combined['station'].values))

# Now swap 'index' with 'station' and drop 'index'
meso_combined = meso_combined.swap_dims({'index': 'station'}).drop_vars('index')

# Drop station '06252' if needed
meso_combined = meso_combined.drop_sel(station='06252')

meso_combined

  time_points = pd.timedelta_range(start='0H', periods=144, freq='10min')


AttributeError: 'TimedeltaAccessor' object has no attribute 'components'

In [76]:
meso_combined

In [34]:
meso_combined.to_netcdf('meso_2022_hourly.nc')

In [16]:
meso_ondisk = xr.open_dataset('meso_2022.nc')

meso_ondisk.sel(time='2022-01-12', prediction_timedelta='18h', station='06380').compute()


In [18]:
print(meso_ondisk)

<xarray.Dataset> Size: 5MB
Dimensions:                       (time: 354, prediction_timedelta: 16,
                                   station: 47)
Coordinates:
  * time                          (time) datetime64[ns] 3kB 2022-01-12 ... 20...
  * prediction_timedelta          (prediction_timedelta) timedelta64[ns] 128B ...
  * station                       (station) <U5 940B '06201' '06203' ... '06212'
Data variables:
    total_precipitation_6hr_meso  (time, prediction_timedelta, station) float32 1MB ...
    2m_temperature_meso           (time, prediction_timedelta, station) float32 1MB ...
    10m_u_component_of_wind_meso  (time, prediction_timedelta, station) float32 1MB ...
    10m_v_component_of_wind_meso  (time, prediction_timedelta, station) float32 1MB ...
    mean_sea_level_pressure_meso  (time, prediction_timedelta, station) float32 1MB ...


In [20]:
meso_24_processed.sel(valid_time='2022-01-13T12', station='06203').compute()

In [5]:
meso_24_processed = xr.open_dataset('meso_24_processed.nc')
meso_24 = xr.open_dataset('meso_24.nc')

In [6]:
meso_24

In [39]:
# add the first datapoint of the 24-48 dataset to the 0-24 dataset
meso_0_24_incl = xr.concat([meso_0_24_transformed, meso_24_48_transformed.isel(prediction_timedelta=0)], dim='prediction_timedelta')

# Constants
g = 9.80665  # Acceleration due to gravity (m/s^2)
R = 287.05   # Specific gas constant for dry air (J/(kg·K))

# Extract surface pressure
P_surface = meso_0_24_incl['mean_sea_level_pressure_meso']

# Convert 2m temperature to Kelvin
T_kelvin = meso_0_24_incl['2m_temperature_meso'] + 273.15

# Extract the surface height above sea level (assuming height_above_ground_level == 0)
surface_height = meso_0_24_incl['height_above_sea_level'].sel(height_above_ground_level=0)

# Calculate mean sea level pressure using the barometric formula
mean_sea_level_pressure = P_surface * np.exp((g * surface_height) / (R * T_kelvin))

# Add the new variable to the dataset
meso_0_24_incl['mean_sea_level_pressure_meso'] = mean_sea_level_pressure /100


# drop height_above_sea_level
meso_0_24_incl = meso_0_24_incl.drop_vars('height_above_sea_level')

meso_0_24_incl = meso_0_24_incl.drop_sel(station='06252')
meso_0_24_incl = meso_0_24_incl.drop_sel(index=16)

# string the forecasts toegether along the prediction_timedelta dimension, the new index is time_contd = time + prediction_timedelta
meso_0_24_incl_stacked = meso_0_24_incl.stack(time_contd=('time', 'prediction_timedelta'))	

# create a valid_time dimension by adding the prediction_timedelta to the time dimension
meso_0_24_incl_stacked['valid_time'] = meso_0_24_incl_stacked.time + meso_0_24_incl_stacked.prediction_timedelta

# replace the time_contd index with the valid_time index
meso_0_24_incl_stacked = meso_0_24_incl_stacked.swap_dims({'time_contd': 'valid_time'})

# drop time_contd as a coordinate
meso_0_24_incl_stacked = meso_0_24_incl_stacked.drop_vars('time_contd')


# Ensure 'station' is correctly aligned with 'index'
if 'index' in meso_0_24_incl_stacked.coords and 'station' in meso_0_24_incl_stacked.coords:
    meso_0_24_incl_stacked = meso_0_24_incl_stacked.assign_coords(station=('index', meso_0_24_incl_stacked['station'].values))

# Now swap 'index' with 'station' and drop 'index'
meso_0_24_incl_stacked = meso_0_24_incl_stacked.swap_dims({'index': 'station'}).drop_vars('index')

print(meso_0_24_incl)

meso_0_24_incl_stacked.to_netcdf('meso_24_processed.nc')

<xarray.Dataset> Size: 1MB
Dimensions:                       (time: 354, prediction_timedelta: 4,
                                   index: 47, station: 47)
Coordinates:
  * index                         (index) int32 188B 0 1 2 3 4 ... 44 45 46 47
  * time                          (time) datetime64[ns] 3kB 2022-01-12 ... 20...
  * station                       (station) <U5 940B '06201' '06203' ... '06212'
  * prediction_timedelta          (prediction_timedelta) timedelta64[ns] 32B ...
Data variables:
    total_precipitation_6hr_meso  (time, prediction_timedelta, index) float32 266kB ...
    2m_temperature_meso           (time, prediction_timedelta, index) float32 266kB ...
    10m_u_component_of_wind_meso  (time, prediction_timedelta, index) float32 266kB ...
    10m_v_component_of_wind_meso  (time, prediction_timedelta, index) float32 266kB ...
    mean_sea_level_pressure_meso  (time, prediction_timedelta, index) float32 266kB ...


In [41]:
meso_0_24_incl.isel(station=30, time=0, prediction_timedelta=0, index=30)['mean_sea_level_pressure_meso']
# meso_0_24_incl['height_above_sea_level'].isel(height_above_ground_level=0,index=0)

ValueError: Dimensions {'prediction_timedelta', 'time', 'index'} do not exist. Expected one or more of FrozenMappingWarningOnValuesAccess({'station': 47, 'valid_time': 1416})

In [16]:
meso_0_24_incl_stacked_nobarometric.to_netcdf('meso_24_processed_nobarometric.nc')

In [15]:
suffix = '_nobarometric'

# add a suffix to all the variables in the dataset
new_vars = {}
for var in meso_0_24_incl_stacked_nobarometric.data_vars:
    new_vars[var + suffix] = meso_0_24_incl_stacked_nobarometric[var]

meso_0_24_incl_stacked_nobarometric = xr.Dataset(new_vars)

meso_0_24_incl_stacked_nobarometric

In [None]:
meso_0_24_incl_stacked.to_netcdf('meso_24_processed.nc')

In [None]:
# Choose variables and times for comparison
variables_to_check = ['2m_temperature', '10m_u_component_of_wind', 'total_precipitation_6hr']
times_to_check = ['2022-01-13', '2022-01-15']

# Define prediction times for 0-24 and 24-48 hours
prediction_times_0_24 = [ '6h', '12h', '18h']
prediction_times_24_48 = ['24h', '30h', '36h', '42h']

# Function to compare values
def compare_values(dataset1, dataset2, var, time, prediction_timedelta):
    value1 = dataset1[var].sel(time=time, prediction_timedelta=prediction_timedelta).values
    value2 = dataset2[var].sel(time=time, prediction_timedelta=prediction_timedelta).values
    return np.array_equal(value1, value2)

# Iterate over the selected variables, times, and prediction times
for var in variables_to_check:
    for time in times_to_check:
        # Compare values for the 0-24 hour predictions
        for pred_time in prediction_times_0_24:
            if compare_values(meso_combined, meso_0_24_transformed, var, time, pred_time):
                print(f"Values for {var} at time {time} and prediction_timedelta={pred_time} match in meso_0_24.")
            else:
                print(f"Mismatch found for {var} at time {time} and prediction_timedelta={pred_time} in meso_0_24.")
        
        # Compare values for the 24-48 hour predictions
        for pred_time in prediction_times_24_48:
            if compare_values(meso_combined, meso_24_48_transformed, var, time, pred_time):
                print(f"Values for {var} at time {time} and prediction_timedelta={pred_time} match in meso_24_48.")
            else:
                print(f"Mismatch found for {var} at time {time} and prediction_timedelta={pred_time} in meso_24_48.")


KeyError: "No variable named '2m_temperature'. Variables on the dataset include ['total_precipitation_6hr_meso', '2m_temperature_meso', '10m_u_component_of_wind_meso', '10m_v_component_of_wind_meso', 'mean_sea_level_pressure_meso', 'time', 'prediction_timedelta', 'station']"

In [None]:
# meso_combined.to_netcdf('meso_2022.nc')

# BIG RESET USING o1

In [17]:
# Define the file paths
file_0_24 = '/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-00-24.nc'
file_24_48 = '/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-24-48.nc'
file_48_72 = '/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-48-72.nc'
file_72_96 = '/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-72-96.nc'

# Load the datasets
meso_0_24 = xr.open_dataset(file_0_24)
meso_24_48 = xr.open_dataset(file_24_48)
meso_48_72 = xr.open_dataset(file_48_72)
meso_72_96 = xr.open_dataset(file_72_96)


In [217]:
def transform_meso_dataset(dataset):
    """
    Transforms the input meso dataset by:
    - Setting 'station' as the dimension instead of 'index'.
    - Dropping the station with ID '06252'.
    - Extracting specific variables.
    - Performing necessary calculations.
    - Returning a new dataset with specified variables.

    Parameters:
    - dataset: xarray.Dataset

    Returns:
    - transformed_dataset: xarray.Dataset
    """
    import xarray as xr
    import numpy as np

    # Constants
    g = 9.80665  # Acceleration due to gravity (m/s^2)
    R = 287.05   # Specific gas constant for dry air (J/(kg·K))
    delta_t = 10 * 60  # Time interval in seconds (10 minutes)
    intervals_per_hour = 6  # 6 intervals per hour
    intervals_in_6_hours = 6 * intervals_per_hour  # 36 intervals in 6 hours

    # ------------------------------
    # A. Prepare Dataset: Set 'station' as dimension and drop '06252'
    # ------------------------------

    # Set 'id' as a coordinate and rename it to 'station'
    if 'id' not in dataset.coords:
        dataset = dataset.set_coords('id')
    dataset = dataset.rename({'id': 'station'})

    # Swap 'index' dimension with 'station'
    dataset = dataset.swap_dims({'index': 'station'})

    # Drop 'index' variable (no longer needed)
    dataset = dataset.drop_vars('index')

    # Drop station '06252'
    if '06252' in dataset['station']:
        dataset = dataset.drop_sel(station='06252')

    # ------------------------------
    # 1. Extract Variables at Required Heights
    # ------------------------------

    # 2m Temperature (Convert from Celsius to Kelvin)
    temp_2m = dataset['TC'].sel(height_above_ground_level=2.0, method='nearest') + 273.15
    temp_2m = temp_2m.reset_coords(drop=True)
    temp_2m.name = '2m_temperature_meso'

    # 10m U Component of Wind
    u_10m = dataset['u'].sel(height_above_ground_level=10.0, method='nearest')
    u_10m = u_10m.reset_coords(drop=True)
    u_10m.name = '10m_u_component_of_wind_meso'

    # 10m V Component of Wind
    v_10m = dataset['v'].sel(height_above_ground_level=10.0, method='nearest')
    v_10m = v_10m.reset_coords(drop=True)
    v_10m.name = '10m_v_component_of_wind_meso'

    # 10m Wind Speed
    wind_speed_10m = dataset['M'].sel(height_above_ground_level=10.0, method='nearest')
    wind_speed_10m = wind_speed_10m.reset_coords(drop=True)
    wind_speed_10m.name = '10m_wind_speed_meso'

    # ------------------------------
    # 2. Calculate Mean Sea Level Pressure (MSLP)
    # ------------------------------

    # Surface Pressure
    ps = dataset['ps']  # Dimensions: (time, station)

    # Temperature at Surface Level (in Kelvin)
    temp_surface = temp_2m  # Already converted to Kelvin

    # Height Above Sea Level at 2m
    h = dataset['height_above_sea_level'].sel(height_above_ground_level=2.0, method='nearest')
    h = h.reset_coords(drop=True)

    # Ensure h has the same dimensions as ps and temp_surface
    if 'height_above_ground_level' in h.dims:
        h = h.squeeze('height_above_ground_level')
    if 'time' not in h.dims and 'time' in ps.dims:
        h = h.expand_dims({'time': ps['time']})

    # Calculate exponent
    exponent = (g * h) / (R * temp_surface)

    # Calculate Mean Sea Level Pressure
    mslp = ps * np.exp(exponent)
    mslp.name = 'mean_sea_level_pressure_meso'

    # ------------------------------
    # 3. Calculate Total Precipitation over the Last 6 Hours
    # ------------------------------

    # Precipitation Rate
    prate = dataset['prate']  # Dimensions: (time, station)

    # Precipitation per Interval (Convert rate to amount over interval)
    precip_per_interval = prate * delta_t  # Units: meters

    # Rolling Sum over 36 Intervals (6 hours)
    total_precip_6hr = precip_per_interval.rolling(time=intervals_in_6_hours, min_periods=1).sum()
    total_precip_6hr.name = 'total_precipitation_6hr_meso'

    # ------------------------------
    # 4. Create New Dataset with Specified Variables
    # ------------------------------

    # Combine all variables into a new dataset
    transformed_dataset = xr.Dataset({
        '2m_temperature_meso': temp_2m,
        '10m_u_component_of_wind_meso': u_10m,
        '10m_v_component_of_wind_meso': v_10m,
        '10m_wind_speed_meso': wind_speed_10m,
        'mean_sea_level_pressure_meso': mslp,
        'total_precipitation_6hr_meso': total_precip_6hr
    })

    # Ensure 'station' is a coordinate in the transformed dataset
    transformed_dataset = transformed_dataset.set_coords('station')

    
    transformed_dataset = transformed_dataset.rename({'time': 'valid_time'})

    return transformed_dataset

def add_initialization_time_and_prediction_timedelta(ds: xr.Dataset) -> xr.Dataset:
    # Convert 'valid_time' to pandas datetime
    valid_time = pd.to_datetime(ds['valid_time'].values)

    # Calculate initialization time
    time = []
    for vt in valid_time:
        if vt.hour == 0 and vt.minute == 0:
            init_time = vt
        elif vt.hour == 24:
            init_time = (vt - pd.Timedelta(days=1)).normalize()
        else:
            init_time = vt.normalize()
        time.append(init_time)

    time = pd.to_datetime(time)
    
    # Calculate lead time (prediction_timedelta)
    prediction_timedelta = pd.to_datetime(valid_time) - pd.to_datetime(time)

    # Expand both dimensions
    ds = ds.expand_dims('time')
    ds = ds.expand_dims('prediction_timedelta')

    # Swap dimensions to properly index by time and lead time
    # ds = ds.swap_dims({'valid_time': 'time'})

    # Optionally slice or reshape further based on your requirements (like the example with `time`)
    ds = ds.sel(prediction_timedelta=ds['prediction_timedelta'] > np.timedelta64(0, 'h'))

    return ds






In [213]:

# Load your datasets (assuming you haven't loaded them yet)
meso_0_24 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-00-24.nc')
meso_24_48 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-24-48.nc')
meso_48_72 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-48-72.nc')
meso_72_96 = xr.open_dataset('/net/shared/student-projects/koenr/mesograsp_2022/extractorOutTFMetmast.000_leadtime-72-96.nc')

# Apply the transformation function to each dataset
meso_0_24_transformed = transform_meso_dataset(meso_0_24)
meso_24_48_transformed = transform_meso_dataset(meso_24_48)
meso_48_72_transformed = transform_meso_dataset(meso_48_72)
meso_72_96_transformed = transform_meso_dataset(meso_72_96)

meso_0_24_transformed

In [218]:
meso_0_24_transformed_init = add_initialization_time_and_prediction_timedelta(meso_0_24_transformed)
print(meso_0_24_transformed_init)

<xarray.Dataset> Size: 421kB
Dimensions:                       (station: 47, valid_time: 52560,
                                   prediction_timedelta: 0, time: 1)
Coordinates:
  * station                       (station) <U5 940B '06201' '06203' ... '06212'
  * valid_time                    (valid_time) datetime64[ns] 420kB 2022-01-0...
Dimensions without coordinates: prediction_timedelta, time
Data variables:
    2m_temperature_meso           (prediction_timedelta, time, valid_time, station) float32 0B ...
    10m_u_component_of_wind_meso  (prediction_timedelta, time, valid_time, station) float32 0B ...
    10m_v_component_of_wind_meso  (prediction_timedelta, time, valid_time, station) float32 0B ...
    10m_wind_speed_meso           (prediction_timedelta, time, valid_time, station) float32 0B ...
    mean_sea_level_pressure_meso  (prediction_timedelta, time, valid_time, station) float32 0B ...
    total_precipitation_6hr_meso  (prediction_timedelta, time, valid_time, station) float3

In [144]:
print(meso_0_24_transformed)
print(meso_24_48_transformed)
print(meso_48_72_transformed)
print(meso_72_96_transformed)

<xarray.Dataset> Size: 60MB
Dimensions:                       (station: 47, valid_time: 52560)
Coordinates:
  * station                       (station) <U5 940B '06201' '06203' ... '06212'
  * valid_time                    (valid_time) datetime64[ns] 420kB 2022-01-0...
Data variables:
    2m_temperature_meso           (valid_time, station) float32 10MB nan ... ...
    10m_u_component_of_wind_meso  (valid_time, station) float32 10MB ...
    10m_v_component_of_wind_meso  (valid_time, station) float32 10MB ...
    10m_wind_speed_meso           (valid_time, station) float32 10MB ...
    mean_sea_level_pressure_meso  (valid_time, station) float32 10MB nan ... ...
    total_precipitation_6hr_meso  (valid_time, station) float32 10MB nan ... 0.0
<xarray.Dataset> Size: 60MB
Dimensions:                       (station: 47, valid_time: 52560)
Coordinates:
  * station                       (station) <U5 940B '06201' '06203' ... '06212'
  * valid_time                    (valid_time) datetime64[ns] 4

In [33]:
# Concatenate datasets
combined_meso = xr.concat(
    [meso_0_24_transformed, meso_24_48_transformed, meso_48_72_transformed, meso_72_96_transformed],
    dim='lead_time'
)


In [38]:
# Ensure 'initialization_time' and 'station' are coordinates
combined_meso = combined_meso.set_coords(['initialization_time', 'station'])
combined_meso

In [37]:
combined_meso.sel(initialization_time='2022-05-11', lead_time='18h', station='06380').compute()

KeyError: "no index found for coordinate 'initialization_time'"