In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import os
import time
import pyproj
import glob

In [2]:
airport_location = pd.read_csv('../data/airport_positions.csv')

In [3]:
data = xr.open_mfdataset('/lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/05/meps_lagged_6_h_subset_2_5km_20211005T00Z.nc', chunks={'time':1})
# Remove parenthesis for airport location in dataset -> split lon and lat values -> divide by 100 to obtain correct values
airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)
airport_location[['latitude', 'longitude']] = airport_location[['latitude', 'longitude']].astype('float') / 100

  airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)


In [4]:
parameters = ['air_temperature_0m', 'air_temperature_2m']

In [5]:
def transform_latitude_longitude_to_xy(latitude_values, longitude_values):
    # Create a pyproj CRS object
    proj4_str = '+proj=lcc +lat_0=63.3 +lon_0=15 +lat_1=63.3 +lat_2=63.3 +no_defs +R=6.371e+06'
    lcc_crs = pyproj.CRS.from_proj4(proj4_str)

    # Create a transformer for converting between lat/lon and x/y
    transformer = pyproj.Transformer.from_crs(pyproj.CRS("EPSG:4326"), lcc_crs, always_xy=True)

    # Transform lat/lon to x/y
    x_values, y_values = transformer.transform(longitude_values, latitude_values)

    return x_values, y_values

In [42]:
# Define folder with paths and prefix for files to read
folder_path = '/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/'
file_prefix = 'meps_lagged_6_h_subset_2_5km_'
file_pattern = os.path.join(folder_path, file_prefix)
file_list = glob.glob(file_pattern)
output_folder = '/lustre/storeB/users/tonjek/msc/2024_msc_tonje_metar/extracted_netcdf/2023/'

In [35]:
for month_folder in os.listdir(folder_path):
    month_path = os.path.join(folder_path, month_folder)
    monthly_data = []

    for day_folder in os.listdir(month_path):
        day_path = os.path.join(month_path, day_folder)
        file_list = [file for file in os.listdir(day_path) if file.startswith(file_prefix)]

        all_timesteps_data = []
        print(f"Processing day: Month: {month_folder}, Day: {day_folder}")

        for file in file_list:
            file_path = os.path.join(day_path, file)
            start_time = time.time()

            data = xr.open_mfdataset(file_path, chunks={'time': 1})

            # Extracting parameter values only for matching nearest location
            latitude_values = airport_location.latitude.values
            longitude_values = airport_location.longitude.values

            x_values, y_values = transform_latitude_longitude_to_xy(latitude_values, longitude_values)

            airport_six_timesteps = []

            for airport_idx in range(len(latitude_values)):
                nearest_x, nearest_y = transform_latitude_longitude_to_xy(latitude_values[airport_idx], longitude_values[airport_idx])

                lat_from_data = data.latitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()
                lon_from_data = data.longitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()

                interpolated_data = data[parameters].interp(x=nearest_x, y=nearest_y)

                # Convert to pandas datetime
                start_datetime = pd.to_datetime(data.time.values[0])
                # Choosing 6 timesteps in total
                end_datetime = start_datetime + pd.DateOffset(hours=5)

                extracted_data = interpolated_data.sel(
                    ensemble_member=0,
                    time=slice(start_datetime, end_datetime)
                )

                extracted_data = extracted_data.expand_dims({'airport': [airport_idx + 1]})
                airport_six_timesteps.append(extracted_data)

            combined_data = xr.concat(airport_six_timesteps, dim='airport')
            all_timesteps_data.append(combined_data)

            end_time = time.time()
            print(f'Time taken to extract info from {file}: {end_time - start_time}')
            print(len(all_timesteps_data))

        # For all days
        daily_data = xr.concat(all_timesteps_data, dim='time')
        airport_netcdf_path = os.path.join(output_folder, f'all_airports_2023_{month_folder}_{day_folder}.nc')
        daily_data.to_netcdf(airport_netcdf_path)

Processing day: Month: 11, Day: 19



KeyboardInterrupt

