In [19]:
import xarray as xr
import pandas as pd
import numpy as np
import os
import time
import pyproj
import glob

In [20]:
%run transform_lat_lon_to_xy.ipynb

In [21]:
airport_location = pd.read_csv('../data/airport_positions.csv')

In [22]:
data = xr.open_mfdataset('/lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/05/meps_lagged_6_h_subset_2_5km_20211005T00Z.nc', chunks={'time':1})
# Remove parenthesis for airport location in dataset -> split lon and lat values -> divide by 100 to obtain correct values
airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)
airport_location[['latitude', 'longitude']] = airport_location[['latitude', 'longitude']].astype('float') / 100

  airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)


In [23]:
parameters = ['air_temperature_0m', 'air_temperature_2m']

In [24]:
# Define folder with paths and prefix for files to read
folder_path = '/lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/'
file_prefix = 'meps_lagged_6_h_subset_2_5km_'
file_pattern = os.path.join(folder_path, file_prefix)
file_list = glob.glob(file_pattern)

In [None]:
all_timesteps_data = []
for day_folder in os.listdir(folder_path):
        day_path = os.path.join(folder_path, day_folder)

        file_list = [file for file in os.listdir(day_path) if file.startswith(file_prefix)]

        for file in file_list:
            file_path = os.path.join(day_path, file)

            start_time = time.time()

            data = xr.open_mfdataset(file_path, chunks={'time':1})
            print(f"Processing file: {file_path}")

            # Extracting parameter values only for matching nearest location
            latitude_values = airport_location.latitude.values
            longitude_values = airport_location.longitude.values
            
            x_values, y_values = transform_latitude_longitude_to_xy(latitude_values, longitude_values)
            
            airport_six_timesteps = []

            for airport_idx in range(len(latitude_values)):
                nearest_x, nearest_y = transform_latitude_longitude_to_xy(latitude_values[airport_idx], longitude_values[airport_idx])
                
                lat_from_data = data.latitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()
                lon_from_data = data.longitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()


                interpolated_data = data[parameters].interp(x=nearest_x, y=nearest_y)

                # Convert to pandas datetime
                start_datetime = pd.to_datetime(data.time.values[0])
                # Choosing 6 timesteps in total
                end_datetime = start_datetime + pd.DateOffset(hours=5)
                

                extracted_data = interpolated_data.sel(
                    ensemble_member=0,
                    time=slice(start_datetime, end_datetime)
                )
                
                extracted_data = extracted_data.expand_dims({'airport': [airport_idx + 1]})

                airport_six_timesteps.append(extracted_data)
        combined_data = xr.concat(airport_six_timesteps, dim='airport')
        all_timesteps_data.append(combined_data)
        end_time = time.time()
        print(f'Time taken to extract info from {file}: {end_time - start_time}')
        print(len(all_timesteps_data))
final_combined_data = xr.concat(all_timesteps_data, dim='time')


Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/19/meps_lagged_6_h_subset_2_5km_20211019T12Z.nc
Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/19/meps_lagged_6_h_subset_2_5km_20211019T18Z.nc
Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/19/meps_lagged_6_h_subset_2_5km_20211019T00Z.nc
Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/19/meps_lagged_6_h_subset_2_5km_20211019T06Z.nc
Time taken to extract info from meps_lagged_6_h_subset_2_5km_20211019T06Z.nc: 2.781939744949341
1
Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/11/meps_lagged_6_h_subset_2_5km_20211011T00Z.nc
Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/11/meps_lagged_6_h_subset_2_5km_20211011T12Z.nc
Processing file: /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/11/meps_lagg