In [14]:
import xarray as xr
import pandas as pd
import numpy as np
import os
import time
import pyproj
import glob
import pickle

In [15]:
airport_location = pd.read_csv('../data/airport_positions.csv')

In [16]:
data = xr.open_mfdataset('/lustre/storeB/immutable/archive/projects/metproduction/meps/2021/10/05/meps_lagged_6_h_subset_2_5km_20211005T00Z.nc', chunks={'time':1})
# Remove parenthesis for airport location in dataset -> split lon and lat values -> divide by 100 to obtain correct values
airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)
airport_location[['latitude', 'longitude']] = airport_location[['latitude', 'longitude']].astype('float') / 100

  airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)


In [17]:
parameters = ['air_temperature_0m', 'air_temperature_2m', 'air_temperature_pl', 
              'relative_humidity_2m', 'precipitation_amount_acc', 'x_wind_10m', 'y_wind_10m', 
             'x_wind_pl', 'y_wind_pl', 'fog_area_fraction', 'surface_air_pressure', 'air_pressure_at_sea_level']

In [18]:
def transform_latitude_longitude_to_xy(latitude_values, longitude_values):
    # Create a pyproj CRS object
    proj4_str = '+proj=lcc +lat_0=63.3 +lon_0=15 +lat_1=63.3 +lat_2=63.3 +no_defs +R=6.371e+06'
    lcc_crs = pyproj.CRS.from_proj4(proj4_str)

    # Create a transformer for converting between lat/lon and x/y
    transformer = pyproj.Transformer.from_crs(pyproj.CRS("EPSG:4326"), lcc_crs, always_xy=True)

    # Transform lat/lon to x/y
    x_values, y_values = transformer.transform(longitude_values, latitude_values)

    return x_values, y_values

# Extract from a specific file

In [19]:
latitude_values = airport_location.latitude.values
longitude_values = airport_location.longitude.values

x_values, y_values = transform_latitude_longitude_to_xy(latitude_values, longitude_values)

daily_airport_datasets = []

for airport_idx in range(len(latitude_values)):
    nearest_x = x_values[airport_idx]
    nearest_y = y_values[airport_idx]
    
    #print(f"AIRPORT {airport_idx + 1} COORDINATES:")
    #print(f"  Latitude: {latitude_values[airport_idx]}")
    #print(f"  Longitude: {longitude_values[airport_idx]}")
    #print(f"  Nearest x: {nearest_x}")
    #print(f"  Nearest y: {nearest_y}")
    
    # Extract latitude and longitude values from the data variable
    lat_from_data = data.latitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()
    lon_from_data = data.longitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()

    #print(f"  Latitude (Data): {lat_from_data}")
    #print(f"  Longitude (Data): {lon_from_data}")


    interpolated_data = data[parameters].interp(x=nearest_x, y=nearest_y)

    start_datetime = pd.to_datetime(data.time.values[0])  # Convert to pandas datetime
    end_datetime = start_datetime + pd.DateOffset(hours=5)  # Assuming each time step is 1 hour

    extracted_data = interpolated_data.sel(
        ensemble_member=0,
        time=slice(start_datetime, end_datetime)
    )
    # Add airport as a dimension with the corresponding index (+ 1)
    extracted_data = extracted_data.expand_dims({'airport': [airport_idx + 1]})
    daily_airport_datasets.append(extracted_data)
final_combined_data = xr.concat(daily_airport_datasets, dim='airport')

In [20]:
final_combined_data

Unnamed: 0,Array,Chunk
Bytes,720 B,8 B
Shape,"(90,)","(1,)"
Count,968 Graph Layers,90 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 720 B 8 B Shape (90,) (1,) Count 968 Graph Layers 90 Chunks Type float64 numpy.ndarray",90  1,

Unnamed: 0,Array,Chunk
Bytes,720 B,8 B
Shape,"(90,)","(1,)"
Count,968 Graph Layers,90 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,720 B,8 B
Shape,"(90,)","(1,)"
Count,968 Graph Layers,90 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 720 B 8 B Shape (90,) (1,) Count 968 Graph Layers 90 Chunks Type float64 numpy.ndarray",90  1,

Unnamed: 0,Array,Chunk
Bytes,720 B,8 B
Shape,"(90,)","(1,)"
Count,968 Graph Layers,90 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.66 kiB,24 B
Shape,"(90, 6, 6)","(1, 1, 6)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.66 kiB 24 B Shape (90, 6, 6) (1, 1, 6) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",6  6  90,

Unnamed: 0,Array,Chunk
Bytes,12.66 kiB,24 B
Shape,"(90, 6, 6)","(1, 1, 6)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.66 kiB,24 B
Shape,"(90, 6, 6)","(1, 1, 6)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.66 kiB 24 B Shape (90, 6, 6) (1, 1, 6) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",6  6  90,

Unnamed: 0,Array,Chunk
Bytes,12.66 kiB,24 B
Shape,"(90, 6, 6)","(1, 1, 6)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.66 kiB,24 B
Shape,"(90, 6, 6)","(1, 1, 6)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.66 kiB 24 B Shape (90, 6, 6) (1, 1, 6) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",6  6  90,

Unnamed: 0,Array,Chunk
Bytes,12.66 kiB,24 B
Shape,"(90, 6, 6)","(1, 1, 6)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.11 kiB 4 B Shape (90, 6, 1) (1, 1, 1) Count 1077 Graph Layers 540 Chunks Type float32 numpy.ndarray",1  6  90,

Unnamed: 0,Array,Chunk
Bytes,2.11 kiB,4 B
Shape,"(90, 6, 1)","(1, 1, 1)"
Count,1077 Graph Layers,540 Chunks
Type,float32,numpy.ndarray


# Expanded code: For all files in a day folder

In [21]:
# Define folder with paths and prefix for files to read
folder_path = '/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/05/05/'
file_prefix = 'meps_lagged_6_h_subset_2_5km_*.nc'
file_pattern = os.path.join(folder_path, file_prefix)
file_list = glob.glob(file_pattern)
output_folder = '/lustre/storeB/users/tonjek/msc/2024_msc_tonje_metar/data_extraction/'

In [24]:
all_timesteps_data = []
for file_path in file_list:
    data = xr.open_mfdataset(file_path, chunks={'time':1})
    
    print(file_path)
    
    latitude_values = airport_location.latitude.values
    longitude_values = airport_location.longitude.values

    x_values, y_values = transform_latitude_longitude_to_xy(latitude_values, longitude_values)

    airport_six_timesteps = []

    for airport_idx in range(len(latitude_values)):
        nearest_x = x_values[airport_idx]
        nearest_y = y_values[airport_idx]

        #print(f"AIRPORT {airport_idx + 1} COORDINATES:")
        #print(f"  Latitude: {latitude_values[airport_idx]}")
        #print(f"  Longitude: {longitude_values[airport_idx]}")
        #print(f"  Nearest x: {nearest_x}")
        #print(f"  Nearest y: {nearest_y}")

        # Extract latitude and longitude values from the data variable
        lat_from_data = data.latitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()
        lon_from_data = data.longitude.sel(x=nearest_x, y=nearest_y, method='nearest').values.item()

        #print(f"  Latitude (Data): {lat_from_data}")
        #print(f"  Longitude (Data): {lon_from_data}")


        interpolated_data = data[parameters].interp(x=nearest_x, y=nearest_y)

        start_datetime = pd.to_datetime(data.time.values[0])  # Convert to pandas datetime
        end_datetime = start_datetime + pd.DateOffset(hours=5)  # Assuming each time step is 1 hour

        extracted_data = interpolated_data.sel(
            ensemble_member=0,
            time=slice(start_datetime, end_datetime)
        )
        # Add airport as a dimension with the corresponding index (+ 1)
        extracted_data = extracted_data.expand_dims({'airport': [airport_idx + 1]})
        airport_six_timesteps.append(extracted_data)
    combined_data = xr.concat(airport_six_timesteps, dim='airport')
    all_timesteps_data.append(combined_data)
final_combined_data = xr.concat(all_timesteps_data, dim='time')


/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/05/05/meps_lagged_6_h_subset_2_5km_20230505T00Z.nc
/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/05/05/meps_lagged_6_h_subset_2_5km_20230505T18Z.nc
/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/05/05/meps_lagged_6_h_subset_2_5km_20230505T06Z.nc
/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/05/05/meps_lagged_6_h_subset_2_5km_20230505T12Z.nc


In [25]:
pickle_filename = f'2023_05_05_test.pkl'
pickle_filepath = os.path.join(output_folder, pickle_filename)

with open(pickle_filepath, 'wb') as pickle_file:
    pickle.dump(final_combined_data, pickle_file)

print(f"Saved pickle file for file {file_path}: {pickle_filepath}")

Saved pickle file for file /lustre/storeB/immutable/archive/projects/metproduction/meps/2023/05/05/meps_lagged_6_h_subset_2_5km_20230505T12Z.nc: /lustre/storeB/users/tonjek/msc/2024_msc_tonje_metar/data_extraction/2023_05_05_test.pkl


In [30]:
with open('2022/2022_10_test.pkl', 'rb') as pickle_file:
    loaded_data = pickle.load(pickle_file)

In [31]:
loaded_data

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,392.34 kiB,24 B
Shape,"(90, 186, 6)","(1, 1, 6)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 392.34 kiB 24 B Shape (90, 186, 6) (1, 1, 6) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",6  186  90,

Unnamed: 0,Array,Chunk
Bytes,392.34 kiB,24 B
Shape,"(90, 186, 6)","(1, 1, 6)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,392.34 kiB,24 B
Shape,"(90, 186, 6)","(1, 1, 6)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 392.34 kiB 24 B Shape (90, 186, 6) (1, 1, 6) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",6  186  90,

Unnamed: 0,Array,Chunk
Bytes,392.34 kiB,24 B
Shape,"(90, 186, 6)","(1, 1, 6)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,392.34 kiB,24 B
Shape,"(90, 186, 6)","(1, 1, 6)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 392.34 kiB 24 B Shape (90, 186, 6) (1, 1, 6) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",6  186  90,

Unnamed: 0,Array,Chunk
Bytes,392.34 kiB,24 B
Shape,"(90, 186, 6)","(1, 1, 6)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 65.39 kiB 4 B Shape (90, 186, 1) (1, 1, 1) Count 27988 Graph Layers 16740 Chunks Type float32 numpy.ndarray",1  186  90,

Unnamed: 0,Array,Chunk
Bytes,65.39 kiB,4 B
Shape,"(90, 186, 1)","(1, 1, 1)"
Count,27988 Graph Layers,16740 Chunks
Type,float32,numpy.ndarray
