In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import os
import time
import pyproj
import glob
import pickle



In [2]:
# Load the airport table (one row per airport: identifier, latitude, longitude).
airport_location = pd.read_csv(filepath_or_buffer='../../data/airport_positions_new.csv')

In [3]:
# Display the loaded airport table as a quick sanity check of its columns and values.
airport_location

Unnamed: 0,airport_identifier,latitude,longitude
0,ENAL,62.56,6.11
1,ENAN,69.29,16.14
2,ENAT,69.97,23.37
3,ENBJ,74.50,19.08
4,ENBL,61.39,5.75
...,...,...,...
64,ENUS,65.69,7.65
65,ENVA,63.45,10.92
66,ENVD,70.06,29.84
67,ENWV,56.27,3.39


In [4]:
# Variable names selected from the surface ("sfc") files.
parameters_sfc = [
    'air_temperature_0m',
    'air_temperature_2m',
    'relative_humidity_2m',
    'precipitation_amount_acc',
    'x_wind_10m',
    'y_wind_10m',
    'fog_area_fraction',
    'surface_air_pressure',
    'air_pressure_at_sea_level',
]

# Variable names selected from the pressure-level ("pl") files.
parameters_pl = [
    'air_temperature_pl',
    'x_wind_pl',
    'y_wind_pl',
]

In [5]:
def transform_latitude_longitude_to_xy(latitude_values, longitude_values):
    """Project geographic coordinates to x/y in a Lambert conformal conic CRS.

    The target projection is the one given by the proj4 string below (origin
    and both standard parallels at latitude 63.3, central longitude 15,
    spherical earth of radius 6.371e6). Presumably this is the grid of the
    model files read later in the notebook -- confirm against the files'
    projection metadata.

    Parameters
    ----------
    latitude_values, longitude_values
        Latitudes and longitudes (EPSG:4326) of the points to project; both
        must describe the same points in the same order.

    Returns
    -------
    tuple
        ``(x_values, y_values)`` of the projected points, in input order.
    """
    geographic_crs = pyproj.CRS("EPSG:4326")
    lambert_crs = pyproj.CRS.from_proj4(
        '+proj=lcc +lat_0=63.3 +lon_0=15 +lat_1=63.3 +lat_2=63.3 +no_defs +R=6.371e+06'
    )

    # always_xy=True makes the transformer take (longitude, latitude) and
    # return (x, y), independent of the axis order declared by either CRS.
    to_projected = pyproj.Transformer.from_crs(geographic_crs, lambert_crs, always_xy=True)
    return to_projected.transform(longitude_values, latitude_values)

## All day folders in a month

In [6]:
# Root folder of the 2023 archive; month/day/run sub-folders are joined onto it later.
folder_path = '/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/'

# Glob patterns for the surface and pressure-level files whose two-digit
# number in the file name runs from 00 to 05.
file_prefix_sfc = [f'meps_sfc_{file_number:02d}_*.nc' for file_number in range(6)]
file_prefix_pl = [f'meps_pl_{file_number:02d}_*.nc' for file_number in range(6)]

# Folder the extracted data is written to.
output_folder = '/lustre/storeB/users/tonjek/msc/2024_msc_tonje_metar/00_data_preparation/data_extraction/2023/'

In [9]:
# Extract, for every airport, the first six hours of each matching file in one
# month of the archive, combine everything and store the result as a pickle.

month_folder = '12'
time_folders = ['00', '06', '12', '18']

# Keep identifiers and coordinates row-aligned: drop airports without a
# position and repeated identifiers from the table itself, so that element i
# of the identifier, latitude, longitude, x and y arrays always refers to the
# same airport.
valid_airports = (
    airport_location
    .dropna(subset=['latitude', 'longitude'])
    .drop_duplicates(subset='airport_identifier')
)
airport_identifier = valid_airports['airport_identifier'].to_numpy()
latitude_values = valid_airports['latitude'].to_numpy()
longitude_values = valid_airports['longitude'].to_numpy()

# The airport positions are the same for every file, so project them once.
x_values, y_values = transform_latitude_longitude_to_xy(latitude_values, longitude_values)


def _extract_airport_timesteps(file_path, parameters, airport_ids, airport_x, airport_y):
    """Return `parameters` interpolated to each airport for the first six hours of a file.

    The result has an ``airport`` dimension labelled with `airport_ids` and is
    loaded into memory before the file is closed, so it does not depend on the
    source file afterwards.
    """
    with xr.open_mfdataset(file_path, chunks={'time': 1}) as data:
        start_datetime = pd.to_datetime(data.time.values[0])  # Convert to pandas datetime
        end_datetime = start_datetime + pd.DateOffset(hours=5)  # Assuming each time step is 1 hour

        # The time window is the same for every airport: select it once,
        # before interpolating, so only the needed time steps are touched.
        first_timesteps = data[parameters].sel(time=slice(start_datetime, end_datetime))

        airport_six_timesteps = []
        for airport_id, x_value, y_value in zip(airport_ids, airport_x, airport_y):
            interpolated_data = first_timesteps.interp(x=x_value, y=y_value)
            # Add airport as a dimension labelled with the airport identifier
            airport_six_timesteps.append(interpolated_data.expand_dims({'airport': [airport_id]}))

        # load() must run while the file is still open; the data is lazy until then.
        return xr.concat(airport_six_timesteps, dim='airport').load()


# Surface and pressure-level files hold different variables, so each file kind
# is paired with its own parameter list and collected separately.
file_groups = {
    'sfc': (file_prefix_sfc, parameters_sfc),
    'pl': (file_prefix_pl, parameters_pl),
}
all_timesteps_data = []  # every extracted dataset, in processing order
timesteps_by_kind = {kind: [] for kind in file_groups}

month_path = os.path.join(folder_path, month_folder)
# Sort folder and file names so the time concatenation order is deterministic.
for day_folder in sorted(os.listdir(month_path)):
    day_path = os.path.join(month_path, day_folder)
    if not os.path.isdir(day_path):
        continue

    for time_folder in time_folders:
        specific_folder_path = os.path.join(day_path, time_folder, 'member_00')
        print(specific_folder_path)

        for kind, (file_prefixes, parameters) in file_groups.items():
            for file_prefix in file_prefixes:
                file_pattern = os.path.join(specific_folder_path, file_prefix)

                for file_path in sorted(glob.glob(file_pattern)):
                    print(file_path)
                    start_time = time.time()
                    combined_data = _extract_airport_timesteps(
                        file_path, parameters, airport_identifier, x_values, y_values
                    )
                    all_timesteps_data.append(combined_data)
                    timesteps_by_kind[kind].append(combined_data)
                    end_time = time.time()
                    print(f'Time taken to extract info from file: {end_time - start_time}')

if not all_timesteps_data:
    raise FileNotFoundError(f'No files matched the configured patterns under {month_path}')

# Concatenate each file kind along time, then merge the kinds into one dataset.
# NOTE(review): assumes surface and pressure-level files share coordinate
# values (time, x, y) so the merge has no conflicts -- confirm on real data.
per_kind_data = [
    xr.concat(datasets, dim='time')
    for datasets in timesteps_by_kind.values()
    if datasets
]
final_combined_data = xr.merge(per_kind_data)

# Save pickle
pickle_filename = f'2023_{month_folder}_new_filepath.pkl'
pickle_filepath = os.path.join(output_folder, pickle_filename)

with open(pickle_filepath, 'wb') as pickle_file:
    pickle.dump(final_combined_data, pickle_file)

print(f"Saved pickle file: {pickle_filepath}")

/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/12/19/00/member_00
['/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/12/19/00/member_00/meps_sfc_00_20231219T00Z.nc']


KeyboardInterrupt: 