In [5]:
import xarray as xr
import pandas as pd
import numpy as np
import os
import time
import pickle

In [20]:
def extract_monthly_data_from_netcdf(year_path, output_netcdf_path):
    total_files = 0

    # Iterate through all month folders in the year folder
    for month_folder in os.listdir(year_path):
        month_path = os.path.join(year_path, month_folder)

        month_datasets = []  # List to store datasets from each day within the month

        # Iterate through all day folders in the month folder
        for day_folder in os.listdir(month_path):
            day_path = os.path.join(month_path, day_folder)

            file_list = [file for file in os.listdir(day_path) if file.startswith('meps_lagged_6_h_subset_2_5km_')]
            
            for file in file_list:
                file_path = os.path.join(day_path, file)

                start_time = time.time()

                #data = xr.open_dataset(file_path)
                data = xr.open_mfdataset(file_path, chunks={'time': 1})
                extracted_data = data[['air_temperature_0m', 'air_temperature_2m']].isel(ensemble_member=0, time=slice(None, 6))


                month_datasets.append(extracted_data)
                total_files += 1
                end_time = time.time()
                print(f'Time taken to extract info from {file_path}: {end_time - start_time}')

        # Concatenate datasets for the month
        month_combined_data = xr.concat(month_datasets, dim='time').compute()
        #month_combined_data = month_combined_data.sortby('time')

        print(f'Total number of files read: {total_files}')
    
        #---- Save result to netcdf ----#
        month_netcdf_path = os.path.join(output_netcdf_path, f'2023_{month_folder}.nc')
        try:
            month_combined_data.to_netcdf(month_netcdf_path)
            print(f'netCDF file saved successfully')
        except Exception as e:
            print(f'Not able to save netCDF file, error: {e}')

        #return all_year_combined_data
    return month_combined_data

In [18]:
# Paths
year_folder_path = '/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/'
output_netcdf_path = '/lustre/storeB/users/tonjek/msc/2024_msc_tonje_metar/netcdf/2023/'
result = extract_monthly_data_from_netcdf(year_folder_path, output_netcdf_path)

Time taken to extract info from /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/11/19/meps_lagged_6_h_subset_2_5km_20211119T06Z.nc: 2.5919535160064697
Time taken to extract info from /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/11/19/meps_lagged_6_h_subset_2_5km_20211119T00Z.nc: 0.12510466575622559
Time taken to extract info from /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/11/19/meps_lagged_6_h_subset_2_5km_20211119T12Z.nc: 0.12354421615600586
Time taken to extract info from /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/11/19/meps_lagged_6_h_subset_2_5km_20211119T18Z.nc: 0.15836381912231445
Time taken to extract info from /lustre/storeB/immutable/archive/projects/metproduction/meps/2021/11/11/meps_lagged_6_h_subset_2_5km_20211111T18Z.nc: 0.12106585502624512


KeyboardInterrupt: 