## Setting up:

### Imports:

In [5]:
import os
from tqdm.notebook import tqdm
import re
import warnings
import re
import calendar
import cdsapi
import zipfile

warnings.filterwarnings("ignore")

from scripts.xgb_helpers import *
# from scripts.xgb_input import *
# from scripts.xgb_model import *
# from scripts.plots_clean import *
# from scripts.xgb_metrics import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Hourly data:

In [4]:
# Root folder for the hourly ERA5-Land downloads; the download cells below
# append a per-variable sub-folder (ssr/, tp/, t2m/) to this path.
path_save_era5hourly = '../../data/MB_modeling/ERA5/ERA5-Land-hourly/'

### Net solar radiation:

In [6]:
# Download hourly ERA5-Land surface net solar radiation, one zipped NetCDF
# archive per (year, month). Disabled by default: set RUN to True to launch
# the requests against the Climate Data Store.
RUN = False
if RUN:
    # Per-variable output folder, created up front so that a served request is
    # not lost at write time because the directory is missing.
    ssr_dir = path_save_era5hourly + 'ssr/'
    os.makedirs(ssr_dir, exist_ok=True)

    # All 24 hourly time steps: '00:00', '01:00', ..., '23:00'.
    hours = [f'{hour:02d}:00' for hour in range(24)]

    # A single client serves every request of the loop.
    c = cdsapi.Client()

    for year in tqdm(range(1950, 2022, 1), desc='years', position=0):
        for month in tqdm(range(1, 13, 1), desc='months', position=1, leave=False):
            # Only request the days that exist in this month (handles leap years).
            num_days = calendar.monthrange(year, month)[1]
            # makeStr comes from scripts.xgb_helpers; presumably it zero-pads
            # to two digits -- TODO confirm.
            str_days = [makeStr(day) for day in range(1, num_days + 1, 1)]

            c.retrieve(
                'reanalysis-era5-land',
                {
                    'variable': 'surface_net_solar_radiation',
                    'year': str(year),
                    'month': makeStr(month),
                    'day': str_days,
                    'time': hours,
                    # Bounding box; CDS expects [North, West, South, East].
                    'area': [50, 0, 45, 20],
                    'format': 'netcdf.zip',
                },
                ssr_dir + f'ssr-{str(year)}-{makeStr(month)}.netcdf.zip')

### Total precipitation:

In [7]:
# Download hourly ERA5-Land total precipitation, one zipped NetCDF archive per
# (year, month). Disabled by default: set RUN to True to launch the requests
# against the Climate Data Store.
RUN = False
if RUN:
    # Per-variable output folder, created up front so that a served request is
    # not lost at write time because the directory is missing.
    tp_dir = path_save_era5hourly + 'tp/'
    os.makedirs(tp_dir, exist_ok=True)

    # All 24 hourly time steps: '00:00', '01:00', ..., '23:00'.
    hours = [f'{hour:02d}:00' for hour in range(24)]

    # A single client serves every request of the loop.
    c = cdsapi.Client()

    for year in tqdm(range(1950, 2022, 1), desc='years', position=0):
        for month in tqdm(range(1, 13, 1), desc='months', position=1, leave=False):
            # Only request the days that exist in this month (handles leap years).
            num_days = calendar.monthrange(year, month)[1]
            # makeStr comes from scripts.xgb_helpers; presumably it zero-pads
            # to two digits -- TODO confirm.
            str_days = [makeStr(day) for day in range(1, num_days + 1, 1)]

            c.retrieve(
                'reanalysis-era5-land',
                {
                    'variable': 'total_precipitation',
                    'year': str(year),
                    'month': makeStr(month),
                    'day': str_days,
                    'time': hours,
                    # Bounding box; CDS expects [North, West, South, East].
                    'area': [50, 0, 45, 20],
                    'format': 'netcdf.zip',
                },
                tp_dir + f'tp-{str(year)}-{makeStr(month)}.netcdf.zip')

### 2m Temperature:

In [8]:
# Download hourly ERA5-Land 2 m temperature, one zipped NetCDF archive per
# (year, month). Disabled by default: set RUN to True to launch the requests
# against the Climate Data Store.
RUN = False
if RUN:
    # Per-variable output folder, created up front so that a served request is
    # not lost at write time because the directory is missing.
    t2m_dir = path_save_era5hourly + 't2m/'
    os.makedirs(t2m_dir, exist_ok=True)

    # All 24 hourly time steps: '00:00', '01:00', ..., '23:00'.
    hours = [f'{hour:02d}:00' for hour in range(24)]

    # A single client serves every request of the loop.
    c = cdsapi.Client()

    # NOTE(review): only 1950-1958 are requested here, whereas the solar
    # radiation and precipitation cells cover 1950-2021 -- confirm whether
    # this shorter range is intentional (e.g. a partial re-download).
    for year in tqdm(range(1950, 1959, 1), desc='years', position=0):
        for month in tqdm(range(1, 13, 1), desc='months', position=1, leave=False):
            # Only request the days that exist in this month (handles leap years).
            num_days = calendar.monthrange(year, month)[1]
            # makeStr comes from scripts.xgb_helpers; presumably it zero-pads
            # to two digits -- TODO confirm.
            str_days = [makeStr(day) for day in range(1, num_days + 1, 1)]

            c.retrieve(
                'reanalysis-era5-land',
                {
                    'variable': '2m_temperature',
                    'year': str(year),
                    'month': makeStr(month),
                    'day': str_days,
                    'time': hours,
                    # Bounding box; CDS expects [North, West, South, East].
                    'area': [50, 0, 45, 20],
                    'format': 'netcdf.zip',
                },
                t2m_dir + f't2m-{str(year)}-{makeStr(month)}.netcdf.zip')

In [9]:
# Unzip every downloaded ERA5-Land archive and store its NetCDF file in
# ncfiles/ under the archive's own name
# (e.g. 'ssr/ssr-1950-01.netcdf.zip' -> 'ncfiles/ssr-1950-01.nc').
era5_hourly_dir = '../../data/MB_modeling/ERA5/ERA5-Land-hourly/'
path_save_nc = '../../data/MB_modeling/ERA5/ERA5-Land-hourly/ncfiles/'
zip_suffix = '.netcdf.zip'

# The download cells write into per-variable sub-folders (ssr/, tp/, t2m/), so
# search those as well as the top-level folder; '' stands for the top level.
search_dirs = [''] + sorted(
    entry + '/' for entry in os.listdir(era5_hourly_dir)
    if os.path.isdir(era5_hourly_dir + entry))

# Archive paths relative to era5_hourly_dir, in a deterministic order.
zipfiles = [
    sub_dir + f
    for sub_dir in search_dirs
    for f in sorted(os.listdir(era5_hourly_dir + sub_dir))
    if f.endswith('.zip')
]

for file in tqdm(zipfiles, desc='zip files'):
    with zipfile.ZipFile(era5_hourly_dir + file, 'r') as zip_ref:
        zip_ref.extractall(path_save_nc)

    # The archive is closed at this point. The code assumes each archive holds
    # one member called 'data.nc', which is renamed after the archive so that
    # the next extraction does not overwrite it.
    archive_name = os.path.basename(file)
    if archive_name.endswith(zip_suffix):
        # Plain suffix removal instead of a regex whose dots match any character.
        new_name = archive_name[:-len(zip_suffix)]
    else:
        new_name = archive_name
    os.rename(path_save_nc + 'data.nc', path_save_nc + new_name + '.nc')

zip files: 0it [00:00, ?it/s]

### Monthly temperature and geopotential at different pressure levels:

In [10]:
# Monthly-mean ERA5 temperature and geopotential on pressure levels, fetched
# as one NetCDF file. Disabled by default: set RUN to True to send the request.
path_save_ERA5plevles = '../../data/MB_modeling/ERA5/'
RUN = False
if RUN:
    # Pressure levels requested, as the strings the request carries.
    plevels = [
        str(level) for level in (
            500, 550, 600, 650, 700, 750, 775, 800,
            825, 850, 875, 900, 925, 950, 975, 1000,
        )
    ]

    plevel_request = {
        'product_type': 'monthly_averaged_reanalysis',
        'variable': ['geopotential', 'temperature'],
        'pressure_level': plevels,
        # Every year from 1950 through 2023, as strings.
        'year': [str(yr) for yr in range(1950, 2024)],
        # '01' ... '12'
        'month': [f'{mon:02d}' for mon in range(1, 13)],
        'time': '00:00',
        'area': [50, 0, 45, 20],
        'format': 'netcdf',
    }
    plevel_target = path_save_ERA5plevles + f'era5-monthly-t2m-geopot-plevels.nc'

    c = cdsapi.Client()
    c.retrieve(
        'reanalysis-era5-pressure-levels-monthly-means',
        plevel_request,
        plevel_target,
    )