# Import modules

In [None]:
import pandas as pd
import os
import numpy as np
import datetime

# Set up directories

In [3]:
# Root directory: start from the kernel's current working directory.
# `wdir` is used below as the base path for the ECMWF and IMAGES folders.
wdir = os.getcwd()

In [10]:
# References:
# [1] https://towardsdatascience.com/different-ways-to-connect-google-drive-to-a-google-colab-notebook-pt-1-de03433d2f7a
# [2] https://stackoverflow.com/questions/54351852/accessing-shared-with-me-with-colab
# [3] https://stackoverflow.com/questions/53581278/test-if-notebook-is-running-on-google-colab

# Detect Google Colab by attempting the import. Only an ImportError means
# "not running on Colab"; any other failure is left to surface instead of
# being silently swallowed.
try:
    from google.colab import drive
    from google.colab import files
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:

    # mount google drive
    drive.mount('/content/gdrive/', force_remount=True)

    # change directory: try the MCED sub-folder first and fall back to the
    # project folder directly under MyDrive when that path cannot be entered
    try:
        os.chdir('/content/gdrive/MyDrive/MCED/TAAC_VC_PROJECT')
    except OSError:
        os.chdir('/content/gdrive/MyDrive/TAAC_VC_PROJECT')

# Project root: the Drive project folder on Colab, otherwise the directory
# the kernel was started in.
wdir = os.getcwd()

print(wdir)

Mounted at /content/gdrive/
/content/gdrive/.shortcut-targets-by-id/1v5wjtF3FR1_SrbCpPXH3gkApatlnQW4s/TAAC_VC_PROJECT


In [11]:
os.chdir(wdir)

# Absolute paths of the input (ECMWF) and output (IMAGES) folders.
ECMWF_dir = os.path.join(wdir, 'ECMWF')
IMAGES_dir = os.path.join(wdir, 'IMAGES')

# Create both folders if they are missing. exist_ok=True makes the call a
# no-op when the directory is already there, so no separate existence check
# is needed.
os.makedirs(ECMWF_dir, exist_ok=True)
os.makedirs(IMAGES_dir, exist_ok=True)

In [12]:
# Work inside the ECMWF folder so the NetCDF file can be opened by its bare name.
os.chdir(ECMWF_dir)

# Echo the working directory as the cell output to confirm the change.
os.getcwd()

'/content/gdrive/.shortcut-targets-by-id/1v5wjtF3FR1_SrbCpPXH3gkApatlnQW4s/TAAC_VC_PROJECT/ECMWF'

# Read NetCDF data

In [14]:
# Open the NetCDF file with xarray.

# Reference: https://pratiman-91.github.io/2020/08/01/NetCDF-to-GeoTIFF-using-Python.html

import xarray as xr
import rioxarray as rio


# NOTE(review): this URL presumably points to the sample file used by the reference above;
# the file opened below is a different one -- confirm where it comes from:
# https://www.unidata.ucar.edu/software/netcdf/examples/sresa1b_ncar_ccsm3-example.nc

# Relative file name, resolved against the current working directory.
ncfname = '1985_T2m_tcc_tp.nc'
ncfile = xr.open_dataset(ncfname)

# Inspect the file
ncfile

In [15]:
# Day to extract, written as a YYYYMMDD string; also reused as the output file name.
date= '19850901'

In [16]:
# Label-based selection along the `time` coordinate; using the same value for
# both ends of the slice restricts the selection to that single date.
# Reference: https://docs.xarray.dev/en/stable/user-guide/indexing.html
date_slice = ncfile.sel(time=slice(date, date))
date_slice

In [None]:
# Display the 't2m' variable of the selected day.
date_slice['t2m']

In [None]:
# References on reordering array axes:
# https://stackoverflow.com/questions/23943379/swapping-the-dimensions-of-a-numpy-array
# https://numpy.org/doc/stable/reference/generated/numpy.transpose.html

import numpy as np

# temperature: extract the sliced variable as a NumPy array and move its
# leading axis to the end, in one chained expression
t2m = date_slice['t2m'].to_numpy().transpose(1, 2, 0)
t2m.shape

(30, 42, 24)

In [None]:
# total precipitation ('tp') and total cloud cover ('tcc'): same treatment as
# the temperature field -- convert to NumPy and move the leading axis last
tp, tcc = (
    date_slice[var_name].to_numpy().transpose(1, 2, 0)
    for var_name in ('tp', 'tcc')
)



# Save images

In [None]:
# Switch to the IMAGES folder: the save below uses a relative file name,
# so the archive is written into the current working directory.
os.chdir(IMAGES_dir)

In [None]:
# Store the three arrays in one compressed archive under the keys
# 't2m', 'tp' and 'tcc'. The file name is the date string; NumPy appends
# the '.npz' extension.
# Reference: https://stackoverflow.com/questions/59307148/python-opencv-how-to-save-a-5-channel-image
np.savez_compressed(date, t2m=t2m, tp=tp, tcc=tcc)

# Load images

In [None]:
# Re-open the archive written by the save step to check its contents.
# Reference: https://numpy.org/doc/stable/reference/generated/numpy.savez_compressed.html

loaded = np.load(f'{date}.npz')

In [None]:
# Pull the temperature array back out of the archive by its key and display it.
t2m_loaded = loaded['t2m']
t2m_loaded

array([[[291.79688, 291.76004, 291.69562, ..., 291.25162, 291.3026 ,
         291.3019 ],
        [291.7551 , 291.72534, 291.66232, ..., 291.24313, 291.29413,
         291.2906 ],
        [291.8167 , 291.8061 , 291.64746, ..., 291.31464, 291.22968,
         291.24667],
        ...,
        [292.46674, 292.04257, 289.91898, ..., 293.77603, 291.08734,
         290.95847],
        [292.70184, 292.28476, 290.14417, ..., 294.02103, 291.3253 ,
         291.2233 ],
        [292.93692, 292.5262 , 290.36932, ..., 294.26675, 291.5632 ,
         291.48813]],

       [[291.8174 , 291.77988, 291.71616, ..., 291.27713, 291.32245,
         291.32245],
        [291.77493, 291.74448, 291.68286, ..., 291.26862, 291.31393,
         291.3104 ],
        [291.83652, 291.8259 , 291.6687 , ..., 291.34015, 291.2495 ,
         291.2665 ],
        ...,
        [292.44125, 292.0348 , 289.89066, ..., 293.83835, 291.0994 ,
         290.97336],
        [292.67633, 292.27698, 290.1151 , ..., 294.08334, 291.3373 ,
   

# Loop over years and save images to compressed NumPy arrays

In [None]:
# REFERENCES:
# https://stackoverflow.com/questions/23943379/swapping-the-dimensions-of-a-numpy-array
# https://numpy.org/doc/stable/reference/generated/numpy.transpose.html

import numpy as np
from datetime import datetime, timedelta

def save_npz(ncfile, date, variables=('t2m', 'tp', 'tcc'), out_dir=None):
    """Save the selected variables of one day to a compressed ``.npz`` archive.

    The dataset is sliced along its ``time`` coordinate with
    ``slice(date, date)``. Each requested variable is converted to a NumPy
    array whose axes are reordered from ``(0, 1, 2)`` to ``(1, 2, 0)``, and
    all arrays are written to a single archive keyed by variable name.

    Parameters
    ----------
    ncfile
        Dataset-like object supporting ``.sel(time=...)`` and item access by
        variable name (e.g. the object returned by ``xr.open_dataset``).
    date : str
        Label used for the time slice and as the output file name; NumPy
        appends the ``.npz`` extension.
    variables : iterable of str, optional
        Names of the variables to export. Defaults to ``('t2m', 'tp', 'tcc')``.
        Names are passed to ``np.savez_compressed`` as keyword arguments, so
        ``'file'`` cannot be used as a variable name.
    out_dir : str, optional
        Directory to write the archive into. When omitted the file is written
        relative to the current working directory.

    Notes
    -----
    Assumes every variable is 3-D after slicing, with the sliced (time) axis
    first -- TODO confirm for datasets other than the ones used here.

    References
    ----------
    https://docs.xarray.dev/en/stable/user-guide/indexing.html
    https://numpy.org/doc/stable/reference/generated/numpy.transpose.html
    https://stackoverflow.com/questions/59307148/python-opencv-how-to-save-a-5-channel-image
    """
    date_slice = ncfile.sel(time=slice(date, date))

    # One entry per variable, with the leading axis moved to the end.
    arrays = {
        name: np.transpose(date_slice[name].to_numpy(), (1, 2, 0))
        for name in variables
    }

    target = date if out_dir is None else os.path.join(out_dir, date)
    np.savez_compressed(target, **arrays)


# Change directory

In [None]:
# Return to the ECMWF folder, where the yearly NetCDF files are read from.
os.chdir(ECMWF_dir)

# Echo the working directory as the cell output to confirm the change.
os.getcwd()

'h:\\My Drive\\MCED\\TAAC_VC_PROJECT\\ECMWF'

In [None]:
# Range of years to process. Both bounds are inclusive: the processing loop
# iterates over range(year_start, year_end + 1).

year_start = 1985
year_end = 1995

In [None]:
# https://stackoverflow.com/questions/61810757/find-total-number-of-days-in-a-year-pandas
import datetime
import calendar

# function to compute the number of days in a year
def days_in_year(year=None):
    """Return the number of days (365 or 366) in ``year``.

    Parameters
    ----------
    year : int, optional
        Calendar year. When omitted, the current year is used; it is looked
        up at call time rather than being frozen when the function is defined.

    Returns
    -------
    int
        366 for leap years, 365 otherwise.
    """
    if year is None:
        # Local import: this notebook binds the global name `datetime` to both
        # the module and the class in different cells, so do not rely on it.
        from datetime import date as _date
        year = _date.today().year
    # calendar.isleap returns a bool, which adds as 0 or 1.
    return 365 + calendar.isleap(year)


In [None]:
from datetime import datetime, timedelta
from tqdm import tqdm

# Export every day of every requested year to its own .npz archive.
#
# save_npz() writes relative to the current working directory, so switch to
# the output folder once up front; the yearly input file is opened by its
# full path, which removes the need to change directory inside the loops.
os.chdir(IMAGES_dir)

for year in range(year_start, year_end + 1):

    ndays = days_in_year(year)

    day_start = datetime(year, 1, 1, 0, 0)

    ncfname = str(year) + '_T2m_tcc_tp.nc'

    # Dates that could not be exported for this year, with the reason.
    failed_dates = []

    # The context manager closes the dataset once the year is finished,
    # instead of leaving one open file handle per processed year.
    with xr.open_dataset(os.path.join(ECMWF_dir, ncfname)) as ncfile:

        print(f'Processing file {ncfname}')

        for d in tqdm(range(ndays)):
            # Compute current date as a YYYYMMDD string
            date = (day_start + timedelta(days=d)).strftime('%Y%m%d')
            try:
                save_npz(ncfile, date)
            except Exception as exc:
                # Best effort: keep going with the next day, but remember the
                # failure. KeyboardInterrupt is not caught, so the run can
                # still be interrupted.
                failed_dates.append((date, repr(exc)))

    if failed_dates:
        print(f'{len(failed_dates)} day(s) could not be saved for {ncfname}:')
        for failed_date, reason in failed_dates:
            print(f'  {failed_date}: {reason}')


Processing file 1985_T2m_tcc_tp.nc


100%|██████████| 365/365 [00:28<00:00, 12.60it/s]


Processing file 1986_T2m_tcc_tp.nc


100%|██████████| 365/365 [00:33<00:00, 11.00it/s]


Processing file 1987_T2m_tcc_tp.nc


100%|██████████| 365/365 [00:37<00:00,  9.77it/s]


Processing file 1988_T2m_tcc_tp.nc


100%|██████████| 366/366 [00:44<00:00,  8.31it/s]


Processing file 1989_T2m_tcc_tp.nc


100%|██████████| 365/365 [00:49<00:00,  7.45it/s]


Processing file 1990_T2m_tcc_tp.nc


100%|██████████| 365/365 [00:53<00:00,  6.84it/s]


Processing file 1991_T2m_tcc_tp.nc


100%|██████████| 365/365 [00:57<00:00,  6.30it/s]


Processing file 1992_T2m_tcc_tp.nc


100%|██████████| 366/366 [01:02<00:00,  5.84it/s]


Processing file 1993_T2m_tcc_tp.nc


100%|██████████| 365/365 [01:07<00:00,  5.37it/s]


Processing file 1994_T2m_tcc_tp.nc


100%|██████████| 365/365 [01:22<00:00,  4.43it/s]


Processing file 1995_T2m_tcc_tp.nc


100%|██████████| 365/365 [01:37<00:00,  3.74it/s]


FileNotFoundError: [Errno 2] No such file or directory: b'h:\\My Drive\\MCED\\TAAC_VC_PROJECT\\ECMWF\\1996_T2m_tcc_tp.nc'