# Import modules

In [None]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Mount Google Drive

In [None]:
# References:
# [1] https://towardsdatascience.com/different-ways-to-connect-google-drive-to-a-google-colab-notebook-pt-1-de03433d2f7a
# [2] https://stackoverflow.com/questions/54351852/accessing-shared-with-me-with-colab
# [3] https://stackoverflow.com/questions/53581278/test-if-notebook-is-running-on-google-colab

# Detect Colab by trying to import its helper package. Only ImportError is
# caught, so an unrelated failure (e.g. KeyboardInterrupt) is no longer
# silently interpreted as "not running in Colab".
try:
    from google.colab import drive
    from google.colab import files
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:

    # mount google drive
    drive.mount('/content/gdrive/', force_remount=True)

    # change directory: try the MCED project folder first and fall back to the
    # top-level project folder. Only OSError (missing folder, permissions, ...)
    # triggers the fallback; if the fallback fails too, the error propagates.
    try:
        os.chdir('/content/gdrive/MyDrive/MCED/TAAC_VC_PROJECT')

    except OSError:
        os.chdir('/content/gdrive/MyDrive/TAAC_VC_PROJECT')

# Outside Colab no chdir happens, so wdir is simply the directory the notebook
# was started from.
wdir = os.getcwd()

print(wdir)

Mounted at /content/gdrive/
/content/gdrive/MyDrive/MCED/TAAC_VC_PROJECT


# Setup directories

In [None]:
# root directory: taken from the current working directory at the time this
# cell runs
wdir = os.getcwd()

# bare expression so the notebook displays the path
wdir

'/content/gdrive/MyDrive/MCED/TAAC_VC_PROJECT'

In [None]:
# Make sure the working directory is the project root before deriving paths.
os.chdir(wdir)

# Sub-directories of the project root, built with os.path.join rather than
# manual '/' concatenation so the separator is always correct for the platform.
ECMWF_dir = os.path.join(wdir, 'ECMWF')
IMAGES_dir = os.path.join(wdir, 'IMAGES')
SNIRH_dir = os.path.join(wdir, 'SNIRH')
CNN_dir = os.path.join(wdir, 'CNN')


# Device selection

In [None]:
# PyTorch and the two sub-modules this notebook refers to by alias
import torch
import torch.nn as nn
import torch.nn.functional as F

# Pick the execution device: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("The model will be running on", device, "device")

The model will be running on cpu device


# Load compressed NumPy arrays (IMAGES)

In [None]:
# change directory to the IMAGES folder: the np.load / np.savez_compressed
# calls in the following cells use bare file names, so they resolve against
# the current working directory
os.chdir(IMAGES_dir)

In [None]:

from datetime import datetime, timedelta

# Time is counted relative to day_start.
# datetime.datetime combines a date and a time (year, month, day, hour,
# minute, second, microsecond, tzinfo); both bounds are naive datetimes.
# SNIRH has data marked at 9:00, hence the 09:00 timestamps.

day_start = datetime(year=1985, month=1, day=1, hour=9, minute=0)
day_end = datetime(year=1995, month=12, day=31, hour=9, minute=0)


In [None]:
# Number of days to process, counting both day_start and day_end:
# whole days elapsed between the two timestamps, plus one for the first day.
elapsed = day_end - day_start
ndays = elapsed.days + 1

print(f'Number of days to download: {ndays}')

Number of days to download: 4017


In [None]:
# Reads csv file with river basin rainfall
df_snirh = pd.read_csv(SNIRH_dir + "/Mondego_tp24h_Thiessen.csv")

# converts date column to datetime type
df_snirh['date'] = pd.to_datetime(df_snirh['date'])

# 'date' stays a regular column; it is not promoted to the index.

# The following cells operate on `label_dataset`, which no visible cell
# defines, so a fresh "Restart & Run All" would stop with a NameError.
# NOTE(review): presumably label_dataset is this rainfall frame (the columns
# printed by the next cell are 'date' and 'daily tp [mm]') - confirm.
# A copy is used so later unit conversions do not modify df_snirh.
label_dataset = df_snirh.copy()

In [None]:
# Print each column label of the label dataset on its own line
for column_name in list(label_dataset.columns):
    print(column_name)

date
daily tp [mm]


In [None]:
# Convert the daily rainfall from millimetres to metres and rename the column
# to match the new unit.
# The guard makes the cell safe to re-run: once the column has been renamed,
# the conversion is skipped instead of raising KeyError on the old label.
if 'daily tp [mm]' in label_dataset.columns:
    label_dataset['daily tp [mm]'] = label_dataset['daily tp [mm]'] / 1000   # converts from mm to m

    # Rename by label rather than overwriting the whole column list, so the
    # result does not depend on the frame having exactly two columns in a
    # fixed order.
    label_dataset.columns = [
        'daily tp [m]' if name == 'daily tp [mm]' else name
        for name in label_dataset.columns
    ]

In [None]:
from datetime import datetime, timedelta
from tqdm import tqdm

# Builds df_images_raw with one row per day: the timestamp plus the 'tp',
# 't2m' and 'tcc' arrays stored in that day's <YYYYMMDD>.npz file.
# day_start and ndays come from earlier cells. The archives are opened by
# bare file name, so the working directory must already be the folder that
# contains them.

# list of rows
l_row = []
lk = []  # NOTE(review): not used by any visible cell; kept in case a later cell reads it

for d in tqdm(range(ndays)):
    # Compute current date, which is d days after day_start
    date = day_start + timedelta(days=d)

    fname = date.strftime('%Y%m%d')

    # https://numpy.org/doc/stable/reference/generated/numpy.savez_compressed.html
    # np.load on an .npz returns an NpzFile that keeps the archive open and
    # reads members on access; the context manager closes it once the three
    # arrays have been read, so file handles are not left open across the loop.
    with np.load(f'{fname}.npz') as loaded:
        t2m_loaded = loaded['t2m']
        tp_loaded = loaded['tp']
        tcc_loaded = loaded['tcc']

    dict_row = {
                'date': date.strftime('%Y-%m-%d %H:%M:%S'),
                'tp' : tp_loaded,
                't2m' : t2m_loaded,
                'tcc' : tcc_loaded
    }

    l_row.append(dict_row)

# Build the frame once from the list of records (no per-row concatenation)
df_images_raw = pd.DataFrame(l_row)


100%|██████████| 4017/4017 [15:59<00:00,  4.18it/s]


In [None]:
# converts date column to datetime type (the loading loop stored each date as
# a '%Y-%m-%d %H:%M:%S' string)
df_images_raw['date']  = pd.to_datetime(df_images_raw['date'] )

In [None]:
# Pull each column of df_images_raw out as its own NumPy array, in the order
# date, tp, t2m, tcc.
arr_date, arr_tp, arr_t2m, arr_tcc = (
    df_images_raw[column].to_numpy()
    for column in ('date', 'tp', 't2m', 'tcc')
)

In [None]:
# Write the four arrays into one compressed archive in the current working
# directory; the cell that reloads it opens 'images_raw.npz'.
np.savez_compressed(
    'images_raw',
    **{'date': arr_date, 'tp': arr_tp, 't2m': arr_t2m, 'tcc': arr_tcc},
)

In [None]:
# Reopen the archive written above. allow_pickle=True lets NumPy unpickle
# object arrays; unpickling can execute arbitrary code, so only use it on
# files this notebook produced itself.
loaded = np.load('images_raw.npz', allow_pickle=True)

In [None]:
# Read the four members of the reloaded archive back into separate arrays
date_loaded, t2m_loaded, tp_loaded, tcc_loaded = (
    loaded[key] for key in ('date', 't2m', 'tp', 'tcc')
)

In [None]:
# Each reloaded array becomes one row of the intermediate frame; transposing
# turns them into columns (date, t2m, tp, tcc order).
df = pd.DataFrame(
    [date_loaded, t2m_loaded, tp_loaded, tcc_loaded]
).T

In [None]:
# Name the columns in the same order the arrays were passed when df was built
df.columns = ['date', 't2m', 'tp', 'tcc']

In [None]:
# Display the frame rebuilt from the reloaded arrays
df

Unnamed: 0,date,t2m,tp,tcc
0,1985-01-01 09:00:00,"[[[285.21503, 284.8107, 284.51685, 284.32495, ...","[[[4.656613e-10, 4.656613e-10, 4.656613e-10, 4...","[[[0.0, 0.0, 0.0, 0.0, 0.0028077662, 0.0084538..."
1,1985-01-02 09:00:00,"[[[284.189, 284.06293, 283.8838, 283.5793, 283...","[[[4.656613e-10, 6.9569796e-07, 2.087094e-06, ...","[[[0.8726815, 0.8507687, 0.863541, 0.9651089, ..."
2,1985-01-03 09:00:00,"[[[283.66144, 283.7011, 283.80093, 283.72092, ...","[[[4.656613e-10, 4.656613e-10, 4.656613e-10, 2...","[[[0.74192196, 0.72745585, 0.8582917, 0.844817..."
3,1985-01-04 09:00:00,"[[[285.32904, 285.27875, 285.27026, 285.32407,...","[[[4.656613e-10, 8.349307e-07, 1.4747493e-05, ...","[[[0.9917064, 0.99718463, 1.0000076, 1.0000076..."
4,1985-01-05 09:00:00,"[[[285.58112, 285.66183, 285.14987, 284.6613, ...","[[[0.00077465223, 0.00023554033, 0.0004929225,...","[[[0.9999924, 0.9895395, 0.9141876, 0.9604089,..."
...,...,...,...,...
4012,1995-12-27 09:00:00,"[[[287.23373, 287.20642, 287.03433, 286.94897,...","[[[2.9290095e-07, 5.6562945e-05, 0.00031139096...","[[[1.0000076, 0.99610114, 0.98889863, 0.991935..."
4013,1995-12-28 09:00:00,"[[[287.96783, 287.98764, 287.52533, 288.05115,...","[[[0.0013031538, 0.00096538593, 0.00038905581,...","[[[0.67697704, 0.7769122, 0.73043144, 0.815259..."
4014,1995-12-29 09:00:00,"[[[288.41922, 288.23962, 288.18158, 288.0129, ...","[[[0.00054951385, 0.0012004313, 0.0014387006, ...","[[[1.0000076, 0.9999924, 1.0000076, 1.0000076,..."
4015,1995-12-30 09:00:00,"[[[289.89218, 290.04037, 290.0001, 289.89627, ...","[[[9.686081e-05, 5.392544e-05, 8.762907e-05, 7...","[[[0.352741, 0.5784, 0.45375958, 0.72095525, 0..."


In [None]:
# Display the frame built by the daily loading loop, for comparison with df
df_images_raw

Unnamed: 0,date,tp,t2m,tcc
0,1985-01-01 09:00:00,"[[[4.656613e-10, 4.656613e-10, 4.656613e-10, 4...","[[[285.21503, 284.8107, 284.51685, 284.32495, ...","[[[0.0, 0.0, 0.0, 0.0, 0.0028077662, 0.0084538..."
1,1985-01-02 09:00:00,"[[[4.656613e-10, 6.9569796e-07, 2.087094e-06, ...","[[[284.189, 284.06293, 283.8838, 283.5793, 283...","[[[0.8726815, 0.8507687, 0.863541, 0.9651089, ..."
2,1985-01-03 09:00:00,"[[[4.656613e-10, 4.656613e-10, 4.656613e-10, 2...","[[[283.66144, 283.7011, 283.80093, 283.72092, ...","[[[0.74192196, 0.72745585, 0.8582917, 0.844817..."
3,1985-01-04 09:00:00,"[[[4.656613e-10, 8.349307e-07, 1.4747493e-05, ...","[[[285.32904, 285.27875, 285.27026, 285.32407,...","[[[0.9917064, 0.99718463, 1.0000076, 1.0000076..."
4,1985-01-05 09:00:00,"[[[0.00077465223, 0.00023554033, 0.0004929225,...","[[[285.58112, 285.66183, 285.14987, 284.6613, ...","[[[0.9999924, 0.9895395, 0.9141876, 0.9604089,..."
...,...,...,...,...
4012,1995-12-27 09:00:00,"[[[2.9290095e-07, 5.6562945e-05, 0.00031139096...","[[[287.23373, 287.20642, 287.03433, 286.94897,...","[[[1.0000076, 0.99610114, 0.98889863, 0.991935..."
4013,1995-12-28 09:00:00,"[[[0.0013031538, 0.00096538593, 0.00038905581,...","[[[287.96783, 287.98764, 287.52533, 288.05115,...","[[[0.67697704, 0.7769122, 0.73043144, 0.815259..."
4014,1995-12-29 09:00:00,"[[[0.00054951385, 0.0012004313, 0.0014387006, ...","[[[288.41922, 288.23962, 288.18158, 288.0129, ...","[[[1.0000076, 0.9999924, 1.0000076, 1.0000076,..."
4015,1995-12-30 09:00:00,"[[[9.686081e-05, 5.392544e-05, 8.762907e-05, 7...","[[[289.89218, 290.04037, 290.0001, 289.89627, ...","[[[0.352741, 0.5784, 0.45375958, 0.72095525, 0..."
