## A. Importing necessary files and modules

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
from pathlib import Path
from tqdm.auto import tqdm

# Make the project's helper modules importable: they live in the "PyFiles"
# directory that sits next to the current working directory.
CWD = Path(os.getcwd())
PYFILES_PATH = CWD.parent / "PyFiles"
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if str(PYFILES_PATH) not in sys.path:
    sys.path.append(str(PYFILES_PATH))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from glob import glob

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential

# these are in the PYFILES_PATH
from utils import *
import settings as bc
import my_neural_networks as my_nn

import pickle

2023-10-03 11:41:27.492953: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-03 11:41:27.534224: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Notebook-wide matplotlib defaults, set in one place.
plt.rcParams.update(
    {
        "savefig.dpi": 400,  # resolution used when a figure is saved
        "font.size": 13,
        "legend.frameon": False,  # legends are drawn without a frame
    }
)

## B. Data preprocessing

### 1. Importing the datasets into test and training sets

In [4]:
# Scenarios used for fitting, and the single scenario held out for testing.
train_scenarios = [
    "historical",
    "ssp585",
    "ssp126",
    "ssp370",
    "hist-aer",
    "hist-GHG",
]
test_scenario = "ssp245"

# `prepare_predictor` and `prepare_predictand` are defined in `utils.py`.
# Each call returns a pair; the second element is kept for the training
# split (`X_length`, `y_length`) and discarded for the test split.
X_train_xr, X_length = prepare_predictor(train_scenarios, bc.TRAIN_PATH)
y_train_xr, y_length = prepare_predictand(train_scenarios, bc.TRAIN_PATH)

X_test_xr, _ = prepare_predictor(test_scenario, bc.TEST_PATH)
y_test_xr, _ = prepare_predictand(test_scenario, bc.TEST_PATH)

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

### 2. Select relevant variables

We use CO2 and CH4 as the predictors and `tas` as the predictand. Note that the `y` data (`tas`) are formatted differently from the vanilla NN example: they are not flattened, so each sample is kept as a **2D** map.

In [14]:
def _predictor_frame(dataset):
    """Gather the CO2 and CH4 variables of `dataset` into one DataFrame.

    The frame is indexed by the values of the CO2 variable's "time"
    coordinate.
    """
    columns = {}
    for name in ["CO2", "CH4"]:
        columns[name] = dataset[name]
    time_index = dataset["CO2"].coords["time"].values
    return pd.DataFrame(data=columns, index=time_index)


X_train_df = _predictor_frame(X_train_xr)
X_test_df = _predictor_frame(X_test_xr)

# The predictand is deliberately NOT flattened: the 2D structure is kept
# for the CNN implementation.
y_train = y_train_xr["tas"].data
y_test = y_test_xr["tas"].data

### 3. Data Normalization

In [15]:
# Standardise both splits using the mean and standard deviation of the
# training predictors only; `normalization` holds them as [mean, std].
train_mean = X_train_df.mean()
train_std = X_train_df.std()
normalization = [train_mean, train_std]

X_train_df = (X_train_df - train_mean) / train_std
X_test_df = (X_test_df - train_mean) / train_std

X_train = X_train_df.to_numpy()
X_test = X_test_df.to_numpy()

# Shape report: one line per (kind, split) pair, predictors first and the
# test split before the training split. The loop names `i`, `j`, `info`
# and `dimensions` stay bound at notebook scope after the loop, so they are
# kept unchanged.
shape_report = {
    ("predictor", "test"): X_test,
    ("predictor", "training"): X_train,
    ("predictand", "test"): y_test,
    ("predictand", "training"): y_train,
}
for (i, j), info in shape_report.items():
    dimensions = ", ".join(map(str, info.shape))
    print(f"The dimesions for {j} data ({i}) are: ({dimensions})")

The dimesions for test data (predictor) are: (86, 2)
The dimesions for training data (predictor) are: (753, 2)
The dimesions for test data (predictand) are: (86, 96, 144)
The dimesions for training data (predictand) are: (753, 96, 144)


In [10]:
# NOTE(review): `info` is not defined in this cell. It is whatever the
# shape-printing loop in the normalization cell left bound, so the result
# depends on which cells ran last. This cell's execution count (10) is lower
# than the preceding cell's (15), and the saved output `[86, 2]` is the
# test-predictor shape, whereas the loop as written ends on the training
# predictand. Re-check after a Restart & Run All, or remove this cell.
list(info.shape)

[86, 2]