# Make inputs for model:

## Setting up

In [1]:
# Run the helper script that installs the libraries missing from this environment.
# `chmod 755` first makes ../scripts.sh executable so it can be invoked directly.
! chmod 755 ../scripts.sh
! ../scripts.sh

Collecting plotly==5.6.0
  Using cached plotly-5.6.0-py2.py3-none-any.whl (27.7 MB)
Collecting tenacity>=6.2.0
  Using cached tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.6.0 tenacity-8.0.1
Collecting jupyter-dash
  Using cached jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Collecting dash
  Using cached dash-2.3.1-py3-none-any.whl (9.6 MB)
Collecting ansi2html
  Using cached ansi2html-1.7.0-py3-none-any.whl (15 kB)
Collecting flask
  Using cached Flask-2.1.1-py3-none-any.whl (95 kB)
Collecting retrying
  Using cached retrying-1.3.3-py3-none-any.whl
Collecting dash-table==5.0.0
  Using cached dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting dash-html-components==2.0.0
  Using cached dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting flask-compress
  Using cached Flask_Compress-1.11-py3-none-any.whl (7.9 kB)
Collecting dash-core-components==2.0.0
  Using cached dash_core_components-2.0.0-py3-none-

In [1]:
import os
from matplotlib import pyplot as plt
import matplotlib.path as mpath
import numpy as np
import pandas as pd
import xarray as xr
import cartopy
import cf_units
from datetime import datetime
from datetime import timedelta
import rasterio
import cartopy.crs as ccrs
import gcsfs
from tqdm import tqdm
import pyproj
from pyproj import Transformer
from google.cloud import storage
from re import search
from os import listdir
from os.path import isfile, join
from scipy import ndimage
from math import cos,sin,pi

from process_pangeo import *
from GC_scripts import *
from processRCM import *
from reprojectionFunctions import *
from MakeInputFunctions import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import keras
import tensorflow as tf 
from keras import backend as K
from tensorflow.python.keras.backend import set_session

from keras.models import load_model, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, UpSampling2D, Conv2DTranspose, Reshape, concatenate, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.models import Sequential

2022-04-07 09:09:03.584231: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-07 09:09:03.584280: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
# Ask TensorFlow itself which GPUs it can see instead of relying on
# torch.cuda.is_available(): the two frameworks can disagree (e.g. TensorFlow
# failing to load libcudart), which previously made physical_devices[0] raise
# IndexError on an empty list.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Enable on-demand memory allocation on every visible GPU, not only the first;
# with no GPU the loop is simply a no-op.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

## Z and X:

### Z:
- (ignore for now) External forcing also given to RCM → total concentration of greenhouse gases and solar and ozone forcings
- Cosine/sine vector encoding the day of year
- Daily spatial means and standard deviations time series for each $X_{i,j,m}$ (because normalising 2D variables removes temporal information)

### X: 
SHAPE [nbmonths, x, y, nb_vars]

In [4]:
# Build both model inputs in one call: the 2D array (X) and the 1D array (Z).
INPUT_2D_ARRAY, INPUT_1D_ARRAY = input_maker(
    fileGC,
    pathGC,
    stand=True,  # standardization
    seas=True,   # add a cos/sin vector to encode the season (bool)
    means=True,  # add the means of the variables
                 # NOTE(review): the original comment documented a "r,s,n" format
                 # here, yet a bool is passed -- confirm against input_maker.
    stds=True,
)

Creating 2D input X:
 -------------------


ValueError: One or more of the specified variables cannot be found in this dataset

## U-Net:

In [22]:
#########################################
####  EMUL-UNET ARCHITECTURE DESIGNER ### 
#########################################

# This file proposes a main function to create the UNET architecture used for the Emulator introduced in Doury et al. (2022).
# We work here with Keras and Tensorflow
### We first define some helper functions that are used in the rest of the file

# The RMSE loss
def rmse_k(y_true, y_pred):
    """Keras loss: square root of the mean squared error over the last axis.

    Args:
        y_true: tensor of reference values.
        y_pred: tensor of predicted values.

    Returns:
        A tensor equal to sqrt(mean((y_pred - y_true)**2, axis=-1)); only the
        last axis is reduced.

    NOTE(review): when the last axis has length 1 (the final layer built by
    unet_maker is Conv2D(1, ...)), the mean covers a single element, so this
    evaluates to the absolute error of each element -- confirm that this is
    the intended loss.
    """
    backend = keras.backend
    squared_error = backend.square(y_pred - y_true)
    mean_squared_error = backend.mean(squared_error, axis=-1)
    return backend.sqrt(mean_squared_error)

# A basic convolutional block: two successive (Conv2D -> BatchNormalization -> ReLU) stages (no pooling here).
def block_conv(conv, filters):
    """Apply two identical Conv2D -> BatchNormalization -> ReLU stages.

    Args:
        conv: input tensor of the block.
        filters: number of filters used by both 3x3 convolutions.

    Returns:
        The tensor produced by the second ReLU activation.
    """
    features = conv
    # Same stage twice; padding='same' is passed to every convolution.
    for _ in range(2):
        features = Conv2D(filters, 3, padding='same')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
    return features

# An up-scaling block used in the decoding part. This block also concatenates its up-sampled tensor with the matching output of the encoding part (skip connection).
def block_up_conc(conv, filters,conv_conc):
    """Up-sample `conv`, concatenate it with `conv_conc`, then convolve.

    Args:
        conv: tensor to up-sample.
        filters: number of filters of the transposed convolution and of the
            following block_conv.
        conv_conc: tensor concatenated with the up-sampled tensor.

    Returns:
        Output tensor of block_conv applied to the concatenation.
    """
    # Transposed convolution with stride 2 and a ReLU activation.
    upsample = Conv2DTranspose(filters, (3, 3), strides=(2, 2), padding='same', activation='relu')
    stacked = concatenate([upsample(conv), conv_conc])
    return block_conv(stacked, filters)

# Another up-scaling block, without concatenation, used where our UNET extends the decoding part beyond the encoder depth.
def block_up(conv, filters):
    """Up-sample `conv` with a transposed convolution, then apply block_conv.

    Args:
        conv: tensor to up-sample.
        filters: number of filters of the transposed convolution and of the
            following block_conv.

    Returns:
        Output tensor of block_conv applied to the up-sampled tensor.
    """
    upsampled = Conv2DTranspose(
        filters,
        (3, 3),
        strides=(2, 2),
        padding='same',
        activation='relu',
    )(conv)
    return block_conv(upsampled, filters)

# A quick function to get the largest power of two that does not exceed n (0 if n < 1).
def highestPowerof2(n):
    """Return the largest power of two that is less than or equal to `n`.

    Args:
        n: an integer (anything accepted by ``operator.index``).

    Returns:
        The largest power of two not exceeding `n` as a Python int, or 0 when
        `n` is smaller than 1.

    Raises:
        TypeError: if `n` is not an integer (e.g. a float).
    """
    from operator import index

    n = index(n)  # integers only: floats are rejected with TypeError
    if n < 1:
        return 0
    # The highest set bit of n is exactly the largest power of two <= n,
    # which avoids scanning downwards from n one value at a time.
    return 1 << (n.bit_length() - 1)

### This is the function drawing the UNET. It is designed to adapt to any size of inputs and outputs maps. 
### To recall : the emulator proposed in Doury et al (2022) takes two sources of inputs : a set of 2D variables and a 1D vector. 
### This function can also build the Emul-UNET with only the 2D variables as input.
### This is set with the variable "nb_inputs" : 1 or 2 sources of inputs.
### 
### The function needs the size of the output map ( "size_target_domain" ). 
### And the shape of the inputs as a list of lists: must be under the form [[width of 2D var,height of 2D var,nb of 2D var],[1,1, nb_of_1D_var]] if nb_inputs=2 and
### [[width of 2D var,height of 2D var,nb of 2D var]] if nb_inputs=1.
### The function returns a Keras model. 

def unet_maker( nb_inputs,size_target_domain,shape_inputs, filters = 64,seed=123):
    """Build the Emul-UNET Keras model for one or two input sources.

    The 2D input is first reduced to a square `size` x `size` grid, where
    `size` is the largest power of two fitting in both spatial dimensions.
    It is then encoded with log2(size) block_conv + max-pooling levels and
    decoded with up-sampling blocks. Skip connections are used while the
    decoder is not deeper than the encoder; plain block_up blocks are used
    beyond that.

    Args:
        nb_inputs: 1 (2D variables only) or 2 (2D variables + 1D vector).
        size_target_domain: size of the output map. The decoder applies
            int(log2(size_target_domain) + 1) - 1 up-sampling blocks.
            NOTE(review): a value that is not a power of two is therefore
            effectively rounded down -- confirm this is intended.
        shape_inputs: list of input shapes: [[dim1, dim2, nb_2D_vars]] when
            nb_inputs == 1, and [[dim1, dim2, nb_2D_vars], [1, 1, nb_1D_vars]]
            when nb_inputs == 2.
        filters: number of filters of the first encoder level; level `l`
            uses filters * 2**l.
        seed: accepted for interface compatibility.
            NOTE(review): it is not used to seed anything in this function.

    Returns:
        A keras Model whose inputs are [input_2D] or [input_2D, input_1D] and
        whose output comes from a final 1x1 Conv2D with a single filter.

    Raises:
        ValueError: if nb_inputs is neither 1 nor 2, or if one spatial
            dimension of the 2D input is smaller than 1.
    """
    from math import log2

    # Fail early with a clear message; otherwise no branch would build the
    # network and the function would crash later on an undefined variable.
    if nb_inputs not in (1, 2):
        raise ValueError("nb_inputs must be 1 or 2, got {!r}".format(nb_inputs))

    size = min(highestPowerof2(shape_inputs[0][0]), highestPowerof2(shape_inputs[0][1]))
    if size < 1:
        raise ValueError("the 2D input must have spatial dimensions >= 1, got {!r}".format(shape_inputs[0]))
    depth = int(log2(size))  # number of encoder levels (size is a power of two)

    # ---- Encoder (shared by both configurations) ----
    inputs = keras.Input(shape = shape_inputs[0])
    inputs_list = [inputs]
    # Kernel sized so that an unpadded convolution (Keras' default padding)
    # brings the map down to size x size.
    diff_lat = inputs.shape[1] - size + 1
    diff_lon = inputs.shape[2] - size + 1
    conv0 = Conv2D(32, (diff_lat, diff_lon))(inputs)
    conv0 = BatchNormalization()(conv0)
    conv0 = Activation('relu')(conv0)

    conv_down = []  # encoder outputs kept for the skip connections
    prev = conv0
    for level in range(depth):
        conv = block_conv(prev, filters * 2 ** level)
        pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(conv)
        conv_down.append(conv)
        prev = pool

    # ---- Bottleneck ----
    # Same width as the deepest encoder level; max() keeps this defined when
    # depth == 0 (size == 1), where no encoder level exists.
    up = block_conv(prev, filters * 2 ** max(depth - 1, 0))

    # ---- Optional 1D branch, merged at the bottleneck ----
    if nb_inputs == 2:
        inputs2 = keras.Input(shape=shape_inputs[1])
        model2 = Dense(filters)(inputs2)
        for level in range(1, depth):
            model2 = Dense(filters * 2 ** level)(model2)
        up = concatenate([up, model2])
        inputs_list.append(inputs2)

    # ---- Decoder ----
    for step in range(1, int(log2(size_target_domain) + 1)):
        if step <= depth:
            # Still within the encoder depth: reuse the matching encoder output.
            up = block_up_conc(up, filters * 2 ** (depth - step), conv_down[depth - step])
        else:
            # Target larger than the encoded grid: keep up-sampling without skip.
            up = block_up(up, filters)

    lastconv = Conv2D(1, 1, padding='same')(up)
    return (keras.models.Model(inputs=inputs_list, outputs=lastconv))

### Fit:

In [24]:
import sys
import xarray as xr
import numpy as np
import os
from netCDF4 import Dataset
import pandas as pd 
import random as rn
from sklearn.model_selection import train_test_split

#import the defined functions 
#from INPUT_MAKER import * 
#from make_unet import *


# Scenario labels and predictor names; neither is referenced by the rest of this cell.
SCENARIO=['HIST' , 'RCP85']
var_list = ['zg850','zg700','zg500',
   'ta850','ta700','ta500',
   'hus850','hus700','hus500',
   'ua850','ua700','ua500',
   'va850','va700','va500',
   'uas','vas','psl'] 

# Accumulators: one entry is appended per processed dataset.
inputs_2D=[]
inputs_1D=[]
target_times=[]
targets=[]

# Build the 2D (X) and 1D (Z) model inputs.
i2D , i1D  = input_maker(fileGC,
                pathGC, 
                stand = True,  # standardization   
                seas = True,   # put a cos,sin vector to control the season, format : bool
                means = True,   # add the mean of the variables raw or stdz, format : r,s,n
                stds = True)

inputs_1D.append(i1D)
inputs_2D.append(i2D)


# TODO: '/' is a placeholder -- set the real path to the target file before running.
filepath_target= '/' # path to target file
target_dataset = xr.open_dataset(filepath_target)
# `target_tas` was previously used without ever being defined in this cell
# (NameError on a fresh kernel); take it from the dataset that was just opened.
# NOTE(review): assumes the target variable is named 'tas' -- confirm against the file.
target_tas = target_dataset['tas']
# NOTE(review): the Kelvin -> Celsius offset is normally 273.15; 273.16 is kept
# unchanged here -- confirm which one is intended.
targets.append(target_tas.values-273.16)
target_times.append(target_dataset.time.values)

Creating 2D input X:
 -------------------
Number of variables: 7
Dataset shape: Frozen({'x': 90, 'y': 25, 'time': 1452})
INPUT_2D shape: (1452, 90, 25, 7)
Creating 1D input Z:
 -------------------
SpatialMean/std shape: (1452, 1, 1, 7)
Cos/sin encoding shape: (1452, 1, 1, 1)
INPUT_1D shape: (1452, 1, 1, 16)
