In [86]:
import numpy as np
import xarray as xr
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import *
import tensorflow.keras.backend as K
from src.score import *
import re

import generate_data as gd

In [2]:
class PeriodicPadding2D(tf.keras.layers.Layer):
    """Pad a 4-D tensor periodically along axis 2 and with zeros along axis 1.

    Axis 2 is wrapped around (the last ``pad_width`` columns are prepended and
    the first ``pad_width`` columns are appended); axis 1 receives
    ``pad_width`` rows of zeros on each side. In this notebook the tensors are
    laid out as (batch, lat, lon, channel), so the wrap is in longitude.

    Args:
        pad_width: number of cells added on each side of axes 1 and 2.
            A value of 0 makes the layer a no-op.
    """

    def __init__(self, pad_width, **kwargs):
        super().__init__(**kwargs)
        self.pad_width = pad_width

    def call(self, inputs, **kwargs):
        width = self.pad_width
        if width == 0:
            return inputs
        # Wrap-around in axis 2: tail goes in front, head goes behind.
        tail = inputs[:, :, -width:, :]
        head = inputs[:, :, :width, :]
        wrapped = tf.concat([tail, inputs, head], axis=2)
        # Zero padding in the lat direction
        lat_paddings = [[0, 0], [width, width], [0, 0], [0, 0]]
        return tf.pad(wrapped, lat_paddings)

    def get_config(self):
        """Return the base layer config extended with ``pad_width``."""
        return {**super().get_config(), 'pad_width': self.pad_width}


class PeriodicConv2D(tf.keras.layers.Layer):
    """2-D convolution preceded by ``PeriodicPadding2D``.

    The input is padded by ``(kernel_size - 1) // 2`` cells (periodic along
    axis 2, zeros along axis 1) and then convolved with ``padding='valid'``.
    For odd kernel sizes the output therefore keeps the spatial size of the
    input; for even kernel sizes the padding is one cell short of what the
    valid convolution removes.

    Args:
        filters: number of output filters, forwarded to ``Conv2D``.
        kernel_size: an int, or a sequence whose first two entries are equal
            (only square kernels are supported).
        conv_kwargs: optional dict of extra keyword arguments forwarded to
            ``Conv2D``. ``None`` (the default) means no extra arguments.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, filters,
                 kernel_size,
                 conv_kwargs=None,
                 **kwargs, ):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        # Take a private copy: a shared `{}` default (or a dict the caller
        # keeps using) would otherwise be visible from, and mutable through,
        # every layer instance and its get_config() result.
        self.conv_kwargs = dict(conv_kwargs) if conv_kwargs else {}
        if not isinstance(kernel_size, int):
            assert kernel_size[0] == kernel_size[1], 'PeriodicConv2D only works for square kernels'
            kernel_size = kernel_size[0]
        pad_width = (kernel_size - 1) // 2
        self.padding = PeriodicPadding2D(pad_width)
        self.conv = Conv2D(
            filters, kernel_size, padding='valid', **self.conv_kwargs
        )

    def call(self, inputs):
        """Pad ``inputs`` periodically, then apply the valid convolution."""
        return self.conv(self.padding(inputs))

    def get_config(self):
        """Return the base layer config plus the constructor arguments."""
        config = super().get_config()
        config.update({'filters': self.filters, 'kernel_size': self.kernel_size, 'conv_kwargs': self.conv_kwargs})
        return config

In [3]:
def build_cnn(filters, kernels, input_shape, dr=0):
    """Fully convolutional network

    Stacks one ``PeriodicConv2D`` per (filter, kernel) pair. Every layer except
    the last is followed by a ``LeakyReLU``; the last layer is left linear.

    Args:
        filters: filter counts, one per convolution.
        kernels: kernel sizes, paired element-wise with ``filters``.
        input_shape: shape of one input sample (without the batch axis).
        dr: accepted but not used by this function.

    Returns:
        A ``keras.models.Model`` mapping the input tensor to the final
        convolution's output.
    """
    net_input = Input(shape=input_shape)
    hidden = net_input
    hidden_specs = zip(filters[:-1], kernels[:-1])
    for n_filters, kernel in hidden_specs:
        hidden = PeriodicConv2D(n_filters, kernel)(hidden)
        hidden = LeakyReLU()(hidden)
    net_output = PeriodicConv2D(filters[-1], kernels[-1])(hidden)
    return keras.models.Model(net_input, net_output)

def create_predictions(model, dg):
    """Create non-iterative predictions

    Runs ``model.predict(dg)``, labels the result with the generator's
    coordinates, un-normalises it with the generator's mean/std for the output
    levels, and splits it into one variable per level-name prefix (the text
    before the first ``'_'`` in each ``level_names`` entry).

    Args:
        model: object exposing ``predict(dg)``.
        dg: data generator exposing ``valid_time``, ``data``, ``output_idxs``,
            ``mean`` and ``std`` as used below.

    Returns:
        Tuple ``(preds_un, dataset)``: ``preds_un`` is the raw (still
        normalised) prediction as an ``xr.DataArray`` with dims
        (time, lat, lon, level); ``dataset`` is the un-normalised prediction
        as an ``xr.Dataset`` with one variable per prefix, in order of first
        appearance among the output levels.
    """
    output_levels = dg.data.isel(level=dg.output_idxs)
    preds_un = xr.DataArray(
        model.predict(dg),
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg.valid_time, 'lat': dg.data.lat, 'lon': dg.data.lon,
                'level': output_levels.level,
                'level_names': output_levels.level_names
               },
    )
    # Unnormalize
    preds = (preds_un * dg.std.isel(level=dg.output_idxs).values +
             dg.mean.isel(level=dg.output_idxs).values)

    # Variable prefix of every output level, e.g. 'z_500' -> 'z'.
    level_vars = [name.split('_')[0] for name in preds.level_names.values]
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # variable order of the returned Dataset (and of anything built from
    # `to_array()`) is the same on every run; a set would not guarantee that.
    unique_vars = list(dict.fromkeys(level_vars))

    das = []
    for v in unique_vars:
        # Exact match: a substring test would also pick up levels of any
        # variable whose prefix happens to be contained in `v`.
        idxs = [i for i, level_var in enumerate(level_vars) if level_var == v]
        da = preds.isel(level=idxs).squeeze().drop('level_names')
        das.append({v: da})
    # Any 'level' coordinate still attached to the variables is removed after
    # merging; compat='override' skips the conflict check between them.
    return preds_un, xr.merge(das, compat='override').drop('level')

In [4]:
def predictions(var_name, valid):
    """Load the saved CNN for ``var_name``, predict on its test set and score it.

    The network is rebuilt with the input-channel count that belongs to
    ``var_name``, its weights are loaded from the ``72_gpu_annual<var_name>.h5``
    file, and predictions for the test generator returned by
    ``gd.create_data`` are computed. The un-normalised predictions are also
    written to ``72_gpu_annual<var_name>.npy``.

    Args:
        var_name: name of the extra-variable configuration; selects both the
            input-channel count and the weight / output file names.
        valid: reference data passed to ``compute_weighted_rmse``.

    Returns:
        Tuple ``(preds, rmse)``: the raw (normalised) prediction returned by
        ``create_predictions`` and the computed weighted RMSE of the
        un-normalised prediction against ``valid``.
    """
    _, _, dg_test = gd.create_data(var_name)

    # Input-channel count per configuration; anything not listed uses 3.
    input_channels = {'specific_humidity': 4, 'pot_vort': 4, 'const': 5, 'orig': 2}
    cnn = build_cnn([64, 64, 64, 64, 2], [5, 5, 5, 5, 5],
                    (32, 64, input_channels.get(var_name, 3)))

    cnn.compile(keras.optimizers.Adam(1e-4), 'mse')

    cnn.load_weights('/rds/general/user/mc4117/home/WeatherBench/saved_models/72_gpu_annual' + str(var_name) + '.h5')

    number_of_forecasts = 1

    # Allocated once the first prediction is available, so the array follows
    # the real (variable, time, lat, lon) shape instead of a hard-coded one.
    pred_ensemble = None

    for j in range(number_of_forecasts):
        preds, output = create_predictions(cnn, dg_test)
        pred2 = np.asarray(output.to_array(), dtype=np.float32).squeeze()
        if pred_ensemble is None:
            pred_ensemble = np.empty(pred2.shape + (number_of_forecasts,), dtype=np.float32)
            print(pred_ensemble.shape)
        print(j)
        pred_ensemble[..., j] = pred2

    # Written once, after every ensemble member has been filled in.
    filename2 = '/rds/general/user/mc4117/ephemeral/saved_pred/72_gpu_annual' + str(var_name)
    np.save(filename2 + '.npy', pred_ensemble)

    return preds, compute_weighted_rmse(output, valid).compute()

In [5]:
DATADIR = '/rds/general/user/mc4117/home/WeatherBench/data/'

z500_valid = load_test_data(f'{DATADIR}geopotential_500', 'z')
t850_valid = load_test_data(f'{DATADIR}temperature_850', 't')
valid = xr.merge([z500_valid, t850_valid])


In [6]:
preds_orig, rmse = predictions('orig', valid)
rmse

Loading data into RAM
Loading data into RAM
Loading data into RAM
(2, 17448, 32, 64, 1)
0


In [7]:
preds_sh, rmse_sh = predictions('specific_humidity', valid)
rmse_sh

Loading data into RAM
Loading data into RAM
Loading data into RAM
(2, 17448, 32, 64, 1)
0


In [8]:
preds_pv, rmse_pv = predictions('pot_vort', valid)
rmse_pv

Loading data into RAM
Loading data into RAM
Loading data into RAM
(2, 17448, 32, 64, 1)
0


In [9]:
preds_const, rmse_const = predictions('const', valid)
rmse_const

Loading data into RAM
Loading data into RAM
Loading data into RAM
(2, 17448, 32, 64, 1)
0


In [10]:
preds_2mt, rmse_2mt = predictions('2m temp', valid)
rmse_2mt

Loading data into RAM
Loading data into RAM
Loading data into RAM
(2, 17448, 32, 64, 1)
0


In [11]:
preds_sr, rmse_sr = predictions('solar rad', valid)
rmse_sr

Loading data into RAM
Loading data into RAM
Loading data into RAM
(2, 17448, 32, 64, 1)
0


In [12]:
stack_out = np.stack([preds_orig, preds_sh, preds_pv, preds_const, 
                      preds_2mt,  preds_sr])  

In [13]:
dg_train, dg_valid, dg_test = gd.create_data('orig')

# Targets of every test batch, joined along the first axis in batch order.
# All batches are collected first and concatenated once: calling
# np.concatenate inside the loop re-copies the growing array on every batch.
y1 = np.concatenate([dg_test[batch_idx][1] for batch_idx in range(len(dg_test))])

Loading data into RAM
Loading data into RAM
Loading data into RAM


In [14]:
stack_out.shape

(6, 17448, 32, 64, 2)

In [19]:
# Channel 0 of every stacked model prediction, one entry per model.
# Iterating over stack_out follows its first axis, so the number of models
# is taken from the data rather than written out as a constant.
stack_test_list = [member[..., 0] for member in stack_out]

In [21]:
print(stack_test_list[0].shape)
print(y1[...,0].shape)

(17448, 32, 64)
(17448, 32, 64)


## Linear Regression

In [22]:
_, nlat, nlon = stack_test_list[0].shape; nlat, nlon

(32, 64)

In [57]:
stacked_stack_reshape = np.stack(stack_test_list).transpose((1, 0, 2, 3))
stacked_stack_reshape.shape

(17448, 6, 32, 64)

In [58]:
y1[...,0].shape

(17448, 32, 64)

In [59]:
stacked_stack_reshape[:, :, 0, 0].shape

(17448, 6)

In [65]:
from sklearn.linear_model import LinearRegression

# One linear regressor per grid point: the features are the channel-0
# predictions of the individual models at that point, the target is channel 0
# of y1 at the same point.
# NOTE(review): both the features and y1 come from dg_test here, so the
# blending weights are fitted on the same data that is scored a few cells
# further down — confirm this is intended.
nlat, nlon = stacked_stack_reshape.shape[2:]

linear_regressor_array = []

for i in range(nlat):
    linear_regressor_list = []
    for j in range(nlon):
        lr = LinearRegression()
        lr.fit(stacked_stack_reshape[:, :, i, j], y1[:, i, j, 0])
        linear_regressor_list.append(lr)
    linear_regressor_array.append(linear_regressor_list)

In [75]:
# Blended prediction per grid point, laid out as (lat, lon, time). The sizes
# are read from the stacked predictions instead of being hard-coded.
n_time = stacked_stack_reshape.shape[0]
nlat, nlon = stacked_stack_reshape.shape[2:]
reg_array = np.zeros((nlat, nlon, n_time))

for i in range(nlat):
    for j in range(nlon):
        reg_array[i, j] = linear_regressor_array[i][j].predict(stacked_stack_reshape[:, :, i, j])

In [76]:
# Un-normalise the blended prediction with the level-0 mean/std of dg_test and
# label it. reg_array is stored as (lat, lon, time), hence the dims order.
# NOTE(review): the time axis is taken as dg_test.data.time[72:], whereas
# create_predictions labels model output with dg.valid_time — presumably the
# two coincide for a 72-step lead time; confirm.
fc = xr.DataArray(
            reg_array * dg_test.std.isel(level=[0]).values+dg_test.mean.isel(level=[0]).values,
            dims=['lat', 'lon', 'time'],
            coords={'lat': dg_test.data.lat, 'lon': dg_test.data.lon, 'time':dg_test.data.time[72:]
                },
    )

In [80]:

# Un-normalise the concatenated dg_test targets. The level-0 mean/std are
# applied to the whole of y1; only channel 0 is used below.
real_unnorm =y1* dg_test.std.isel(level=[0]).values+dg_test.mean.isel(level=[0]).values

# Wrap channel 0 as variable 'z' so it can be scored against `fc`.
# NOTE(review): assumes channel 0 / level 0 is the geopotential field and that
# dg_test.data.time[72:] lines up with the target times — confirm.
real_ds = xr.Dataset({
    'z': xr.DataArray(
        real_unnorm[..., 0],
        dims=['time', 'lat', 'lon'],
        coords={'time':dg_test.data.time[72:], 'lat': dg_test.data.lat, 'lon': dg_test.data.lon,
                },
    ),
})

In [81]:
compute_weighted_rmse(fc, real_ds).compute()

In [95]:
# Testing linear regression

def testing_predictions(var_name):
    """Predict on the second test set with the saved CNN for ``var_name``.

    Rebuilds the network with the input-channel count belonging to
    ``var_name``, loads the ``72_gpu_annual<var_name>.h5`` weights and runs
    ``create_predictions`` on the generator from
    ``gd.create_data_second_test``.

    Args:
        var_name: name of the extra-variable configuration.

    Returns:
        The ``(preds, output)`` tuple produced by ``create_predictions``.
    """
    dg_test2 = gd.create_data_second_test(var_name)

    # Input-channel count per configuration; anything not listed uses 3.
    channels_by_var = {'specific_humidity': 4, 'pot_vort': 4, 'const': 5, 'orig': 2}
    n_channels = channels_by_var.get(var_name, 3)
    cnn = build_cnn([64, 64, 64, 64, 2], [5, 5, 5, 5, 5], (32, 64, n_channels))

    weights_path = '/rds/general/user/mc4117/home/WeatherBench/saved_models/72_gpu_annual' + str(var_name) + '.h5'
    cnn.load_weights(weights_path)

    return create_predictions(cnn, dg_test2)

In [96]:
preds_test, output_test = testing_predictions('orig')
preds_test_sh, output_test_sh = testing_predictions('specific_humidity')
preds_test_pv, output_test_pv = testing_predictions('pot_vort')
preds_test_2mt, output_test_2mt = testing_predictions('2m temp')
preds_test_const, output_test_const = testing_predictions('const')
preds_test_sr, output_test_sr = testing_predictions('solar rad')


Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM
Loading data into RAM


In [97]:
stack_out_test = np.stack([preds_test, preds_test_sh, preds_test_pv, preds_test_const, 
                      preds_test_2mt,  preds_test_sr])  

In [98]:
dg_test2 = gd.create_data_second_test('orig')

# Targets of every batch of the second test set, joined along the first axis
# in batch order. Collected first and concatenated once: calling
# np.concatenate inside the loop re-copies the growing array on every batch.
y1_2 = np.concatenate([dg_test2[batch_idx][1] for batch_idx in range(len(dg_test2))])

Loading data into RAM
Loading data into RAM


In [99]:
# Channel 0 of every stacked model prediction on the second test set.
# Iterating over stack_out_test follows its first axis, so the number of
# models is taken from the data rather than written out as a constant.
stack_test_list2 = [member[..., 0] for member in stack_out_test]

In [100]:
stacked_stack_reshape = np.stack(stack_test_list2).transpose((1, 0, 2, 3))
stacked_stack_reshape.shape

(17448, 6, 32, 64)

In [101]:
# Apply the already-fitted per-grid-point regressors to the second test set.
# Result layout is (lat, lon, time); the sizes are read from the stacked
# predictions instead of being hard-coded.
n_time = stacked_stack_reshape.shape[0]
nlat, nlon = stacked_stack_reshape.shape[2:]
reg_array_2 = np.zeros((nlat, nlon, n_time))

for i in range(nlat):
    for j in range(nlon):
        reg_array_2[i, j] = linear_regressor_array[i][j].predict(stacked_stack_reshape[:, :, i, j])

In [102]:
# Un-normalise the blended prediction for the SECOND test set and label it.
# This must be built from reg_array_2 (computed from dg_test2 predictions);
# using reg_array would score the first test set's forecast against the
# second test set's truth.
fc_valid = xr.DataArray(
    reg_array_2 * dg_test2.std.isel(level=[0]).values + dg_test2.mean.isel(level=[0]).values,
    dims=['lat', 'lon', 'time'],
    coords={'lat': dg_test2.data.lat, 'lon': dg_test2.data.lon, 'time': dg_test2.data.time[72:]},
)

In [103]:
# Un-normalise the concatenated dg_test2 targets. The level-0 mean/std are
# applied to the whole of y1_2; only channel 0 is used below.
real_unnorm_valid =y1_2* dg_test2.std.isel(level=[0]).values+dg_test2.mean.isel(level=[0]).values

# Wrap channel 0 as variable 'z' so it can be scored against `fc_valid`.
# NOTE(review): assumes channel 0 / level 0 is the geopotential field and that
# dg_test2.data.time[72:] lines up with the target times — confirm.
real_ds_valid = xr.Dataset({
    'z': xr.DataArray(
        real_unnorm_valid[..., 0],
        dims=['time', 'lat', 'lon'],
        coords={'time':dg_test2.data.time[72:], 'lat': dg_test2.data.lat, 'lon': dg_test2.data.lon,
                },
    ),
})

In [104]:
compute_weighted_rmse(fc_valid, real_ds_valid).compute()