# Train a CNN

Adding more data, but the error actually increases.

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are reloaded
# before each cell runs and edits to local code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Depending on your combination of package versions, this can raise a lot of TF warnings... 
import os
import pickle
import re
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
import xarray as xr
from tensorflow.keras.layers import *

from src.score import *

In [3]:
# Display the TensorFlow version this notebook runs with.
tf.__version__

'2.1.0'

In [4]:
# Configure seaborn: 'darkgrid' style and 'notebook' scaling context.
sns.set_style('darkgrid')
sns.set_context('notebook')

In [5]:
# Root directory of the WeatherBench data; one sub-directory per variable is opened from it below.
# Set the WEATHERBENCH_DATADIR environment variable to run on another machine; the default
# is the original cluster path.
DATADIR = os.environ.get('WEATHERBENCH_DATADIR', '/rds/general/user/mc4117/home/WeatherBench/data/')

## Create data generator

First up, we want to write our own Keras data generator. The key advantage over just feeding in numpy arrays is that we don't have to load the data twice, because our inputs and outputs are the same data, just offset by the lead time. Since the dataset is quite large and we might run out of CPU RAM, this is important.

In [6]:
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence yielding (X, y) batches of normalized data, where y is taken
    ``lead_time`` time steps after X from the same array."""

    def __init__(self, ds, var_dict, lead_time, batch_size=32, shuffle=True, load=True, 
                 mean=None, std=None, output_vars=None):
        """
        Data generator for WeatherBench data.
        Template from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
        Args:
            ds: Dataset containing all variables
            var_dict: Dictionary of the form {'long_name': ('var', levels)}. The special key
                'constants' maps to a list of variable names that are broadcast along time.
                If selecting `levels` raises ValueError (e.g. single-level data, use None),
                the variable is added as one channel.
            lead_time: Offset between input and target, in time steps of `ds`
                (equal to hours for hourly data).
            batch_size: Batch size
            shuffle: bool. If True, data is shuffled.
            load: bool. If True, dataset is loaded into RAM.
            mean: If None, compute mean from data.
            std: If None, compute standard deviation from data.
            output_vars: Optional list of regex patterns; channels whose name (e.g. 'z_500')
                matches any pattern with `re.match` become targets. If None, all channels
                are targets.
        """

        self.ds = ds
        self.var_dict = var_dict
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.lead_time = lead_time

        data = []
        level_names = []
        # Dummy length-1 level axis so single-level fields can be concatenated along 'level'.
        generic_level = xr.DataArray([1], coords={'level': [1]}, dims=['level'])
        for long_var, params in var_dict.items():
            if long_var == 'constants': 
                for var in params:
                    # Constants get both a dummy level axis and the dataset's time axis.
                    data.append(ds[var].expand_dims(
                        {'level': generic_level, 'time': ds.time}, (1, 0)
                    ))
                    level_names.append(var)
            else:
                var, levels = params
                try:
                    data.append(ds[var].sel(level=levels))
                    level_names += [f'{var}_{level}' for level in levels]
                except ValueError:
                    # Level selection failed: treat the variable as a single channel.
                    data.append(ds[var].expand_dims({'level': generic_level}, 1))
                    level_names.append(var)

        self.data = xr.concat(data, 'level').transpose('time', 'lat', 'lon', 'level')
        self.data['level_names'] = xr.DataArray(
            level_names, dims=['level'], coords={'level': self.data.level})
        if output_vars is None:
            # Use every channel of *this* generator as a target. The previous code read the
            # global `dg_valid`, which fails on a fresh kernel and is wrong for other generators.
            self.output_idxs = list(range(len(self.data.level)))
        else:
            self.output_idxs = [i for i, l in enumerate(self.data.level_names.values) 
                                if any(re.match(o, l) for o in output_vars)]

        # Normalize each channel with statistics taken over time, lat and lon.
        # (An alternative, `self.data.std('time').mean(('lat', 'lon'))`, was tried and disabled.)
        self.mean = self.data.mean(('time', 'lat', 'lon')).compute() if mean is None else mean
        self.std = self.data.std(('time', 'lat', 'lon')).compute() if std is None else std
        self.data = (self.data - self.mean) / self.std

        # The last `lead_time` steps have no target, so they cannot be used as inputs.
        self.n_samples = self.data.isel(time=slice(0, -lead_time)).shape[0]
        self.init_time = self.data.isel(time=slice(None, -lead_time)).time
        self.valid_time = self.data.isel(time=slice(lead_time, None)).time

        self.on_epoch_end()

        # For some weird reason calling .load() earlier messes up the mean and std computations
        if load:
            print('Loading data into RAM')
            self.data.load()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(self.n_samples / self.batch_size))

    def __getitem__(self, i):
        'Generate one batch of data'
        idxs = self.idxs[i * self.batch_size:(i + 1) * self.batch_size]
        X = self.data.isel(time=idxs).values
        # Targets: the same samples shifted by the lead time, restricted to the output channels.
        y = self.data.isel(time=idxs + self.lead_time, level=self.output_idxs).values
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.idxs = np.arange(self.n_samples)
        if self.shuffle:
            np.random.shuffle(self.idxs)

# Input variables: each key is both a data sub-directory name and a label; the value is
# (variable name in the files, list of levels). 'constants' lists fields that the generator
# broadcasts along time.
var_dict = {
    'geopotential': ('z', [500, 850]),
    'temperature': ('t', [500, 850]),
    'specific_humidity': ('q', [850]),
    '2m_temperature': ('t2m', None),
    'potential_vorticity': ('pv', [50, 100]),
    'constants': ['lsm', 'orography']
}

# Open one multi-file dataset per key of var_dict, from the sub-directory of that name.
ds = [xr.open_mfdataset(f'{DATADIR}/{var}/*.nc', combine='by_coords') for var in var_dict.keys()]

# Combine the per-variable datasets into a single dataset.
ds_whole = xr.merge(ds)

# Split by year: 2014-2015 for training, 2016 for validation, 2017-2018 for testing.
ds_train = ds_whole.sel(time=slice('2014', '2015'))
ds_valid = ds_whole.sel(time=slice('2016', '2016'))
ds_test = ds_whole.sel(time=slice('2017', '2018'))

# bs: batch size; lead_time: input-to-target offset in time steps;
# output_vars: patterns selecting the target channels by name.
bs=32
lead_time=72
output_vars = ['z_500', 't_850']

# Create a training and validation data generator. Use the train mean and std for validation as well.
dg_train = DataGenerator(ds_train, var_dict, lead_time, batch_size=bs, load=True, 
                         output_vars=output_vars)
dg_valid = DataGenerator(ds_valid, var_dict, lead_time, batch_size=bs, mean=dg_train.mean, std=dg_train.std, 
                         shuffle=False, output_vars=output_vars)

# The test generator is also normalized with the training statistics and left unshuffled.
dg_test = DataGenerator(ds_test, var_dict, lead_time, batch_size=bs, mean=dg_train.mean, std=dg_train.std, 
                         shuffle=False, output_vars=output_vars)

# Sanity check: fetch the first training batch and print the input / target shapes.
X, y = dg_train[0]; 

print(X.shape)
print(y.shape)

Loading data into RAM
Loading data into RAM
Loading data into RAM
(32, 32, 64, 10)
(32, 32, 64, 2)


## Load models

In [7]:
class PeriodicPadding2D(tf.keras.layers.Layer):
    """Pad a 4-D tensor by ``pad_width`` on each side: wrap-around along axis 2 and
    zeros along axis 1 (the lat direction)."""

    def __init__(self, pad_width, **kwargs):
        super().__init__(**kwargs)
        self.pad_width = pad_width

    def call(self, inputs, **kwargs):
        width = self.pad_width
        # Nothing to pad: hand the tensor back untouched.
        if width == 0:
            return inputs
        # Wrap-around along axis 2: the trailing columns go in front, the leading ones behind.
        trailing_cols = inputs[:, :, -width:, :]
        leading_cols = inputs[:, :, :width, :]
        wrapped = tf.concat([trailing_cols, inputs, leading_cols], axis=2)
        # Zero padding in the lat direction
        lat_paddings = [[0, 0], [width, width], [0, 0], [0, 0]]
        return tf.pad(wrapped, lat_paddings)

    def get_config(self):
        """Return the layer config including ``pad_width`` so the layer can be re-created."""
        config = super().get_config()
        config['pad_width'] = self.pad_width
        return config


class PeriodicConv2D(tf.keras.layers.Layer):
    """Conv2D applied after PeriodicPadding2D; the convolution itself uses 'valid' padding.

    Args:
        filters: Number of output filters of the convolution.
        kernel_size: int, or a pair of equal ints (only square kernels are supported).
        conv_kwargs: Optional dict of extra keyword arguments forwarded to Conv2D.
        **kwargs: Forwarded to the base Layer constructor.
    """

    def __init__(self, filters,
                 kernel_size,
                 conv_kwargs=None,
                 **kwargs, ):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        # Copy into a fresh dict: a `{}` default would be one object shared by every instance,
        # and storing the caller's dict by reference would let later mutations leak in.
        self.conv_kwargs = dict(conv_kwargs) if conv_kwargs else {}
        if not isinstance(kernel_size, int):
            assert kernel_size[0] == kernel_size[1], 'PeriodicConv2D only works for square kernels'
            kernel_size = kernel_size[0]
        # Padding of (k - 1) // 2 per side; for odd k the 'valid' convolution then
        # returns the input's spatial size.
        pad_width = (kernel_size - 1) // 2
        self.padding = PeriodicPadding2D(pad_width)
        self.conv = Conv2D(
            filters, kernel_size, padding='valid', **self.conv_kwargs
        )

    def call(self, inputs):
        return self.conv(self.padding(inputs))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({'filters': self.filters, 'kernel_size': self.kernel_size, 'conv_kwargs': self.conv_kwargs})
        return config
    
def build_cnn(filters, kernels, input_shape, dr=0):
    """Fully convolutional network.

    Every (filters, kernel) pair except the last becomes PeriodicConv2D -> LeakyReLU ->
    BatchNormalization (-> Dropout when ``dr > 0``); the last pair is a bare PeriodicConv2D
    output layer. Dropout is called with ``training=True``, so it stays active at
    prediction time as well.
    """
    net_input = Input(shape=input_shape)
    features = net_input
    hidden_specs = zip(filters[:-1], kernels[:-1])
    for n_filters, kernel in hidden_specs:
        features = PeriodicConv2D(n_filters, kernel)(features)
        features = LeakyReLU()(features)
        features = BatchNormalization()(features)
        if dr > 0:
            features = Dropout(dr)(features, training = True)
    net_output = PeriodicConv2D(filters[-1], kernels[-1])(features)
    return keras.models.Model(net_input, net_output)

def create_predictions(model, dg):
    """Create non-iterative predictions.

    Runs `model` over the generator `dg`, undoes the normalization with `dg.mean` / `dg.std`
    for the output channels, and returns an xarray Dataset with one variable per variable
    prefix of the channel names (the part before the first '_', e.g. 'z' for 'z_500').
    The time coordinate is `dg.valid_time`, so `dg` should not be shuffled.
    """
    preds = xr.DataArray(
        # Model.predict accepts a keras Sequence; predict_generator is deprecated.
        model.predict(dg),
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg.valid_time, 'lat': dg.data.lat, 'lon': dg.data.lon, 
                'level': dg.data.isel(level=dg.output_idxs).level,
                'level_names': dg.data.isel(level=dg.output_idxs).level_names
               },
    )
    # Unnormalize
    preds = (preds * dg.std.isel(level=dg.output_idxs).values + 
             dg.mean.isel(level=dg.output_idxs).values)
    # Variable prefix of every output channel, and the distinct prefixes in first-seen order
    # (deterministic, unlike iterating a set).
    level_vars = [l.split('_')[0] for l in preds.level_names.values]
    unique_vars = list(dict.fromkeys(level_vars))

    das = []
    for v in unique_vars:
        # Exact match: a substring test would e.g. put 't' channels under 't2m'.
        idxs = [i for i, vv in enumerate(level_vars) if vv == v]
        da = preds.isel(level=idxs).squeeze().drop('level_names')
        das.append({v: da})
    # The leftover 'level' coordinate is removed once, after merging.
    return xr.merge(das, compat = 'override').drop('level')

In [8]:
# Directory holding the saved weights of the individual member networks.
MODEL_DIR = '/rds/general/user/mc4117/ephemeral/saved_models'
# Number of member networks; their weight files are numbered 0 .. N_MEMBERS - 1.
N_MEMBERS = 7


def load_member(member_idx):
    """Build one member CNN, compile it, and load `train_72_multi_data_gpu_<member_idx>.h5`.

    All members share the same architecture: five 5x5 periodic convolutions with
    64, 64, 64, 64 and 2 filters on a (32, 64, 10) input, with dropout rate 0.1.
    """
    model = build_cnn([64, 64, 64, 64, 2], [5, 5, 5, 5, 5], (32, 64, 10), dr = 0.1)
    model.compile(keras.optimizers.Adam(1e-4), 'mse')
    model.load_weights(f'{MODEL_DIR}/train_72_multi_data_gpu_{member_idx}.h5')
    return model


# The individual names are kept because later cells refer to cnn_1 ... cnn_7 directly.
cnn_1, cnn_2, cnn_3, cnn_4, cnn_5, cnn_6, cnn_7 = [load_member(i) for i in range(N_MEMBERS)]

In [9]:
model_list = [cnn_1, cnn_2, cnn_3, cnn_4, cnn_5, cnn_6, cnn_7]

# Freeze every member and prefix its layer names with 'ensemble_<n>_' so that the names
# are distinct once all members are combined into one model.
for member_no, model in enumerate(model_list, start=1):
    prefix = 'ensemble_' + str(member_no) + '_'
    for layer in model.layers:
        layer.trainable = False
        # Skip layers that are already prefixed, so re-running this cell does not
        # stack the prefix a second time.
        if not layer.name.startswith(prefix):
            layer._name = prefix + layer.name

In [10]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import concatenate
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model

# One input tensor per member model: the ensemble takes a list of inputs.
ensemble_visible = [model.input for model in model_list]
# Collect the members' outputs and concatenate them (default: along the last axis).
ensemble_outputs = [model.output for model in model_list]
merge = concatenate(ensemble_outputs)
# Small trainable head on the concatenated member outputs; its final layer has 2 units,
# matching the two target channels.
hidden = Dense(10, activation='relu')(merge)
output = Dense(2)(hidden)
ensemble_model = Model(inputs=ensemble_visible, outputs=output)
# plot graph of ensemble
plot_model(ensemble_model, show_shapes=True)
# Compile with the same optimizer settings and MSE loss as the member models.
ensemble_model.compile(keras.optimizers.Adam(1e-4), 'mse')

## Fit model

In [11]:
# Materialise the whole training set as two arrays (inputs X1, targets y1) so it can be
# passed to the multi-input ensemble. Batches arrive in the generator's current order,
# which is shuffled for dg_train.
# Collect all batches first and concatenate once: concatenating inside the loop re-copies
# the growing array on every iteration.
train_batches = [dg_train[i] for i in range(len(dg_train))]
X1 = np.concatenate([X_batch for X_batch, _ in train_batches])
y1 = np.concatenate([y_batch for _, y_batch in train_batches])

In [12]:
# Shape of the ensemble's symbolic output tensor; it should match the targets' shape below.
output.shape

TensorShape([None, 32, 64, 2])

In [13]:
# Shape of the assembled training targets: (samples, lat, lon, output channels).
y1.shape

(17448, 32, 64, 2)

In [14]:
# Every input head of the ensemble receives the same training array.
X = [X1] * len(model_list)

# Stop as soon as the validation loss fails to improve for one epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=1,
    verbose=1,
    mode='auto',
)

# Fit the ensemble, holding out 20% of the samples for validation.
ensemble_model.fit(
    X,
    y1,
    shuffle = True,
    epochs=20,
    validation_split = 0.2,
    verbose=1,
    callbacks=[early_stopping],
)

Train on 13958 samples, validate on 3490 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x2ab9b6932cf8>

## Create predictions

In [15]:
def create_predictions(model, dg):
    """Create non-iterative predictions.

    Redefines the helper of the same name from the 'Load models' section; this version
    also prints the channel indices used for each variable.

    Runs `model` over the generator `dg`, undoes the normalization with `dg.mean` / `dg.std`
    for the output channels, and returns an xarray Dataset with one variable per variable
    prefix of the channel names (the part before the first '_', e.g. 'z' for 'z_500').
    The time coordinate is `dg.valid_time`, so `dg` should not be shuffled.
    """
    preds = xr.DataArray(
        # Model.predict accepts a keras Sequence; predict_generator is deprecated.
        model.predict(dg),
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg.valid_time, 'lat': dg.data.lat, 'lon': dg.data.lon, 
                'level': dg.data.isel(level=dg.output_idxs).level,
                'level_names': dg.data.isel(level=dg.output_idxs).level_names
               },
    )
    # Unnormalize
    preds = (preds * dg.std.isel(level=dg.output_idxs).values + 
             dg.mean.isel(level=dg.output_idxs).values)
    # Variable prefix of every output channel, and the distinct prefixes in first-seen order
    # (deterministic, unlike iterating a set).
    level_vars = [l.split('_')[0] for l in preds.level_names.values]
    unique_vars = list(dict.fromkeys(level_vars))

    das = []
    for v in unique_vars:
        # Exact match: a substring test would e.g. put 't' channels under 't2m'.
        idxs = [i for i, vv in enumerate(level_vars) if vv == v]
        print(v, idxs)
        da = preds.isel(level=idxs).squeeze().drop('level_names')
        das.append({v: da})
    # The leftover 'level' coordinate is removed once, after merging.
    return xr.merge(das, compat = 'override').drop('level')

In [16]:
# Persist the ensemble's weights to an HDF5 file.
ensemble_model.save_weights('/rds/general/user/mc4117/ephemeral/saved_models/ensemble_model.h5')

In [17]:
# Un-normalized test forecast of every individual member, for comparison with the ensemble.
fc_1, fc_2, fc_3, fc_4, fc_5, fc_6, fc_7 = [
    create_predictions(member, dg_test)
    for member in (cnn_1, cnn_2, cnn_3, cnn_4, cnn_5, cnn_6, cnn_7)
]

Instructions for updating:
Please use Model.predict, which supports generators.
z [0]
t [1]
z [0]
t [1]
z [0]
t [1]
z [0]
t [1]
z [0]
t [1]
z [0]
t [1]
z [0]
t [1]


In [18]:
# Materialise the whole test set as two arrays (inputs X1_test, targets y1_test).
# dg_test is not shuffled, so the rows are in chronological order.
# Collect all batches first and concatenate once: concatenating inside the loop re-copies
# the growing array on every iteration.
test_batches = [dg_test[i] for i in range(len(dg_test))]
X1_test = np.concatenate([X_batch for X_batch, _ in test_batches])
y1_test = np.concatenate([y_batch for _, y_batch in test_batches])

In [19]:
# Every input head of the ensemble receives the same test array.
X_test = [X1_test] * len(model_list)
# Ensemble forecast on the test set, still in normalized units.
fc_ensemble = ensemble_model.predict(X_test)

In [20]:
# Wrap the raw ensemble output in a labelled array, using the test generator's coordinates.
preds = xr.DataArray(
        fc_ensemble,
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg_test.valid_time, 'lat': dg_test.data.lat, 'lon': dg_test.data.lon, 
                'level': dg_test.data.isel(level=dg_test.output_idxs).level,
                'level_names': dg_test.data.isel(level=dg_test.output_idxs).level_names
               },
    )
# Unnormalize
preds = (preds * dg_test.std.isel(level=dg_test.output_idxs).values + 
        dg_test.mean.isel(level=dg_test.output_idxs).values)
# Variable prefix of every output channel, and the distinct prefixes in first-seen order
# (deterministic, unlike iterating a set).
level_vars = [l.split('_')[0] for l in preds.level_names.values]
unique_vars = list(dict.fromkeys(level_vars))

das = []
for v in unique_vars:
    # Exact match: a substring test would e.g. put 't' channels under 't2m'.
    idxs = [i for i, vv in enumerate(level_vars) if vv == v]
    print(v, idxs)
    da = preds.isel(level=idxs).squeeze().drop('level_names')
    das.append({v: da})
# The leftover 'level' coordinate is removed once, after merging.
fc_ensemble_unnorm = xr.merge(das, compat = 'override').drop('level')

z [0]
t [1]


In [21]:
# Display the un-normalized ensemble forecast dataset.
fc_ensemble_unnorm

In [22]:
# Undo the normalization of the test targets. The output channels and target times are taken
# from the generator instead of hard-coded indices ([0, 3]) and offset (72), so this stays
# correct if `output_vars` or `lead_time` change.
out_std_test = dg_test.std.isel(level=dg_test.output_idxs).values
out_mean_test = dg_test.mean.isel(level=dg_test.output_idxs).values
real_unnorm = y1_test * out_std_test + out_mean_test

# Ground truth with the same layout as the forecasts: channel 0 is 'z', channel 1 is 't'.
real_ds = xr.Dataset({
    var_name: xr.DataArray(
        real_unnorm[..., channel],
        dims=['time', 'lat', 'lon'],
        coords={'time': dg_test.valid_time, 'lat': dg_test.data.lat, 'lon': dg_test.data.lon},
    )
    for channel, var_name in enumerate(['z', 't'])
})

In [23]:
# Weighted RMSE of the trained ensemble on the test set.
compute_weighted_rmse(fc_ensemble_unnorm, real_ds).compute()

In [24]:
# Weighted RMSE of a single member (cnn_1) on the same test targets, for comparison.
compute_weighted_rmse(fc_1, real_ds).compute()

In [25]:
# Ensemble forecast on the training inputs (X is the list of training arrays used for fitting),
# still in normalized units.
fc_ensemble_train = ensemble_model.predict(X)

In [26]:

# Wrap the ensemble's training-set output in a labelled array.
# NOTE: dg_train is shuffled, so the rows follow the shuffled order in which the training
# arrays were assembled, while `valid_time` is chronological. The time labels are therefore
# positional only. The training truth dataset is labelled the same way, so the RMSE still
# compares matching samples.
preds = xr.DataArray(
        fc_ensemble_train,
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg_train.valid_time, 'lat': dg_train.data.lat, 'lon': dg_train.data.lon, 
                'level': dg_train.data.isel(level=dg_train.output_idxs).level,
                'level_names': dg_train.data.isel(level=dg_train.output_idxs).level_names
               },
    )
# Unnormalize
preds = (preds * dg_train.std.isel(level=dg_train.output_idxs).values + 
        dg_train.mean.isel(level=dg_train.output_idxs).values)
# Variable prefix of every output channel, and the distinct prefixes in first-seen order
# (deterministic, unlike iterating a set).
level_vars = [l.split('_')[0] for l in preds.level_names.values]
unique_vars = list(dict.fromkeys(level_vars))

das = []
for v in unique_vars:
    # Exact match: a substring test would e.g. put 't' channels under 't2m'.
    idxs = [i for i, vv in enumerate(level_vars) if vv == v]
    print(v, idxs)
    da = preds.isel(level=idxs).squeeze().drop('level_names')
    das.append({v: da})
# The leftover 'level' coordinate is removed once, after merging.
fc_ensemble_train_unnorm = xr.merge(das, compat = 'override').drop('level')

z [0]
t [1]


In [27]:
# Undo the normalization of the training targets. The output channels and target times are
# taken from the generator instead of hard-coded indices ([0, 3]) and offset (72), so this
# stays correct if `output_vars` or `lead_time` change.
out_std_train = dg_train.std.isel(level=dg_train.output_idxs).values
out_mean_train = dg_train.mean.isel(level=dg_train.output_idxs).values
real_unnorm_train = y1 * out_std_train + out_mean_train

# Ground truth with the same layout as the forecasts: channel 0 is 'z', channel 1 is 't'.
# NOTE: y1 is in the shuffled order of dg_train, so the time labels are positional only;
# the training forecasts are labelled the same way, so samples still line up.
real_ds_train = xr.Dataset({
    var_name: xr.DataArray(
        real_unnorm_train[..., channel],
        dims=['time', 'lat', 'lon'],
        coords={'time': dg_train.valid_time, 'lat': dg_train.data.lat, 'lon': dg_train.data.lon},
    )
    for channel, var_name in enumerate(['z', 't'])
})

# Weighted RMSE of the ensemble on the training set (computed eagerly, like the test scores).
compute_weighted_rmse(fc_ensemble_train_unnorm, real_ds_train).compute()

In [28]:
# Baseline: plain unweighted mean of the member forecasts, scored like the ensemble.
member_forecasts = [fc_1, fc_2, fc_3, fc_4, fc_5, fc_6, fc_7]
fc_average = sum(member_forecasts[1:], member_forecasts[0]) / len(member_forecasts)
compute_weighted_rmse(fc_average, real_ds).compute()