# Train a CNN

Observation: adding more input data actually results in a higher error.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Depending on your combination of package versions, this can raise a lot of TF warnings... 
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import *
import tensorflow.keras.backend as K
import seaborn as sns
import pickle
from src.score import *
from collections import OrderedDict
import re

In [3]:
tf.__version__

'2.1.0'

In [4]:
sns.set_style('darkgrid')
sns.set_context('notebook')

In [5]:
# Root directory of the input data; each variable is read from `{DATADIR}/{var}/*.nc` below.
# NOTE(review): hard-coded, user-specific absolute path — adjust before running elsewhere.
DATADIR = '/rds/general/user/mc4117/home/WeatherBench/data/'

## Create data generator

First up, we want to write our own Keras data generator. The key advantage over just feeding in numpy arrays is that we don't have to load the data twice, because our inputs and outputs are the same data, just offset by the lead time. Since the dataset is quite large and we might run out of CPU RAM, this is important.

In [6]:
class DataGenerator(keras.utils.Sequence):
    """Keras `Sequence` that yields (input, target) batches from one normalised array.

    Inputs and targets are slices of the same stacked array, offset from each other
    by `lead_time` steps along the time axis, so the data only has to be held once.
    """

    def __init__(self, ds, var_dict, lead_time, batch_size=32, shuffle=True, load=True,
                 mean=None, std=None, output_vars=None):
        """
        Data generator for WeatherBench data.
        Template from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
        Args:
            ds: Dataset containing all variables
            var_dict: Dictionary of the form {'long_name': ('var', levels)}. If selecting
                `levels` raises a ValueError (e.g. None for single-level data), the
                variable is added as one generic level. The special key 'constants'
                maps to a list of variable names that are broadcast along time.
            lead_time: Lead time, applied as an offset in steps along the time axis
            batch_size: Batch size
            shuffle: bool. If True, data is shuffled.
            load: bool. If True, dataset is loaded into RAM.
            mean: If None, compute mean from data.
            std: If None, compute standard deviation from data.
            output_vars: Optional list of regex patterns, matched with `re.match` against
                the channel names ('var_level' or 'var'). Matching channels form the
                target. If None, every channel is a target.
        """

        self.ds = ds
        self.var_dict = var_dict
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.lead_time = lead_time

        data = []
        level_names = []
        # Placeholder level coordinate for variables that have no 'level' dimension.
        generic_level = xr.DataArray([1], coords={'level': [1]}, dims=['level'])
        for long_var, params in var_dict.items():
            if long_var == 'constants':
                for var in params:
                    data.append(ds[var].expand_dims(
                        {'level': generic_level, 'time': ds.time}, (1, 0)
                    ))
                    level_names.append(var)
            else:
                var, levels = params
                try:
                    data.append(ds[var].sel(level=levels))
                    level_names += [f'{var}_{level}' for level in levels]
                except ValueError:
                    data.append(ds[var].expand_dims({'level': generic_level}, 1))
                    level_names.append(var)

        self.data = xr.concat(data, 'level').transpose('time', 'lat', 'lon', 'level')
        self.data['level_names'] = xr.DataArray(
            level_names, dims=['level'], coords={'level': self.data.level})
        if output_vars is None:
            # Use this generator's own channels (previously read the global `dg_valid`).
            self.output_idxs = range(len(self.data.level))
        else:
            self.output_idxs = [i for i, l in enumerate(self.data.level_names.values)
                                if any([bool(re.match(o, l)) for o in output_vars])]

        # Normalize: one mean/std per channel, taken over time, lat and lon.
        self.mean = self.data.mean(('time', 'lat', 'lon')).compute() if mean is None else mean
        self.std = self.data.std(('time', 'lat', 'lon')).compute() if std is None else std
        self.data = (self.data - self.mean) / self.std

        # The last `lead_time` steps have no target, so they cannot start a sample.
        self.n_samples = self.data.isel(time=slice(0, -lead_time)).shape[0]
        self.init_time = self.data.isel(time=slice(None, -lead_time)).time
        self.valid_time = self.data.isel(time=slice(lead_time, None)).time

        self.on_epoch_end()

        # For some weird reason calling .load() earlier messes up the mean and std computations
        if load:
            print('Loading data into RAM')
            self.data.load()

    def __len__(self):
        """Denotes the number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(self.n_samples / self.batch_size))

    def __getitem__(self, i):
        """Generate one batch: inputs at the sampled times, targets `lead_time` steps later."""
        idxs = self.idxs[i * self.batch_size:(i + 1) * self.batch_size]
        X = self.data.isel(time=idxs).values
        y = self.data.isel(time=idxs + self.lead_time, level=self.output_idxs).values
        return X, y

    def on_epoch_end(self):
        """Updates indexes after each epoch, reshuffling them if `shuffle` is set."""
        self.idxs = np.arange(self.n_samples)
        if self.shuffle:
            np.random.shuffle(self.idxs)

# Maps each data sub-directory name to ('short variable name', pressure levels).
# `None` levels mark a variable without a level dimension; 'constants' lists
# variables that DataGenerator broadcasts along the time axis.
var_dict = {
    'geopotential': ('z', [500, 850]),
    'temperature': ('t', [500, 850]),
    'specific_humidity': ('q', [850]),
    '2m_temperature': ('t2m', None),
    'potential_vorticity': ('pv', [50, 100]),
    'constants': ['lsm', 'orography']
}

# One lazily opened dataset per sub-directory, merged into a single dataset.
ds = [xr.open_mfdataset(f'{DATADIR}/{var}/*.nc', combine='by_coords') for var in var_dict.keys()]

ds_whole = xr.merge(ds)

# Split by year: 2014-2015 train, 2016 validation, 2017-2018 test.
ds_train = ds_whole.sel(time=slice('2014', '2015'))
ds_valid = ds_whole.sel(time=slice('2016', '2016'))
ds_test = ds_whole.sel(time=slice('2017', '2018'))

bs=32
lead_time=72
# Regex patterns selecting the target channels (names have the form 'var_level').
output_vars = ['z_500', 't_850']

# Create a training and validation data generator. Use the train mean and std for validation as well.
dg_train = DataGenerator(ds_train, var_dict, lead_time, batch_size=bs, load=True, 
                         output_vars=output_vars)
dg_valid = DataGenerator(ds_valid, var_dict, lead_time, batch_size=bs, mean=dg_train.mean, std=dg_train.std, 
                         shuffle=False, output_vars=output_vars)

# The test generator also reuses the training mean and std.
dg_test = DataGenerator(ds_test, var_dict, lead_time, batch_size=bs, mean=dg_train.mean, std=dg_train.std, 
                         shuffle=False, output_vars=output_vars)

# Sanity check: shapes of the first training batch.
X, y = dg_train[0]; 

print(X.shape)
print(y.shape)

Loading data into RAM
Loading data into RAM
Loading data into RAM
(32, 32, 64, 10)
(32, 32, 64, 2)


## Load models

In [8]:
class PeriodicPadding2D(tf.keras.layers.Layer):
    """Pads a 4-D tensor by wrapping around along axis 2 and zero-padding along axis 1.

    With the (time, lat, lon, level) layout used in this notebook, axis 2 is the
    longitude axis (periodic) and axis 1 the latitude axis (zero padded).
    """

    def __init__(self, pad_width, **kwargs):
        super().__init__(**kwargs)
        self.pad_width = pad_width

    def call(self, inputs, **kwargs):
        width = self.pad_width
        # Nothing to pad: hand the tensor back untouched.
        if width == 0:
            return inputs
        # Wrap-around: the last `width` columns go in front, the first `width` behind.
        left_wrap = inputs[:, :, -width:, :]
        right_wrap = inputs[:, :, :width, :]
        wrapped = tf.concat([left_wrap, inputs, right_wrap], axis=2)
        # Zero padding in the lat direction
        unpadded = [0, 0]
        return tf.pad(wrapped, [unpadded, [width, width], unpadded, unpadded])

    def get_config(self):
        config = super().get_config()
        config['pad_width'] = self.pad_width
        return config


class PeriodicConv2D(tf.keras.layers.Layer):
    """2-D convolution applied after `PeriodicPadding2D`.

    The input is padded by `(kernel_size - 1) // 2` on each side and then passed
    through a `Conv2D` with `padding='valid'`.
    """

    def __init__(self, filters,
                 kernel_size,
                 conv_kwargs=None,
                 **kwargs, ):
        """
        Args:
            filters: Number of output filters of the convolution.
            kernel_size: int, or a pair of two equal ints (only square kernels work).
            conv_kwargs: Optional dict of extra keyword arguments for `Conv2D`.
            **kwargs: Forwarded to the Keras `Layer` base class.
        """
        super().__init__(**kwargs)
        # A fresh dict per instance avoids sharing one mutable default between layers.
        conv_kwargs = {} if conv_kwargs is None else dict(conv_kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.conv_kwargs = conv_kwargs
        if not isinstance(kernel_size, int):
            assert kernel_size[0] == kernel_size[1], 'PeriodicConv2D only works for square kernels'
            kernel_size = kernel_size[0]
        pad_width = (kernel_size - 1) // 2
        self.padding = PeriodicPadding2D(pad_width)
        self.conv = Conv2D(
            filters, kernel_size, padding='valid', **conv_kwargs
        )

    def call(self, inputs):
        """Pad the input, then convolve it."""
        return self.conv(self.padding(inputs))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({'filters': self.filters, 'kernel_size': self.kernel_size, 'conv_kwargs': self.conv_kwargs})
        return config
    
def build_cnn(filters, kernels, input_shape, dr=0):
    """Fully convolutional network.

    Args:
        filters: Number of filters per convolution; the last entry is the number
            of output channels.
        kernels: Kernel size per convolution, same length as `filters`.
        input_shape: Shape of one input sample, without the batch axis.
        dr: Dropout rate applied after every hidden activation; 0 disables dropout.
            Previously this argument was accepted but ignored. A plain `Dropout`
            layer is only active during training, so predictions are unchanged.

    Returns:
        An uncompiled `keras.models.Model`.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    x = inputs = Input(shape=input_shape)
    for f, k in zip(filters[:-1], kernels[:-1]):
        x = PeriodicConv2D(f, k)(x)
        x = LeakyReLU()(x)
        if dr > 0:
            x = Dropout(dr)(x)
    # Final convolution has no activation: it maps straight to the output channels.
    output = PeriodicConv2D(filters[-1], kernels[-1])(x)
    return keras.models.Model(inputs, output)

def create_predictions(model, dg):
    """Create non-iterative predictions and convert them back to physical units.

    NOTE: a later cell of this notebook defines another `create_predictions` that
    skips the un-normalisation; once that cell has run, it replaces this function.

    Args:
        model: Keras model whose output channels correspond to `dg.output_idxs`.
        dg: DataGenerator providing the inputs, coordinates, mean and std.

    Returns:
        xr.Dataset with one data variable per predicted variable (the part of the
        channel name before the first '_'), in order of first appearance.
    """
    preds = xr.DataArray(
        # `predict_generator` is deprecated; `predict` accepts the generator directly.
        model.predict(dg),
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg.valid_time, 'lat': dg.data.lat, 'lon': dg.data.lon,
                'level': dg.data.isel(level=dg.output_idxs).level,
                'level_names': dg.data.isel(level=dg.output_idxs).level_names
               },
    )
    # Unnormalize
    preds = (preds * dg.std.isel(level=dg.output_idxs).values +
             dg.mean.isel(level=dg.output_idxs).values)

    var_of_channel = [l.split('_')[0] for l in preds.level_names.values]
    # De-duplicate while keeping first-appearance order. A `set` gives an order that
    # can differ between kernels, which would reorder the channels of `.to_array()`.
    unique_vars = []
    for v in var_of_channel:
        if v not in unique_vars:
            unique_vars.append(v)

    das = []
    for v in unique_vars:
        # Exact match: a substring test would also put 't' channels under 't2m'.
        idxs = [i for i, vv in enumerate(var_of_channel) if vv == v]
        da = preds.isel(level=idxs).squeeze().drop('level_names')
        das.append({v: da})
    return xr.merge(das, compat='override').drop('level')

In [11]:
# Directory holding the saved weights of the five base CNNs.
# NOTE(review): hard-coded, user-specific absolute path.
WEIGHTS_DIR = '/rds/general/user/mc4117/ephemeral/saved_pred'


def load_base_cnn(member, dr=0):
    """Build one base CNN and load the saved weights of ensemble member `member`.

    Args:
        member: Integer suffix of the weight file (`..._do{member}.h5`).
        dr: Dropout rate passed on to `build_cnn`.
    """
    model = build_cnn([64, 64, 64, 64, 2], [5, 5, 5, 5, 5], (32, 64, 10), dr=dr)
    model.load_weights(f'{WEIGHTS_DIR}/multi_data_gpu_no_bn_do{member}.h5')
    return model


# Same architecture and weight files as before: the first member uses dr=0, the rest dr=0.1.
cnn_1 = load_base_cnn(0)
cnn_2 = load_base_cnn(1, dr=0.1)
cnn_3 = load_base_cnn(2, dr=0.1)
cnn_4 = load_base_cnn(3, dr=0.1)
cnn_5 = load_base_cnn(4, dr=0.1)

## Create predictions

In [12]:
def create_predictions(model, dg):
    """Create non-iterative predictions, kept in normalised units.

    The model output is not multiplied by `dg.std` or shifted by `dg.mean`, so it
    is directly comparable to the targets the generator yields.

    Args:
        model: Keras model whose output channels correspond to `dg.output_idxs`.
        dg: DataGenerator providing the inputs and coordinates.

    Returns:
        xr.Dataset with one data variable per predicted variable (the part of the
        channel name before the first '_'), in order of first appearance.
    """
    preds = xr.DataArray(
        # `predict_generator` is deprecated; `predict` accepts the generator directly.
        model.predict(dg),
        dims=['time', 'lat', 'lon', 'level'],
        coords={'time': dg.valid_time, 'lat': dg.data.lat, 'lon': dg.data.lon,
                'level': dg.data.isel(level=dg.output_idxs).level,
                'level_names': dg.data.isel(level=dg.output_idxs).level_names
               },
    )
    # Don't unnormalize

    var_of_channel = [l.split('_')[0] for l in preds.level_names.values]
    # De-duplicate while keeping first-appearance order. A `set` gives an order that
    # can differ between kernels, which would reorder the channels of `.to_array()`.
    unique_vars = []
    for v in var_of_channel:
        if v not in unique_vars:
            unique_vars.append(v)

    das = []
    for v in unique_vars:
        # Exact match: a substring test would also put 't' channels under 't2m'.
        idxs = [i for i, vv in enumerate(var_of_channel) if vv == v]
        da = preds.isel(level=idxs).squeeze().drop('level_names')
        das.append({v: da})
    return xr.merge(das, compat='override').drop('level')

In [13]:
# Forecasts of every base CNN for the test generator, computed in order cnn_1..cnn_5.
fc_1, fc_2, fc_3, fc_4, fc_5 = (
    create_predictions(base_model, dg_test)
    for base_model in (cnn_1, cnn_2, cnn_3, cnn_4, cnn_5)
)

Instructions for updating:
Please use Model.predict, which supports generators.


In [15]:
# Stack the base forecasts along a new leading axis: (model, variable, time, lat, lon).
preds = np.stack([fc.to_array() for fc in (fc_1, fc_2, fc_3, fc_4, fc_5)])

In [16]:
preds.shape

(5, 2, 17448, 32, 64)

In [17]:
def build_cnn_stack(filters, kernels, input_shape, dr=0):
    """Fully convolutional network whose dropout stays active at prediction time.

    Every hidden block is PeriodicConv2D -> LeakyReLU, followed by a Dropout layer
    called with `training=True` when `dr > 0`. The last convolution has no
    activation and no dropout.
    """
    net_input = Input(shape=input_shape)
    hidden = net_input
    hidden_specs = zip(filters[:-1], kernels[:-1])
    for n_filters, kernel in hidden_specs:
        hidden = PeriodicConv2D(n_filters, kernel)(hidden)
        hidden = LeakyReLU()(hidden)
        if dr > 0:
            hidden = Dropout(dr)(hidden, training = True)
    final_conv = PeriodicConv2D(filters[-1], kernels[-1])
    return keras.models.Model(net_input, final_conv(hidden))


In [18]:
# Collect the targets of every test batch and join them once. Concatenating inside
# the loop would re-copy the accumulated array on every iteration.
X1, y1 = dg_test[0]

y_batches = [y1] + [dg_test[i][1] for i in range(1, len(dg_test))]
y1 = np.concatenate(y_batches)

In [21]:
# Reorder (model, variable, time, lat, lon) -> (time, lat, lon, model, variable) so the
# base models become the channel axis once one variable is picked.
new_axis_order = (2, 3, 4, 0, 1)
preds_rearranged = np.transpose(preds, axes=new_axis_order)
preds_rearranged.shape

(17448, 32, 64, 5, 2)

In [19]:
y1.shape

(17448, 32, 64, 2)

In [24]:
# Stacking model: 5 input channels (one per base CNN forecast), 1 output channel.
cnn = build_cnn_stack([64, 64, 64, 64, 1], [5, 5, 5, 5, 5], (32, 64, 5), dr = 0.1)

# Adam with learning rate 1e-4, mean squared error loss.
cnn.compile(keras.optimizers.Adam(1e-4), 'mse')

print(cnn.summary())

Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 32, 64, 5)]       0         
_________________________________________________________________
periodic_conv2d_35 (Periodic (None, 32, 64, 64)        8064      
_________________________________________________________________
leaky_re_lu_28 (LeakyReLU)   (None, 32, 64, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 32, 64, 64)        0         
_________________________________________________________________
periodic_conv2d_36 (Periodic (None, 32, 64, 64)        102464    
_________________________________________________________________
leaky_re_lu_29 (LeakyReLU)   (None, 32, 64, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 32, 64, 64)        0   

In [25]:
# Fit the stacking model on the first forecast variable of the base CNNs, with the
# matching first target channel as truth.
# NOTE(review): `preds_rearranged` and `y1` both come from `dg_test`, so the stacking
# model is trained on test-period data — confirm this is intended.
# Early stopping ends training after one epoch without `val_loss` improvement.
cnn.fit(x= preds_rearranged[..., 0], y= y1[..., 0], shuffle = True, epochs = 10, validation_split = 0.2,
          callbacks=[tf.keras.callbacks.EarlyStopping(
                        monitor='val_loss',
                        min_delta=0,
                        patience=1,
                        verbose=1, 
                        mode='auto'
                    )])

Train on 13958 samples, validate on 3490 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x2b07b02f9278>

In [26]:
cnn.save_weights('/rds/general/user/mc4117/ephemeral/saved_models/stack2.h5')

## Testing

In [27]:
# Second evaluation period, normalised with the training mean and std.
# NOTE(review): 2013-2014 overlaps `ds_train` (2014-2015), so part of this period
# was seen when the normalisation statistics were computed — confirm the year range.
ds_valid_2 = ds_whole.sel(time=slice('2013', '2014'))

dg_valid_2 = DataGenerator(ds_valid_2, var_dict, lead_time, batch_size=bs, mean=dg_train.mean, std=dg_train.std, 
                         shuffle=False, output_vars=output_vars)


Loading data into RAM


In [28]:
# Base CNN forecasts for `dg_valid_2`.
# NOTE(review): `fc_3`, `fc_4` and `fc_5` reuse the names of the `dg_test` forecasts from
# the earlier cell and overwrite them; only the first two got a `_valid` suffix.
fc_1_valid = create_predictions(cnn_1, dg_valid_2)
fc_2_valid = create_predictions(cnn_2, dg_valid_2)
fc_3 = create_predictions(cnn_3, dg_valid_2)
fc_4 = create_predictions(cnn_4, dg_valid_2)
fc_5 = create_predictions(cnn_5, dg_valid_2)

NameError: name 'cnn_6' is not defined

In [29]:
# Stack the `dg_valid_2` forecasts along a new leading model axis.
preds_valid = np.stack(
    [fc.to_array() for fc in (fc_1_valid, fc_2_valid, fc_3, fc_4, fc_5)]
)

In [30]:
# Collect the targets of every `dg_valid_2` batch and join them once. Concatenating
# inside the loop would re-copy the accumulated array on every iteration.
X1_valid_2, y1_valid_2 = dg_valid_2[0]

y_valid_2_batches = [y1_valid_2] + [dg_valid_2[i][1] for i in range(1, len(dg_valid_2))]
y1_valid_2 = np.concatenate(y_valid_2_batches)

In [31]:
# NOTE(review): this transposes `preds` (the base forecasts for `dg_test`), not the
# `preds_valid` array built above for `dg_valid_2` — confirm which one this "Testing"
# section is meant to use.
# Axis order goes from (model, variable, time, lat, lon) to (time, lat, lon, model, variable).
preds_rearranged = np.transpose(preds, axes = (2, 3, 4, 0, 1))
preds_rearranged.shape

(17448, 32, 64, 5, 2)

In [32]:
y1_valid_2.shape

(17448, 32, 64, 2)

In [33]:
# Feed the stacking model the base forecasts made for `dg_valid_2` (`preds_valid`).
# `preds_rearranged` is derived from `preds`, i.e. the forecasts for `dg_test`, so using
# it here would score test-period predictions against the `dg_valid_2` truth.
# Axes: (model, variable, time, lat, lon) -> (time, lat, lon, model, variable);
# `[..., 0]` keeps the first forecast variable.
arr_pred_stack = cnn.predict(np.transpose(preds_valid, axes=(2, 3, 4, 0, 1))[..., 0])

In [34]:
arr_pred_stack.shape

(17448, 32, 64, 1)

In [35]:
# Wrap the stacked prediction in a Dataset. The time axis is the generator's
# `valid_time` (the data times shifted by the lead time) instead of a hard-coded 72.
pred_stack = xr.Dataset({
    'z': xr.DataArray(
        arr_pred_stack[..., 0],
        dims=['time', 'lat', 'lon'],
        coords={'time': dg_valid_2.valid_time, 'lat': dg_valid_2.data.lat, 'lon': dg_valid_2.data.lon},
    ),
})

In [36]:
pred_stack

In [37]:
# Truth for `dg_valid_2`, first target channel, left in normalised units.
# (Un-normalising would be y * dg_valid_2.std + dg_valid_2.mean for that channel.)
# The time axis is the generator's `valid_time` instead of a hard-coded offset of 72.
real_ds = xr.Dataset({
    'z': xr.DataArray(
        y1_valid_2[..., 0],
        dims=['time', 'lat', 'lon'],
        coords={'time': dg_valid_2.valid_time, 'lat': dg_valid_2.data.lat, 'lon': dg_valid_2.data.lon},
    )
})

In [38]:
real_ds

In [39]:
compute_weighted_rmse(pred_stack, real_ds).compute()

In [40]:
# Score each base CNN forecast for `dg_valid_2` against the same truth, one after another.
for base_fc in (fc_1_valid, fc_2_valid, fc_3, fc_4, fc_5):
    print(compute_weighted_rmse(base_fc, real_ds).compute())

<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.188
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.1877
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.188
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.1877
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.1883


In [41]:
# Stacking-model prediction from the first forecast variable in `preds_rearranged`.
# NOTE(review): despite the `_train` name, the next cell compares this against `dg_test` truth.
fc_train = cnn.predict(preds_rearranged[...,0])

In [42]:
# Truth and stacked prediction for the `dg_test` period, both on the generator's
# `valid_time` axis (the data times shifted by the lead time).
real_ds_test = xr.Dataset({
    'z': xr.DataArray(
        y1[..., 0],
        dims=['time', 'lat', 'lon'],
        coords={'time': dg_test.valid_time, 'lat': dg_test.data.lat, 'lon': dg_test.data.lon},
    )
})

pred_train = xr.Dataset({
    'z': xr.DataArray(
        fc_train[..., 0],
        dims=['time', 'lat', 'lon'],
        coords={'time': dg_test.valid_time, 'lat': dg_test.data.lat, 'lon': dg_test.data.lon},
    ),
})

# Score the labelled Dataset built above, not the raw numpy array, so that both
# arguments carry the same coordinates.
compute_weighted_rmse(pred_train, real_ds_test).compute()

In [43]:
# Weighted RMSE of each base CNN over the `dg_test` period, first forecast variable only.
# The loop runs over however many models were stacked into `preds`.
for i in range(preds.shape[0]):
    pred0_init_train = xr.Dataset({
        'z': xr.DataArray(
            preds[i, 0, ...],
            dims=['time', 'lat', 'lon'],
            # `valid_time` is the data time axis shifted by the lead time.
            coords={'time': dg_test.valid_time, 'lat': dg_test.data.lat, 'lon': dg_test.data.lon},
        ),
    })

    print(compute_weighted_rmse(pred0_init_train, real_ds_test).compute())

<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.202
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.2014
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.2024
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.2023
<xarray.Dataset>
Dimensions:  ()
Data variables:
    z        float64 0.2027


In [45]:
# Plain ensemble mean of the first forecast variable over all stacked base models.
# Averaging along the model axis avoids hard-coding the ensemble size.
pred_mean = preds[:, 0, ...].mean(axis=0)


In [46]:

    # Ensemble-mean forecast as a Dataset on the `dg_test` valid-time axis
    # (the data times shifted by the lead time, rather than a hard-coded 72).
    pred_mean_train = xr.Dataset({
        'z': xr.DataArray(
            pred_mean[...],
            dims=['time', 'lat', 'lon'],
            coords={'time': dg_test.valid_time, 'lat': dg_test.data.lat, 'lon': dg_test.data.lon},
        ),
    })

In [47]:
compute_weighted_rmse(pred_mean_train, real_ds_test).compute()