tgb - 2/5/2019 - The goal of this notebook is to save a model's weights, saved in a .h5 file, to a .txt file so that it can be used by CAM.  
The notebook closely follows the function save_weights.py coded by Stephan Rasp:  
https://github.com/raspstephan/CBRAIN-CAM/blob/master/save_weights.py

In [1]:
from keras.layers import *
from keras.models import *
from cbrain.imports import *
from keras.utils.generic_utils import get_custom_objects
metrics_dict = dict([(f.__name__, f) for f in all_metrics])
get_custom_objects().update(metrics_dict)
import h5py
import os, sys
import netCDF4 as nc
import numpy as np
import tensorflow as tf
import tensorflow.math as tfm
fmt = '%.6e'

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
TRAINDIR = '/local/Tom.Beucler/SPCAM_PHYS/'
DATADIR = '/project/meteo/w2w/A6/S.Rasp/SP-CAM/sp32fbp_andkua/'
%cd $TRAINDIR
!ls

/data/Tom.Beucler/SPCAM_PHYS
32_col_lgsc_12m_train_features.nc
32_col_lgsc_12m_train_norm.nc
32_col_lgsc_12m_train_oldnorm.nc
32_col_lgsc_12m_train_shuffle_features.nc
32_col_lgsc_12m_train_shuffle_targets.nc
32_col_lgsc_12m_train_targets.nc
32_col_lgsc_12m_valid_features.nc
32_col_lgsc_12m_valid_shuffle_features.nc
32_col_lgsc_12m_valid_shuffle_targets.nc
32_col_lgsc_12m_valid_targets.nc
32_col_lgsc_1m_train_features.nc
32_col_lgsc_1m_train_norm.nc
32_col_lgsc_1m_train_oldnorm.nc
32_col_lgsc_1m_train_shuffle_features.nc
32_col_lgsc_1m_train_shuffle_targets.nc
32_col_lgsc_1m_train_targets.nc
32_col_lgsc_1m_valid_features.nc
32_col_lgsc_1m_valid_shuffle_features.nc
32_col_lgsc_1m_valid_shuffle_targets.nc
32_col_lgsc_1m_valid_targets.nc
HDF5_DATA
local
TXT_DATA


tgb - 2/5/2019 - Subtlties with load_model discussed here:  
https://github.com/keras-team/keras/issues/4871  
Ideally, I would implement get_config() in the custom layer, then use custom_objects to pass a dictionary to load_model.  
In practice, I'll redefine my layers and my model below and just load the weights.  
  
Careful: The mass and enthalpy conservation layers depend on the input shape, so make sure to use the right one.

In [3]:
PREFIX = '32_col_lgsc_12m_'
# 1) Open the file containing the normalization of the targets
ds = xr.open_dataset(TRAINDIR + PREFIX + 'train_norm.nc')
# 2) Open the pickle files containing the pressure converters
with open(os.path.join('/filer/z-sv-pool12c/t/Tom.Beucler/SPCAM/CBRAIN-CAM/cbrain', 'hyai_hybi.pkl'), 'rb') as f:
            hyai, hybi = pickle.load(f)
# 3) Define fsub, fdiv, normq
fsub = ds.feature_means.values
fdiv = ds.feature_stds_by_var.values
normq = ds.target_conv.values
print('fsub.shape=',fsub.shape)
print('fdiv.shape=',fdiv.shape)
print('normq.shape=',normq.shape)
print('hyai.shape=',hyai.shape)
print('hybi.shape=',hybi.shape)

ds.close()

fsub.shape= (304,)
fdiv.shape= (304,)
normq.shape= (158,)
hyai.shape= (31,)
hybi.shape= (31,)


In [7]:
# tgb - 2/5/2019 - Adapated the mass conservation layer to new input format
class MasConsLay(Layer):
    
    def __init__(self, fsub, fdiv, normq, hyai, hybi, output_dim, **kwargs):
        self.fsub = fsub # Subtraction for normalization of inputs 
        self.fdiv = fdiv # Division for normalization of inputs
        self.normq = normq # Normalization of output's water concentration
        self.hyai = hyai # CAM constants to calculate d_pressure
        self.hybi = hybi # CAM constants to calculate d_pressure
        self.output_dim = output_dim # Dimension of output
        super().__init__(**kwargs)
        
    def build(self, input_shape):
        super().build(input_shape)  # Be sure to call this somewhere!
        
    # tgb - 2/6/2019 - following https://github.com/keras-team/keras/issues/4871
    def get_config(self):
        config = {'fsub': list(self.fsub), 'fdiv': list(self.fdiv),
                  'normq': list(self.normq), 'hyai': list(self.hyai),
                  'hybi': list(self.hybi), 'output_dim': list(self.output_dim)}
        base_config = super(MasConsLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
        
    def call(self, arrs):
        # arrs (for arrays) is a list with 
        # [inputs=inp and the output of the previous layer=densout]
        # inputs will be [n_sample, 304 = 30*10+4] with
        # [QBP, QCBP, QIBP, TBP, VBP, Qdt_adiabatic, QCdt_adiabatic, QIdt_adiabatic,
        # Tdt_adiabatic, Vdt_adiabatic, PS, SOLIN, SHFLX, LHFLX]
        # outputs of the previous dense layer will be [n_samples, 124 = 30*4+6-2] with
        # [DELQ\{PHQ AT LOWEST LVL}, DELCLDLIQ, DELCLDICE, 
        # TPHYSTND\{TPHYSTND AT LOWEST LVL}, FSNT, FSNS, FLNT, FLNS, PRECT, PRECTEND]
        
        # Split between the inputs inp & the output of the densely connected
        # neural network, densout
        inp, densout = arrs
        
        # 0) Constants
        G = 9.80616; # Reference gravity constant [m.s-2]
        L_V = 2.501e6; # Latent heat of vaporization of water [W.kg-1]
        P0 = 1e5; # Reference surface pressure [Pa]
        
        # 1) Get non-dimensional pressure differences (p_tilde above)
        # In the input vector, PS is the 151st element after 
        # the first elements = [QBP, ..., VBP with shape 30*5=150]
        PS = tfm.add( tfm.multiply( inp[:,300], self.fdiv[300]), self.fsub[300])
        # Reference for calculation of d_pressure is cbrain/models.py (e.g. QLayer)
        P = tfm.add( tfm.multiply( P0, self.hyai), \
                    tfm.multiply( PS[:,None], self.hybi))
        dP = tfm.subtract( P[:, 1:], P[:, :-1])
        # norm_output = dp_norm * L_V/G so dp_norm = norm_output * G/L_V
        dP_NORM = tfm.divide( \
                             tfm.multiply(self.normq[:30], \
                                   G), L_V)
        # dp_tilde = dp/dp_norm
        # Wondering about broadcasting here...
        # tf.div or simply \ would support broadcasting 
        dP_TILD = tfm.divide( dP, dP_NORM)
        
        # 2) Calculate cloud water vertical integral from level 1 to level 30
        # The indices are tricky here because we are missing del(q_v)@(level 30)
        # so e.g. q_liq@(level 1) is the 30th element of the output of the 
        # previous dense layer
        CLDVEC = tfm.multiply( dP_TILD, \
                                  tfm.add( densout[:, 29:59], densout[:, 59:89]))
        CLDINT = tfm.reduce_sum( CLDVEC, axis=1)
        
        # 3) Calculate water vapor vertical integral from level 1 to level 29
        VAPVEC = tfm.multiply( dP_TILD[:, :29], \
                                  densout[:, :29])
        VAPINT = tfm.reduce_sum( VAPVEC, axis=1)
        
        # 4) Calculate forcing on the right-hand side (Net Evaporation-Precipitation)
        # E-P is already normalized to units W.m-2 in the output vector
        # so all we need to do is input-unnormalize LHF that is taken from the input vector
        LHF = tfm.add( tfm.multiply( inp[:,303], self.fdiv[303]), self.fsub[303])
        # Note that total precipitation = PRECT + 1e-3*PRECTEND in the CAM model
        # PRECTEND already multiplied by 1e-3 in output vector so no need to redo it
        PREC = tfm.add( densout[:, 152], densout[:, 153])
        
        # 5) Infer water vapor tendency at level 30 as a residual
        # Composing tfm.add 3 times because not sure how to use tfm.add_n
        DELQV30 = tfm.divide( \
                             tfm.add( tfm.add( tfm.add (\
                                                        LHF, tfm.negative(PREC)), \
                                              tfm.negative(CLDINT)), \
                                     tfm.negative(VAPINT)), \
                             dP_TILD[:, 29])
        
        # 6) Concatenate the water tendencies with the newly inferred tendency
        # to get the final vector out of shape (#samples,125) with
        # [DELQ, DELCLDLIQ, DELCLDICE, 
        # TPHYSTND\{TPHYSTND AT SURFACE}, FSNT, FSNS, FLNT, FLNS, PRECT PRECTEND]
        # Uses https://www.tensorflow.org/api_docs/python/tf/concat
        DELQV30 = tf.expand_dims(DELQV30,1) # Adds dimension=1 to axis=1
        out = tf.concat([densout[:, :29], DELQV30, densout[:, 29:]], 1)
        return out
    
    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], self.output_dim) # The output has size 125=30*4+6-1
    # and is ready to be fed to the energy conservation layer
    # before we reach the total number of outputs = 126
    
# tgb - 2/5/2019 - Change to adapt to new input format
class EntConsLay(Layer):
    
    def __init__(self, fsub, fdiv, normq, hyai, hybi, output_dim, **kwargs):
        self.fsub = fsub # Subtraction for normalization of inputs 
        self.fdiv = fdiv # Division for normalization of inputs
        self.normq = normq # Normalization of output's water concentration
        self.hyai = hyai # CAM constants to calculate d_pressure
        self.hybi = hybi # CAM constants to calculate d_pressure
        self.output_dim = output_dim # Dimension of output
        super().__init__(**kwargs)
        
    def build(self, input_shape):
        super().build(input_shape)  # Be sure to call this somewhere!
        
    # tgb - 2/6/2019 - following https://github.com/keras-team/keras/issues/4871
    def get_config(self):
        config = {'fsub': list(self.fsub), 'fdiv': list(self.fdiv),
                  'normq': list(self.normq), 'hyai': list(self.hyai),
                  'hybi': list(self.hybi), 'output_dim': list(self.output_dim)}
        base_config = super(MasConsLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
        
    def call(self, arrs):
        # arrs (for arrays) is a list with 
        # [inputs=inp and the output of the previous layer=massout]
        # inputs will be [n_sample, 304 = 30*10+4] with
        # [QBP, QCBP, QIBP, TBP, VBP, Qdt_adiabatic, QCdt_adiabatic, QIdt_adiabatic,
        # Tdt_adiabatic, Vdt_adiabatic, PS, SOLIN, SHFLX, LHFLX]
        # outputs of the previous dense layer will be [n_samples, 157 = 30*5+8-1] with
        # [DELQ, DELCLDLIQ, DELCLDICE, 
        # TPHYSTND\{TPHYSTND AT LOWEST LVL}, DTVKE,
        # FSNT, FSNS, FLNT, FLNS, PRECT, PRECTEND, PRECST, PRECSTEN]
        
        # Split between the inputs inp & the output of the densely connected
        # neural network, massout
        inp, massout = arrs
        
        # 0) Constants
        G = 9.80616; # Reference gravity constant [m.s-2]
        L_F = 3.337e5; # Latent heat of fusion of water [W.kg-1]
        L_V = 2.501e6; # Latent heat of vaporization of water [W.kg-1]
        P0 = 1e5; # Reference surface pressure [Pa]
        
        # 1) Get non-dimensional pressure differences (p_tilde above)
        # In the input vector, PS is the 151st element after 
        # the first elements = [QBP, ..., VBP with shape 30*5=150]
        PS = tfm.add( tfm.multiply( inp[:,300], self.fdiv[300]), self.fsub[300])
        # Reference for calculation of d_pressure is cbrain/models.py (e.g. QLayer)
        P = tfm.add( tfm.multiply( P0, self.hyai), \
                    tfm.multiply( PS[:,None], self.hybi))
        dP = tfm.subtract( P[:, 1:], P[:, :-1])
        # norm_output = dp_norm * L_V/G so dp_norm = norm_output * G/L_V
        dP_NORM = tfm.divide( \
                             tfm.multiply(self.normq[:30], \
                                          G),\
                             L_V)
        # dp_tilde = dp/dp_norm
        dP_TILD = tfm.divide( dP, dP_NORM)
        
        # 2) Calculate net energy input from phase change and precipitation
        # PHAS = Lf/Lv*((PRECST+PRECSTEN)-(PRECT+PRECTEND))
        PHAS = tfm.divide( tfm.multiply( tfm.subtract(\
                                                      tfm.add( massout[:,155], massout[:,156]),\
                                                      tfm.add( massout[:,153], massout[:,154])),\
                                        L_F),\
                          L_V)
        
        # 3) Calculate net energy input from radiation, sensible heat flux and turbulent KE
        # 3.1) RAD = FSNT-FSNS-FLNT+FLNS
        RAD = tfm.add(\
                      tfm.subtract( massout[:,149], massout[:,150]),\
                      tfm.subtract( massout[:,152], massout[:,151]))
        # 3.2) Unnormalize sensible heat flux
        SHF = tfm.add( tfm.multiply( inp[:,302], self.fdiv[302]), self.fsub[302])
        # 3.3) Net turbulent kinetic energy dissipative heating is the column-integrated 
        # turbulent kinetic energy energy dissipative heating
        KEDVEC = tfm.multiply( dP_TILD, massout[:, 119:149])
        KEDINT = tfm.reduce_sum( KEDVEC, axis=1)
        
        # 4) Calculate tendency of normalized column water vapor due to phase change
        # 4.1) Unnormalize latent heat flux
        LHF = tfm.add( tfm.multiply( inp[:,303], self.fdiv[303]), self.fsub[303])
        # 4.2) Column water vapor is the column integral of specific humidity
        PHQVEC = tfm.multiply( dP_TILD, massout[:, :30])
        PHQINT = tfm.reduce_sum( PHQVEC, axis=1)
        # 4.3) Multiply by L_S/L_V to normalize (explanation above)
        SPDQINT = tfm.divide( tfm.multiply( tfm.subtract(\
                                                         PHQINT, LHF),\
                                           L_S),\
                             L_V)
        
        # 5) Same operation for liquid water tendency but multiplied by L_F/L_V
        SPDQCINT = tfm.divide( tfm.multiply(\
                                            tfm.reduce_sum(\
                                                           tfm.multiply( dP_TILD, massout[:, 30:60]),\
                                                           axis=1),\
                                            L_F),\
                              L_V)
        
        # 6) Same operation for temperature but only integrate from level 1 to level 29
        DTINT = tfm.reduce_sum( tfm.multiply( dP_TILD[:, :29], massout[:, 90:119]), axis=1)

        # 7) Now calculate dT30 as a residual
        dT30 = tfm.divide(tfm.add(tfm.add(tfm.add(tfm.add(tfm.add(tfm.add(\
                                                                          PHAS,RAD),\
                                                                  SHF),\
                                                          KEDINT),\
                                                  tfm.negative( SPDQINT)),\
                                          tfm.negative( SPDQCINT)),\
                                  tfm.negative( DTINT)),\
                          dP_TILD[:, 29])
        dT30 = tf.expand_dims(dT30,1)

        out = tf.concat([massout[:, :119], dT30, massout[:, 119:]], 1)
        return out
    
    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], self.output_dim)
    # and is ready to be used in the cost function
    
# tgb - 2/6/2019 - Adding get    
from keras.utils.generic_utils import get_custom_objects
metrics_dict = dict([(f.__name__, f) for f in all_metrics])
get_custom_objects().update(metrics_dict)
get_custom_objects().update({
    'MasConsLay': MasConsLay,
    'EntConsLay': EntConsLay,
    })
from configargparse import ArgParser

TODO: Properly save the conserving models with get_config() to avoid having to redefine them when loading them  
tgb - 2/5/2019 - Now ready to save the weights and the norm to .txt format

In [8]:
exp_name = 'mod_cons_5dens'
save_dir = f'./TXT_DATA/'+exp_name+'/'
print(save_dir)
if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
model = load_model(f'./HDF5_DATA/'+exp_name+'.h5')
model.save_weights(save_dir + 'weights.h5')

./TXT_DATA/mod_cons_5dens/


TypeError: __init__() missing 6 required positional arguments: 'fsub', 'fdiv', 'normq', 'hyai', 'hybi', and 'output_dim'

In [66]:
weight_file = save_dir + 'weights.h5'
weights = []; biases = []
with h5py.File(weight_file, 'r') as f:
    layer_names = [n.decode('utf8') for n in f.attrs['layer_names']
                   if 'dense' in n.decode('utf8')]
    for il, l in enumerate(layer_names):
        g = f[l]
        w = g[l + '/kernel:0'][:]
        b = g[l + '/bias:0'][:]
        weights.append(w); biases.append(b)
        np.savetxt(save_dir+f'/layer{il+1}_kernel.txt', w.T, fmt=fmt,
                   delimiter=',')
        np.savetxt(save_dir + f'/layer{il+1}_bias.txt', b.reshape(1, -1),
                   fmt=fmt, delimiter=',')

In [67]:
norm_path = TRAINDIR + '32_col_lgsc_1m_train_norm.nc'
with nc.Dataset(norm_path) as ds:
    np.savetxt(
        save_dir + '/inp_means.txt', ds['feature_means'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/inp_stds.txt', ds['feature_stds'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/inp_mins.txt', ds['feature_mins'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/inp_maxs.txt', ds['feature_maxs'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/outp_mins.txt', ds['target_mins'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/outp_maxs.txt', ds['target_maxs'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    # tgb - 2/6/2019 - Adding output conversion file to not have to hardcode output unnormalization
    # in the CAM code
    np.savetxt(
        save_dir + '/outp_conv.txt', ds['target_conv'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/inp_stds_by_var.txt', ds['feature_stds_by_var'][:].reshape(1, -1),
        fmt=fmt, delimiter=',')
    np.savetxt(
        save_dir + '/inp_max_rs.txt',
        np.maximum(ds['feature_stds_by_var'][:],
                   ds['feature_maxs'][:] - ds['feature_mins'][:]).reshape(1, -1),
        fmt=fmt, delimiter=',')