In [14]:
# their resnet
"""
Code adapted from https://github.com/xternalz/WideResNet-pytorch
Modifications = return activations for use in attention transfer,
as done before e.g in https://github.com/BayesWatch/pytorch-moonshine
"""


import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary


class BasicBlock(nn.Module):
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                               padding=0, bias=False) or None

    def forward(self, x):
        if not self.equalInOut:
            x = self.relu1(self.bn1(x))
        else:
            out = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)

class NetworkBlock(nn.Module):
    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(int(nb_layers)):
            layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)

class WideResNet(nn.Module):
    def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
        super(WideResNet, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert((depth - 4) % 6 == 0)
        n = (depth - 4) / 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()


    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        activation1 = out
        out = self.block2(out)
        activation2 = out
        out = self.block3(out)
        activation3 = out
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out), activation1, activation2, activation3
    
    def printBlock(self):
        print('Block 1 summary')
        print(summary(self.block1,(16,32,32)))
        print('Block 2 summary')
        print(summary(self.block2,(32,32,32)))
        print('Block 3 summary')
        print(summary(self.block3,(64,32,32)))
    

#device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0
#model = WideResNet(depth=16, num_classes=10, widen_factor=2, dropRate=0.0).to(device)
#summary(model, input_size=(3, 32, 32))
    
    
#x = torch.FloatTensor(64, 3, 32, 32).uniform_(0, 1)
#model = WideResNet(depth=16, num_classes=10, widen_factor=2, dropRate=0.0)
#print(summary(model, input_size=(3, 32, 32)))

def main():
    import random
    import time
    from torchsummary import summary

    device = torch.device("cuda:0")
    device_cpu = torch.device("cpu")
    #x = torch.FloatTensor(64, 3, 32, 32).uniform_(0, 1)
    #x = x.to(device)

    ### WideResNets
    # Notation: W-depth-widening_factor
    #model = WideResNet(depth=16, num_classes=10, widen_factor=1, dropRate=0.0)
    model = WideResNet(depth=16, num_classes=10, widen_factor=2, dropRate=0.0)
    #model = WideResNet(depth=16, num_classes=10, widen_factor=8, dropRate=0.0)
    #model = WideResNet(depth=16, num_classes=10, widen_factor=10, dropRate=0.0)
    #model = WideResNet(depth=22, num_classes=10, widen_factor=8, dropRate=0.0)
    #model = WideResNet(depth=34, num_classes=10, widen_factor=2, dropRate=0.0)
    #model = WideResNet(depth=40, num_classes=10, widen_factor=10, dropRate=0.0)
    #model = WideResNet(depth=40, num_classes=10, widen_factor=1, dropRate=0.0)
    #model = WideResNet(depth=40, num_classes=10, widen_factor=2, dropRate=0.0)
    ###model = WideResNet(depth=50, num_classes=10, widen_factor=2, dropRate=0.0)
    #model = model.to(device)
    #t0 = time.time()
    #output, *act = model(x)
    #print("Time taken for forward pass: {} s".format(time.time() - t0))
    
    model = model.to(device)
    model.printBlock()
    
    #print("\nOUTPUT SHPAE: ", output.shape)


    print(summary(model, input_size=(3, 32, 32)))
    
main()

Block 1 summary
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
       BatchNorm2d-1           [-1, 16, 32, 32]              32
              ReLU-2           [-1, 16, 32, 32]               0
            Conv2d-3           [-1, 32, 32, 32]           4,608
       BatchNorm2d-4           [-1, 32, 32, 32]              64
              ReLU-5           [-1, 32, 32, 32]               0
            Conv2d-6           [-1, 32, 32, 32]           9,216
            Conv2d-7           [-1, 32, 32, 32]             512
        BasicBlock-8           [-1, 32, 32, 32]               0
       BatchNorm2d-9           [-1, 32, 32, 32]              64
             ReLU-10           [-1, 32, 32, 32]               0
           Conv2d-11           [-1, 32, 32, 32]           9,216
      BatchNorm2d-12           [-1, 32, 32, 32]              64
             ReLU-13           [-1, 32, 32, 32]               0
           Conv2d-14   

In [22]:
# keras resnet

from __future__ import print_function

import numpy as np
import sklearn.metrics as metrics

from keras.models import Model
from keras.layers import Input, Add, Activation, Dropout, Flatten, Dense
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K

#import wide_residual_network as wrn
from keras.datasets import cifar10
import keras.callbacks as callbacks
import keras.utils.np_utils as kutils
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
import numpy as np
from keras import optimizers



weight_decay = 0.0005

def initial_conv(input):
    x = Convolution2D(16, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(input)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    return x


def expand_conv(init, base, k, strides=(1, 1)):
    x = Convolution2D(base * k, (3, 3), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = Convolution2D(base * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    skip = Convolution2D(base * k, (1, 1), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    m = Add()([x, skip])

    return m


def conv1_block(input, k=1, dropout=0.0):
    init = input

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv2_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv3_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1
    
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def create_wide_residual_network(input_dim, nb_classes=100, N=2, k=1, dropout=0.0, verbose=1):
    """
    Creates a Wide Residual Network with specified parameters

    :param input: Input Keras object
    :param nb_classes: Number of output classes
    :param N: Depth of the network. Compute N = (n - 4) / 6.
              Example : For a depth of 16, n = 16, N = (16 - 4) / 6 = 2
              Example2: For a depth of 28, n = 28, N = (28 - 4) / 6 = 4
              Example3: For a depth of 40, n = 40, N = (40 - 4) / 6 = 6
    :param k: Width of the network.
    :param dropout: Adds dropout if value is greater than 0.0
    :param verbose: Debug info to describe created WRN
    :return:
    """
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    ip = Input(shape=input_dim)

    x = initial_conv(ip)
    nb_conv = 4

    x = expand_conv(x, 16, k)
    nb_conv += 2

    for i in range(N - 1):
        x = conv1_block(x, k, dropout)
        nb_conv += 2

    print('First activation summary')
    out1 = x
    a = Model(ip,out1)
    a.summary()
        
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 32, k, strides=(2, 2))
    nb_conv += 2
    
    

    for i in range(N - 1):
        x = conv2_block(x, k, dropout)
        nb_conv += 2
        
    print('Second activation summary')
    out2 = x
    b = Model(ip,out2)
    b.summary()

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 64, k, strides=(2, 2))
    nb_conv += 2

    for i in range(N - 1):
        x = conv3_block(x, k, dropout)
        nb_conv += 2
    
    print('Third activation summary')
    out3 = x
    c = Model(ip,out3)
    c.summary()

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = AveragePooling2D((8, 8))(x)
    x = Flatten()(x)

    x = Dense(nb_classes, W_regularizer=l2(weight_decay), activation='softmax')(x)

    model = Model(ip, [x,out1,out2,out3])

    if verbose: print("Wide Residual Network-%d-%d created." % (nb_conv, k))
    return model

def main():
    
    model = create_wide_residual_network
    input_shape = (32,32,3)
    model=create_wide_residual_network(input_shape, 10, N=2, k=1, dropout=0.)
    model.summary()
    
    
main()



First activation summary
Model: "model_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_76 (Conv2D)              (None, 32, 32, 16)   432         input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_61 (BatchNo (None, 32, 32, 16)   64          conv2d_76[0][0]                  
__________________________________________________________________________________________________
activation_61 (Activation)      (None, 32, 32, 16)   0           batch_normalization_61[0][0]     
__________________________________________________________________



Second activation summary
Model: "model_16"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_76 (Conv2D)              (None, 32, 32, 16)   432         input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_61 (BatchNo (None, 32, 32, 16)   64          conv2d_76[0][0]                  
__________________________________________________________________________________________________
activation_61 (Activation)      (None, 32, 32, 16)   0           batch_normalization_61[0][0]     
_________________________________________________________________



Third activation summary
Model: "model_17"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_76 (Conv2D)              (None, 32, 32, 16)   432         input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_61 (BatchNo (None, 32, 32, 16)   64          conv2d_76[0][0]                  
__________________________________________________________________________________________________
activation_61 (Activation)      (None, 32, 32, 16)   0           batch_normalization_61[0][0]     
__________________________________________________________________



In [2]:
#train our new WResNet

from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
from keras.models import model_from_json
from keras.models import load_model
from keras.losses import KLDivergence
from sklearn.metrics import accuracy_score
import numpy as np
import keras
from keract import get_activations
import math
import time
import matplotlib.pyplot as plt
import sys
import tensorflow as tf
import numpy as np
import sklearn.metrics as metrics

from keras.models import Model
from keras.layers import Input, Add, Activation, Dropout, Flatten, Dense
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K

#import wide_residual_network as wrn
from keras.datasets import cifar10
import keras.callbacks as callbacks
import keras.utils.np_utils as kutils
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
import numpy as np
from keras import optimizers

import torch.nn.functional as F

weight_decay = 0.0005

def initial_conv(input):
    x = Convolution2D(16, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(input)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    return x


def expand_conv(init, base, k, strides=(1, 1)):
    x = Convolution2D(base * k, (3, 3), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = Convolution2D(base * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    skip = Convolution2D(base * k, (1, 1), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    m = Add()([x, skip])

    return m


def conv1_block(input, k=1, dropout=0.0):
    init = input

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv2_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv3_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1
    
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def create_wide_residual_network(input_dim, nb_classes=100, N=2, k=1, dropout=0.0, verbose=1):
    """
    Creates a Wide Residual Network with specified parameters

    :param input: Input Keras object
    :param nb_classes: Number of output classes
    :param N: Depth of the network. Compute N = (n - 4) / 6.
              Example : For a depth of 16, n = 16, N = (16 - 4) / 6 = 2
              Example2: For a depth of 28, n = 28, N = (28 - 4) / 6 = 4
              Example3: For a depth of 40, n = 40, N = (40 - 4) / 6 = 6
    :param k: Width of the network.
    :param dropout: Adds dropout if value is greater than 0.0
    :param verbose: Debug info to describe created WRN
    :return:
    """
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    ip = Input(shape=input_dim)

    x = initial_conv(ip)
    nb_conv = 4

    x = expand_conv(x, 16, k)
    nb_conv += 2

    for i in range(N - 1):
        x = conv1_block(x, k, dropout)
        nb_conv += 2

    print('First activation summary')
    out1 = x
        
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 32, k, strides=(2, 2))
    nb_conv += 2
    
    

    for i in range(N - 1):
        x = conv2_block(x, k, dropout)
        nb_conv += 2
        
    print('Second activation summary')
    out2 = x

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 64, k, strides=(2, 2))
    nb_conv += 2

    for i in range(N - 1):
        x = conv3_block(x, k, dropout)
        nb_conv += 2
    
    print('Third activation summary')
    out3 = x

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = AveragePooling2D((8, 8))(x)
    x = Flatten()(x)

    x = Dense(nb_classes, W_regularizer=l2(weight_decay), activation='softmax')(x)

    model = Model(ip, [x,out1,out2,out3])

    if verbose: print("Wide Residual Network-%d-%d created." % (nb_conv, k))
    return model


'''
Function that returns the trainand test data of the CIFAR10 already preprocessed
'''
def getCIFAR10():
    # input image dimensions
    img_rows, img_cols = 32, 32
    num_classes = 10

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    # format of the tensor
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols)
        input_shape = (3, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
        input_shape = (img_rows, img_cols, 3)

    # convert in to float the images
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    # new normalization with z-score
    mean = np.mean(x_train,axis=(0,1,2,3))
    std = np.std(x_train,axis=(0,1,2,3))
    x_train = (x_train-mean)/(std+1e-7)
    x_test = (x_test-mean)/(std+1e-7)
    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    print('CIFAR10 loaded')
    return x_train,y_train,x_test,y_test

'''
Small function that returns the shape of the CIFAR10 images
'''
def getCIFAR10InputShape():
    img_rows, img_cols = 32, 32
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 3)
        
    return input_shape

def getNetwork(input_shape):
    model=create_wide_residual_network(input_shape, 10, N=2, k=1, dropout=0.)
    plot_model(model, "WRN-16-1.png", show_shapes=True, show_layer_names=True)
    return model;

def myLoss(y_true,y_pred):
    
    real_pred = y_pred
    real_true = y_true
    
    loss = keras.losses.categorical_crossentropy(real_true,real_pred)
    
    return loss;

def useless_loss(y_true,y_pred):
    
    zer = K.zeros(1)
    
    return zer

def myaccuracy(y_true, y_pred):
    
    real_pred = y_pred[0]
    real_true = y_true[0]
    
    return keras.metrics.categorical_accuracy(y_true, y_pred)

'''
Function to try to train the network on CIFAR10
'''
def trainNetwork(epochs):
    
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    input_shape = getCIFAR10InputShape()
    print('CIFAR10 shape: ' + str(input_shape))
    
    model = getNetwork(input_shape)
    
    model.compile(loss=[myLoss, useless_loss, useless_loss,useless_loss],
              optimizer='adam')
    
    batch_size = 128
    n_batches = math.floor( x_train.shape[0] / batch_size)
    
    predictions = model.predict(x_train[0:1])
    print('predictions shape ' + str(predictions[0].shape) + ' ' + str(predictions[1].shape) +
          ' ' + str(predictions[2].shape) + ' ' + str(predictions[3].shape))

    for e in range(epochs):
    
        for i in range(0,n_batches):
            imgs = x_train[i*batch_size:(i+1)*batch_size]
            labels = y_train[i*batch_size:(i+1)*batch_size]
            
            tmp_labels1 = K.zeros((batch_size,32,32,16))
            tmp_labels2 = K.zeros((batch_size,16,16,32))
            tmp_labels3 = K.zeros((batch_size,8,8,64))
            
            loss = model.train_on_batch(imgs,[labels, tmp_labels1, tmp_labels2, tmp_labels3])
            
            print("Epoch: " + str(e+1) + " batch " + str(i) + " train loss: " + str(loss))
            
        #score = model.evaluate(x_test, y_test, verbose=0)
        print('After epoch ' + str(e+1) + ' test loss ' + str(score) + ' test accuracy ' + str(score))

def main():
    trainNetwork(5)
    
main()

CIFAR10 loaded
CIFAR10 shape: (32, 32, 3)




First activation summary




Second activation summary




Third activation summary
Wide Residual Network-16-1 created.


ValueError: Invalid argument "metric" passed to K.function with TensorFlow backend

In [10]:
# two ways: use intermediate layers or create multiple models, try the second one, this works

from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
from keras.models import model_from_json
from keras.models import load_model
from keras.losses import KLDivergence
from sklearn.metrics import accuracy_score
import numpy as np
import keras
from keract import get_activations
import math
import time
import matplotlib.pyplot as plt
import sys
import tensorflow as tf
import numpy as np
import sklearn.metrics as metrics

from keras.models import Model
from keras.layers import Input, Add, Activation, Dropout, Flatten, Dense
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K

#import wide_residual_network as wrn
from keras.datasets import cifar10
import keras.callbacks as callbacks
import keras.utils.np_utils as kutils
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Concatenate
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
import numpy as np
from keras import optimizers

import torch.nn.functional as F

weight_decay = 0.0005

def initial_conv(input):
    x = Convolution2D(16, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(input)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    return x


def expand_conv(init, base, k, strides=(1, 1)):
    x = Convolution2D(base * k, (3, 3), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = Convolution2D(base * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    skip = Convolution2D(base * k, (1, 1), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    m = Add()([x, skip])

    return m


def conv1_block(input, k=1, dropout=0.0):
    init = input

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv2_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv3_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1
    
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def create_wide_residual_network(input_dim, nb_classes=100, N=2, k=1, dropout=0.0, verbose=1):
    """
    Creates a Wide Residual Network with specified parameters

    :param input: Input Keras object
    :param nb_classes: Number of output classes
    :param N: Depth of the network. Compute N = (n - 4) / 6.
              Example : For a depth of 16, n = 16, N = (16 - 4) / 6 = 2
              Example2: For a depth of 28, n = 28, N = (28 - 4) / 6 = 4
              Example3: For a depth of 40, n = 40, N = (40 - 4) / 6 = 6
    :param k: Width of the network.
    :param dropout: Adds dropout if value is greater than 0.0
    :param verbose: Debug info to describe created WRN
    :return:
    """
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    ip = Input(shape=input_dim)

    x = initial_conv(ip)
    nb_conv = 4

    x = expand_conv(x, 16, k)
    nb_conv += 2

    for i in range(N - 1):
        x = conv1_block(x, k, dropout)
        nb_conv += 2

    print('First activation summary')
    out1 = x
        
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 32, k, strides=(2, 2))
    nb_conv += 2
    
    

    for i in range(N - 1):
        x = conv2_block(x, k, dropout)
        nb_conv += 2
        
    print('Second activation summary')
    out2 = x

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 64, k, strides=(2, 2))
    nb_conv += 2

    for i in range(N - 1):
        x = conv3_block(x, k, dropout)
        nb_conv += 2
    
    print('Third activation summary')
    out3 = x

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = AveragePooling2D((8, 8))(x)
    x = Flatten()(x)

    x = Dense(nb_classes, W_regularizer=l2(weight_decay), activation='softmax')(x)
    
    #model = Model(ip, [x,out1,out2,out3])
    model = Model(ip,x)
    
    m1 = Model(ip,[out1,x])
    m2 = Model(ip,[out2,x])
    m3 = Model(ip,[out3,x])

    if verbose: print("Wide Residual Network-%d-%d created." % (nb_conv, k))
    return model,m1,m2,m3


'''
Function that returns the trainand test data of the CIFAR10 already preprocessed
'''
def getCIFAR10():
    # input image dimensions
    img_rows, img_cols = 32, 32
    num_classes = 10

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    # format of the tensor
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols)
        input_shape = (3, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
        input_shape = (img_rows, img_cols, 3)

    # convert in to float the images
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    # new normalization with z-score
    mean = np.mean(x_train,axis=(0,1,2,3))
    std = np.std(x_train,axis=(0,1,2,3))
    x_train = (x_train-mean)/(std+1e-7)
    x_test = (x_test-mean)/(std+1e-7)
    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    print('CIFAR10 loaded')
    return x_train,y_train,x_test,y_test

'''
Small function that returns the shape of the CIFAR10 images
'''
def getCIFAR10InputShape():
    img_rows, img_cols = 32, 32
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 3)
        
    return input_shape

def getNetwork(input_shape):
    model=create_wide_residual_network(input_shape, 10, N=2, k=1, dropout=0.)
    #plot_model(model, "WRN-16-1.png", show_shapes=True, show_layer_names=True)
    return model;

def myLoss(y_true,y_pred):
    
    real_pred = y_pred
    real_true = y_true
    
    loss = keras.losses.categorical_crossentropy(real_true,real_pred)
    
    return loss;

def useless_loss(y_true,y_pred):
    
    zer = K.zeros(1)
    
    return zer

def myaccuracy(y_true, y_pred):
    
    real_pred = y_pred[0]
    real_true = y_true[0]
    
    return keras.metrics.categorical_accuracy(y_true, y_pred)

'''
Function to try to train the network on CIFAR10
'''
def trainNetwork(epochs):
    batch_size = 128
    epochs = 2
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    input_shape = getCIFAR10InputShape()
    print('CIFAR10 shape: ' + str(input_shape))
    
    model,m1,m2,m3 = getNetwork(input_shape)
    
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])
    
    m1.compile(loss=[useless_loss,useless_loss],optimizer='sgd')
    m2.compile(loss=[useless_loss,useless_loss],optimizer='sgd')
    m3.compile(loss=[useless_loss,useless_loss],optimizer='sgd')
    
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
    
    model_predictions = model.predict(x_train[0:1])
    m1_predictions = m1.predict(x_train[0:1])
    m2_predictions = m2.predict(x_train[0:1])
    m3_predictions = m3.predict(x_train[0:1])
    
    print('Full model predictions:')
    print(str(model_predictions))
    print("Model 1 predictions:")
    print(str(m1_predictions))
    
    print("Model 2 predictions:")
    print(str(m2_predictions))
    
    print("Model 3 predictions:")
    print(str(m3_predictions))

def main():
    trainNetwork(5)
    
main()

CIFAR10 loaded
CIFAR10 shape: (32, 32, 3)




First activation summary




Second activation summary




Third activation summary
Wide Residual Network-16-1 created.
Train on 50000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Full model predictions:
[[8.7697332e-04 4.2944841e-04 3.8611881e-02 4.5042118e-01 2.0880094e-02
  3.0573282e-01 1.6604018e-01 1.6066436e-02 5.2239379e-04 4.1856553e-04]]
Model 1 predictions:
[array([[[[ 0.1077102 , -0.0100246 ,  0.1954115 , ...,  0.01236693,
          -0.11393171,  0.00953884],
         [ 0.08172166,  0.12524375,  0.15642695, ...,  0.07260023,
          -0.04846612, -0.00792598],
         [ 0.04368145,  0.08249444,  0.14672537, ...,  0.06307021,
           0.0423757 ,  0.03603948],
         ...,
         [ 0.02156087,  0.02610946,  0.08266972, ...,  0.02305071,
           0.05705534,  0.02804315],
         [ 0.0058015 ,  0.06322838,  0.08621318, ...,  0.03126223,
           0.08345835,  0.08404981],
         [ 0.01454124, -0.02253578,  0.12709   , ...,  0.02832267,
           0.05062149,  0.10027261]],

        [[ 0.14359112,  0.02728691,  

In [14]:
# try to train two model with the same in/out, it updates the weights of both

from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
from keras.models import model_from_json
from keras.models import load_model
from keras.losses import KLDivergence
from sklearn.metrics import accuracy_score
import numpy as np
import keras
from keract import get_activations
import math
import time
import matplotlib.pyplot as plt
import sys
import tensorflow as tf
import numpy as np
import sklearn.metrics as metrics

from keras.models import Model
from keras.layers import Input, Add, Activation, Dropout, Flatten, Dense
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K

#import wide_residual_network as wrn
from keras.datasets import cifar10
import keras.callbacks as callbacks
import keras.utils.np_utils as kutils
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Concatenate
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
import numpy as np
from keras import optimizers

import torch.nn.functional as F

weight_decay = 0.0005

def initial_conv(input):
    x = Convolution2D(16, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(input)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    return x


def expand_conv(init, base, k, strides=(1, 1)):
    x = Convolution2D(base * k, (3, 3), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = Convolution2D(base * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    skip = Convolution2D(base * k, (1, 1), padding='same', strides=strides, kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(init)

    m = Add()([x, skip])

    return m


def conv1_block(input, k=1, dropout=0.0):
    init = input

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(16 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv2_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def conv3_block(input, k=1, dropout=0.0):
    init = input

    #channel_axis = 1 if K.image_dim_ordering() == "th" else -1
    channel_axis = -1
    
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    if dropout > 0.0: x = Dropout(dropout)(x)

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
                      W_regularizer=l2(weight_decay),
                      use_bias=False)(x)

    m = Add()([init, x])
    return m

def create_wide_residual_network(input_dim, nb_classes=100, N=2, k=1, dropout=0.0, verbose=1):
    """
    Creates a Wide Residual Network with specified parameters

    :param input: Input Keras object
    :param nb_classes: Number of output classes
    :param N: Depth of the network. Compute N = (n - 4) / 6.
              Example : For a depth of 16, n = 16, N = (16 - 4) / 6 = 2
              Example2: For a depth of 28, n = 28, N = (28 - 4) / 6 = 4
              Example3: For a depth of 40, n = 40, N = (40 - 4) / 6 = 6
    :param k: Width of the network.
    :param dropout: Adds dropout if value is greater than 0.0
    :param verbose: Debug info to describe created WRN
    :return:
    """
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    ip = Input(shape=input_dim)

    x = initial_conv(ip)
    nb_conv = 4

    x = expand_conv(x, 16, k)
    nb_conv += 2

    for i in range(N - 1):
        x = conv1_block(x, k, dropout)
        nb_conv += 2

    print('First activation summary')
    out1 = x
        
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 32, k, strides=(2, 2))
    nb_conv += 2
    
    

    for i in range(N - 1):
        x = conv2_block(x, k, dropout)
        nb_conv += 2
        
    print('Second activation summary')
    out2 = x

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = expand_conv(x, 64, k, strides=(2, 2))
    nb_conv += 2

    for i in range(N - 1):
        x = conv3_block(x, k, dropout)
        nb_conv += 2
    
    print('Third activation summary')
    out3 = x

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)

    x = AveragePooling2D((8, 8))(x)
    x = Flatten()(x)

    x = Dense(nb_classes, W_regularizer=l2(weight_decay), activation='softmax')(x)
    
    #model = Model(ip, [x,out1,out2,out3])
    model = Model(ip,x)
    
    m1 = Model(ip,x)

    return model,m1


'''
Function that returns the trainand test data of the CIFAR10 already preprocessed
'''
def getCIFAR10():
    # input image dimensions
    img_rows, img_cols = 32, 32
    num_classes = 10

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    # format of the tensor
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols)
        input_shape = (3, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
        input_shape = (img_rows, img_cols, 3)

    # convert in to float the images
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    # new normalization with z-score
    mean = np.mean(x_train,axis=(0,1,2,3))
    std = np.std(x_train,axis=(0,1,2,3))
    x_train = (x_train-mean)/(std+1e-7)
    x_test = (x_test-mean)/(std+1e-7)
    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    print('CIFAR10 loaded')
    return x_train,y_train,x_test,y_test

'''
Small function that returns the shape of the CIFAR10 images
'''
def getCIFAR10InputShape():
    img_rows, img_cols = 32, 32
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 3)
        
    return input_shape

def getNetwork(input_shape):
    model=create_wide_residual_network(input_shape, 10, N=2, k=1, dropout=0.)
    #plot_model(model, "WRN-16-1.png", show_shapes=True, show_layer_names=True)
    return model;

def myLoss(y_true,y_pred):
    
    real_pred = y_pred
    real_true = y_true
    
    loss = keras.losses.categorical_crossentropy(real_true,real_pred)
    
    return loss;

def useless_loss(y_true,y_pred):
    
    zer = K.zeros(1)
    
    return zer

def myaccuracy(y_true, y_pred):
    
    real_pred = y_pred[0]
    real_true = y_true[0]
    
    return keras.metrics.categorical_accuracy(y_true, y_pred)

'''
Function to try to train the network on CIFAR10
'''
def trainNetwork(epochs):
    
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    batch_size = 128
    epochs = 3
    n_batches = math.floor( x_train.shape[0] / batch_size)
    
    input_shape = getCIFAR10InputShape()
    print('CIFAR10 shape: ' + str(input_shape))
    
    model,m1 = getNetwork(input_shape)
    
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])
    
    m1.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])
    
    print('Untrained')
    model_predictions = model.predict(x_train[0:1])
    m1_predictions = m1.predict(x_train[0:1])
    
    print('Full model predictions:')
    print(str(model_predictions))
    print("Model 1 predictions:")
    print(str(m1_predictions))
    
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
    
    print('After full model training')
    model_predictions = model.predict(x_train[0:1])
    m1_predictions = m1.predict(x_train[0:1])
    
    print('Full model predictions:')
    print(str(model_predictions))
    print("Model 1 predictions:")
    print(str(m1_predictions))
    
    m1.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
    
    print('After model 1 training')
    model_predictions = model.predict(x_train[0:1])
    m1_predictions = m1.predict(x_train[0:1])
    
    print('Full model predictions:')
    print(str(model_predictions))
    print("Model 1 predictions:")
    print(str(m1_predictions))

def main():
    trainNetwork(5)
    
main()

CIFAR10 loaded
CIFAR10 shape: (32, 32, 3)




First activation summary




Second activation summary




Third activation summary
Untrained
Full model predictions:
[[0.09999997 0.10000002 0.10000002 0.10000012 0.09999997 0.09999996
  0.09999995 0.10000005 0.09999994 0.09999998]]
Model 1 predictions:
Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
After full model training
Full model predictions:
[[2.0413179e-04 8.8914043e-05 1.0133072e-02 2.6488888e-01 2.2332121e-02
  3.7137428e-01 3.2029131e-01 1.0307656e-02 2.8539321e-04 9.4255782e-05]]
Model 1 predictions:
[[2.0413179e-04 8.8914043e-05 1.0133072e-02 2.6488888e-01 2.2332121e-02
  3.7137428e-01 3.2029131e-01 1.0307656e-02 2.8539321e-04 9.4255782e-05]]
Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
After model 1 training
Full model predictions:
[[8.4435480e-04 1.7966880e-04 5.5168971e-02 6.6091329e-02 1.0201749e-02
  8.5112095e-02 7.7819419e-01 3.6718380e-03 3.1834468e-04 2.1749425e-04]]
Model 1 predictions:
[[8.4435480e-04 1.7966880e-04 5.5168971e-02 6.6091329e-02 1.0201749