<img src="https://www.gov.br/cbpf/pt-br/assuntos/noticias/inscricao-para-duas-bolsas-pibic-ate-terca-09/cbpf-1.png"  width="500" />

<font size="5"> Implementing Transfer Learning for Imagery with N>3 Channels and Removing the Pre-processing Layers from tf.keras.applications Models

    
<font size="2">
This is a tutorial notebook on how to solve two problems:
    
    1) How to implement transfer learning, applying the ImageNet weights to images with N>3 channels.
    
    2) How to remove the preprocessing layers that are loaded with TensorFlow models.
    
    Neste Notebook iremos focar em dois problemas:
  
1) As redes neurais Convolucionais pré carregadas do tensorflow, com os pesos da imagenet, só aceitam imagens com 3 canais (RGB).

2) As CNN carregadas do **TF** vêm com camadas de pré processamento embutidas. 

Objetivo: Um tutorial de como podemos retirar essa camada de pré 
processamento, e adaptar a rede para que ela possa ser usada com N canais.
Os pesos associados aos canais extras serão a média dos valores dos pesos da Imagenet para os canais originais.


    
***Este código funciona em qualquer rede disponível no TensorFlow.applications 2.8.0***
    
This code was developed by Phelipe Darc.

    
    Please do not remove this disclaimer.


In [2]:
import os
import time
import tensorflow as tf
import pandas as pd
from astropy.io import fits
from astropy.table import Table
import matplotlib.pyplot as plt
from tensorflow.keras import backend as k
from tensorflow.keras.models import Sequential , Model, load_model
from tensorflow.keras.layers import BatchNormalization, SeparableConv2D, MaxPooling2D, Activation, Flatten, Dropout, Dense, Conv2D,LeakyReLU,GlobalMaxPooling2D, Input
from tensorflow.python.keras.utils import np_utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import h5py
import numpy as np

# Creating a EfficientNetB4 with 5 channels and No preprocessing Layers.

## Changing the input layer:


### 1) Removing the Rescaling and Normalization Layers




Deletando as camadas de Pré-processamento:
1) Rescaling
2) Normalization

In [4]:
# You can choose any keras.applications model you want.
# include_top=False loads the network without its classification head
# (note the "_notop" weights file that gets downloaded).
efib4 = tf.keras.applications.efficientnet.EfficientNetB4(weights='imagenet', include_top=False)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5


In [5]:
#Printing the model
efib4.summary()

Model: "efficientnetb4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 rescaling_1 (Rescaling)        (None, None, None,   0           ['input_2[0][0]']                
                                3)                                                                
                                                                                                  
 normalization_1 (Normalization  (None, None, None,   7          ['rescaling_1[0][0]']            
 )                              3)                                                   

### Saving the IMAGENET weights for future comparison

Salvando os pesos da ImageNet - isso é feito para futuras comparações.

In [16]:
pesos=efib4.get_weights()

In [7]:
pesos

[array([0.485, 0.456, 0.406], dtype=float32),
 array([0.229, 0.224, 0.225], dtype=float32),
 0,
 array([[[[ 2.20859125e-02, -3.54408383e-01,  2.98673540e-01, ...,
            6.43759593e-02,  2.34505665e-02,  4.39255893e-01],
          [-3.61267403e-02, -7.12899044e-02,  1.16172409e+00, ...,
            5.87348402e-01, -5.32432683e-02,  1.18248723e-01],
          [ 5.13000607e-01,  4.16596770e-01,  2.34334469e-01, ...,
            6.27864450e-02, -6.46309376e-01, -8.16539675e-02]],
 
         [[-1.46005638e-02, -5.61126351e-01,  4.57988791e-02, ...,
            1.55987814e-01, -9.91783515e-02,  5.74809253e-01],
          [ 3.91144641e-02, -1.02806643e-01,  3.46348703e-01, ...,
            4.67721641e-01, -1.00331023e-01,  1.43342853e-01],
          [ 5.97358584e-01,  5.83681822e-01,  2.12075412e-02, ...,
            1.02894828e-02, -8.32731366e-01,  4.01173867e-02]],
 
         [[-1.46421671e-01, -2.48344451e-01, -1.01743698e-01, ...,
            1.20773181e-01,  9.61307958e-02,  2.635

In [8]:
len(efib4.layers)

474

In [9]:
import tensorflow as tf
def split(model, start, end):
    '''
    Build a sub-model made of the input layer plus layers ``start``..``end``.

    The model config is filtered down to the kept layers, the first kept
    layer is rewired to read from the input layer, and the weights of every
    layer in the new model are copied from the layer of the same name in
    ``model``.

    -<keras.model> - model - Model to be changed.

    -<int> - start - Index of the first layer to keep (besides the input
     layer, which is always kept).

    -<int> - end - Index of the last layer to keep (inclusive).

    Returns the new model.

    Note: only the first kept layer is rewired to the input layer; the
    inbound connections of all other kept layers are left as they were.
    '''
    confs = model.get_config()
    all_confs = confs['layers']

    # The input-layer entry always survives; it takes over the input shape
    # of the first layer that is kept.
    input_conf = all_confs[0]
    input_conf['config']['batch_input_shape'] = model.layers[start].input_shape
    if start != 0:
        # Keep the config-level name identical to the entry name.
        input_conf['config']['name'] = input_conf['name']

    # Names of the entries to keep: index 0 plus everything in [start, end].
    kept_names = {
        layer_conf['name']
        for idx, layer_conf in enumerate(all_confs)
        if idx == 0 or start <= idx <= end
    }
    layers = [
        layer_conf for layer_conf in all_confs
        if layer_conf['name'] in kept_names
    ]

    # Connect the first kept layer directly to the input layer.
    layers[1]['inbound_nodes'][0][0][0] = layers[0]['name']

    # Point the config at the reduced layer list and its new end points.
    confs['layers'] = layers
    confs['input_layers'][0][0] = layers[0]['name']
    confs['output_layers'][0][0] = layers[-1]['name']

    # Rebuild the model, then copy the weights over layer by layer (by name).
    submodel = tf.keras.Model.from_config(confs)
    for new_layer in submodel.layers:
        source_layer = model.get_layer(new_layer.name)
        if source_layer is None:
            continue
        new_layer.set_weights(source_layer.get_weights())
    return submodel

# Keep everything from layer index 3 onwards. In the summary of efib4,
# indices 1 and 2 are the Rescaling and Normalization layers, and index 3 is
# stem_conv_pad, the padding layer that feeds the first convolution.
# len(efib4.layers) is one past the last index; split() only checks
# `i > end`, so this simply keeps every remaining layer.
efib4_2 = split(efib4, 3, len(efib4.layers))
efib4_2.summary()

Model: "efficientnetb4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 stem_conv_pad (ZeroPadding2D)  (None, None, None,   0           ['input_2[0][0]']                
                                3)                                                                
                                                                                                  
 stem_conv (Conv2D)             (None, None, None,   1296        ['stem_conv_pad[0][0]']          
                                48)                                                  

 - As you can see, we dropped the Normalization and Rescaling layers.

*Foram removidas as camadas de Normalization e Rescaling.*

*Note que os pesos não foram reinicializados.*

In [11]:
pesos2=efib4_2.get_weights()
pesos2

[array([[[[ 2.20859125e-02, -3.54408383e-01,  2.98673540e-01, ...,
            6.43759593e-02,  2.34505665e-02,  4.39255893e-01],
          [-3.61267403e-02, -7.12899044e-02,  1.16172409e+00, ...,
            5.87348402e-01, -5.32432683e-02,  1.18248723e-01],
          [ 5.13000607e-01,  4.16596770e-01,  2.34334469e-01, ...,
            6.27864450e-02, -6.46309376e-01, -8.16539675e-02]],
 
         [[-1.46005638e-02, -5.61126351e-01,  4.57988791e-02, ...,
            1.55987814e-01, -9.91783515e-02,  5.74809253e-01],
          [ 3.91144641e-02, -1.02806643e-01,  3.46348703e-01, ...,
            4.67721641e-01, -1.00331023e-01,  1.43342853e-01],
          [ 5.97358584e-01,  5.83681822e-01,  2.12075412e-02, ...,
            1.02894828e-02, -8.32731366e-01,  4.01173867e-02]],
 
         [[-1.46421671e-01, -2.48344451e-01, -1.01743698e-01, ...,
            1.20773181e-01,  9.61307958e-02,  2.63529241e-01],
          [-7.12019205e-02,  2.53082924e-02,  8.20219889e-03, ...,
            4.138

In [16]:
# The first three entries of `pesos` come before the first convolution kernel
# (presumably the Normalization layer's weights - it is the only layer with
# parameters ahead of stem_conv in the summary), so the kernel sits at
# index 3 in the original model and at index 0 after the split.
pesos2[0]==pesos[3]

array([[[[ True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True]],

        [[ True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,
     

## 2) Change input shape to new dimensions
The input shape has the form (None, H, W, Channels), where `None` is the batch dimension.

*O Input Shape vai ser no formato = (None, Altura, Largura, canais)*

In [12]:
config = efib4_2.get_config()

In [13]:
config

{'name': 'efficientnetb4',
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, None, None, 3),
    'dtype': 'float32',
    'sparse': False,
    'ragged': False,
    'name': 'input_2'},
   'name': 'input_2',
   'inbound_nodes': []},
  {'class_name': 'ZeroPadding2D',
   'config': {'name': 'stem_conv_pad',
    'trainable': True,
    'dtype': 'float32',
    'padding': ((0, 1), (0, 1)),
    'data_format': 'channels_last'},
   'name': 'stem_conv_pad',
   'inbound_nodes': [[['input_2', 0, 0, {}]]]},
  {'class_name': 'Conv2D',
   'config': {'name': 'stem_conv',
    'trainable': True,
    'dtype': 'float32',
    'filters': 48,
    'kernel_size': (3, 3),
    'strides': (2, 2),
    'padding': 'valid',
    'data_format': 'channels_last',
    'dilation_rate': (1, 1),
    'groups': 1,
    'activation': 'linear',
    'use_bias': False,
    'kernel_initializer': {'class_name': 'VarianceScaling',
     'config': {'scale': 2.0,
      'mode': 'fan_out',
      'distribution':

In [17]:
# Change input shape to new dimensions: (batch, height, width, channels).
# Here: 913 x 913 images with 5 channels; None is kept for the batch dimension.
config["layers"][0]["config"]["batch_input_shape"] = (None, 913, 913, 5)

In [18]:
# Rebuild the architecture from the edited config. This creates new layers,
# so the weights are freshly initialized and no longer the ImageNet ones.
efib4_new = tf.keras.models.Model.from_config(config)

### The weights are randomly re-initialized, because the model was rebuilt with 5 (N) input channels.
You can see that the weights are no longer the ImageNet weights (or any other previously loaded weights).

In [19]:
efib4_new.get_weights()

[array([[[[ 0.11277412, -0.04199104, -0.10009462, ...,  0.12644099,
           -0.04471166, -0.08027443],
          [-0.06684812,  0.07349679, -0.04060726, ..., -0.0015971 ,
            0.01262976, -0.04870388],
          [ 0.00454874, -0.01815088,  0.01214862, ..., -0.04564384,
           -0.00304458,  0.00020945],
          [ 0.06869159,  0.05517098, -0.04953826, ...,  0.05987235,
            0.05791467,  0.02629689],
          [ 0.06782323, -0.03998237,  0.0822259 , ..., -0.04177353,
           -0.02954729, -0.07731159]],
 
         [[ 0.01604284,  0.08185303, -0.11181833, ..., -0.04969283,
           -0.06985378,  0.13870114],
          [-0.05706918,  0.00542395, -0.07123236, ..., -0.0390478 ,
            0.10155996,  0.05301297],
          [ 0.02338686, -0.04087836,  0.14469078, ..., -0.04930027,
            0.0622886 , -0.03895876],
          [-0.08667093,  0.06029704,  0.02651184, ..., -0.05426276,
           -0.05114008,  0.03737027],
          [-0.0192516 ,  0.13759346, -0.075

In [20]:
efib4_new.summary()

Model: "efficientnetb4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 913, 913, 5  0           []                               
                                )]                                                                
                                                                                                  
 stem_conv_pad (ZeroPadding2D)  (None, 914, 914, 5)  0           ['input_2[0][0]']                
                                                                                                  
 stem_conv (Conv2D)             (None, 456, 456, 48  2160        ['stem_conv_pad[0][0]']          
                                )                                                                 
                                                                                     

## 3) Loading the weights from ImageNet and matching the number of channels.

**To do this, we would need to loop through the layers of both the old model and the newly created model and copy over the weights.**

We would run into a problem, however, as the dimensions won’t match: as we saw above, changing the number of input channels changes the shape of the first convolution's kernel (1296 = 3·3·3·48 parameters originally versus 2160 = 3·3·5·48 now). To get around this, we expand the kernel along the input-channel axis: the original three channels keep their ImageNet weights, and each extra channel receives the mean of those weights over the original channels.

In [21]:
def multify_weights(kernel, out_channels):
    """Return ``out_channels`` copies of the channel-averaged kernel.

    The kernel is averaged over its input-channel axis (the second to last
    axis, i.e. a channels_last kernel shaped ``(..., in_channels, filters)``)
    and that mean is repeated ``out_channels`` times along the same axis.
    The result can be concatenated to the original kernel on axis ``-2`` to
    add ``out_channels`` extra input channels.

    Works for any kernel rank that follows this layout (e.g. Conv1D, Conv2D
    and Conv3D kernels), not only for 4-D Conv2D kernels.

    Args:
        kernel: Array-like kernel with at least two dimensions.
        out_channels: Number of mean-valued channels to generate.

    Returns:
        ``numpy.ndarray`` with the same shape as ``kernel`` except that the
        second to last axis has length ``out_channels``.
    """
    kernel = np.asarray(kernel)
    # keepdims leaves a length-1 channel axis in place, so no reshape that
    # hard-codes the kernel rank is needed.
    mean_kernel = np.mean(kernel, axis=-2, keepdims=True)
    return np.repeat(mean_kernel, out_channels, axis=-2)


def weightify(model_orig, custom_model, layer_modify):
    """Copy the weights of ``model_orig`` into ``custom_model`` and freeze them.

    Layers are matched by name. Every layer of ``model_orig`` that has
    weights and also exists in ``custom_model`` gets its weights copied
    unchanged, except for the layer named ``layer_modify``: its kernel (the
    first weight array) is widened along the input-channel axis (``-2``,
    channels_last) so that it fits the target layer. The original channels
    keep their weights and every extra channel is filled with the mean
    kernel returned by ``multify_weights``. Any further weights of that
    layer (e.g. a bias) are copied unchanged.

    Every layer that receives weights is also set to ``trainable = False``.

    Args:
        model_orig: Model that provides the pre-trained weights.
        custom_model: Model with the same layer names whose layer
            ``layer_modify`` expects more input channels.
        layer_modify: Name of the layer whose kernel has to be widened.

    Raises:
        ValueError: If the target layer expects fewer input channels than
            the original kernel provides.
    """
    conf = custom_model.get_config()
    target_names = {layer_conf['name'] for layer_conf in conf['layers']}

    for layer in model_orig.layers:
        if layer.name not in target_names:
            continue
        weights = layer.get_weights()
        if not weights:
            # Layers without weights have nothing to copy.
            continue

        target_layer = custom_model.get_layer(layer.name)
        if layer.name == layer_modify:
            kernels = weights[0]
            # Derive the number of extra channels from the target layer
            # instead of hard-coding it (5 - 3 in the original tutorial).
            target_channels = target_layer.get_weights()[0].shape[-2]
            extra_channels = target_channels - kernels.shape[-2]
            if extra_channels < 0:
                raise ValueError(
                    f"Layer '{layer.name}' of the target model expects "
                    f"{target_channels} input channels, fewer than the "
                    f"{kernels.shape[-2]} of the original kernel."
                )
            if extra_channels:
                kernels = np.concatenate(
                    (kernels, multify_weights(kernels, extra_channels)),
                    axis=-2,  # input-channel axis for channels_last
                )
            # Keep any remaining weights (e.g. the bias of a conv layer
            # built with use_bias=True) next to the widened kernel.
            target_layer.set_weights([kernels] + list(weights[1:]))
        else:
            target_layer.set_weights(weights)
        target_layer.trainable = False

### The only real change is made in the first convolutional layer.

In [22]:
# Name of the layer whose kernel has to be changed. Entry 2 of the config is
# the first convolution; entries 0 and 1 are the input and padding layers.
modify_name = config["layers"][2]["config"]["name"] # first conv layer

In [23]:
modify_name

'stem_conv'

- Using the function to load the weights:

In [24]:
weightify(efib4_2, efib4_new,modify_name )

- The weights are "**equal**":

*Not exactly, because the input shapes are different: the first kernel has two extra channels.*

In [25]:
# Index 1 is compared rather than index 0: entry 0 is the first convolution
# kernel, whose shape now differs between the two models.
efib4_2.get_weights()[1]==efib4_new.get_weights()[1]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [26]:
len(efib4_2.layers), len(efib4_new.layers)

(472, 472)

## 4) Adding the Output Layer matching the problem you are trying to solve:

In [27]:
# Regression head: flatten the output of the backbone's last layer and map
# it to a single unit with a linear activation.
flat1 = tf.keras.layers.Flatten()(efib4_new.layers[-1].output)
output = (Dense(1,activation='linear'))(flat1)
modelregre = Model(inputs=efib4_new.inputs, outputs=output)

### Final model:

In [28]:
modelregre.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 913, 913, 5  0           []                               
                                )]                                                                
                                                                                                  
 stem_conv_pad (ZeroPadding2D)  (None, 914, 914, 5)  0           ['input_2[0][0]']                
                                                                                                  
 stem_conv (Conv2D)             (None, 456, 456, 48  2160        ['stem_conv_pad[0][0]']          
                                )                                                                 
                                                                                              

### The weights are matching:

In [29]:
modelregre.get_weights()[1]==efib4_new.get_weights()[1]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [30]:
modelregre.get_weights()[0]==efib4_new.get_weights()[0]

array([[[[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]],

        [[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]],

        [[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]]],


       [[[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
      

## 5) Saving the model:

In [27]:
modelregre.save("efficientnetb4_5ch.h5")



### Loading the model

In [27]:
loaded_model = load_model("efficientnetb4_5ch.h5")



In [51]:
loaded_model.get_weights()[1]==efib4_new.get_weights()[1]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])