### Python packages used in this code

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import random
import os
import pickle
import time
import sklearn
import platform
import sys
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
import itertools
from IPython.display import clear_output

## Keras
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model, model_from_json
from tensorflow.keras.layers import Dense, Input, Add, Lambda, Dropout, Subtract, Multiply, Concatenate, Dot, BatchNormalization, Activation, LeakyReLU, ReLU
from tensorflow.keras.losses import mse
import keras.backend as K
from tensorflow.keras import regularizers
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tensorflow.get_logger().setLevel("ERROR")

%matplotlib inline

In [2]:
"""
Environments

--Platform--
OS : Windows-10-10.0.19044-SP0
--Version--
python :  3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
numpy : 1.23.1
pandas : 1.4.3
sklearn : 1.1.1
tensorflow : 2.9.1
keras : 2.9.0
"""

# Report the platform and the versions of the main packages used in this notebook
print('--Platform--')
print('OS :', platform.platform())
print('--Version--')
print('python : ', sys.version)
for _pkg_label, _pkg in [('numpy :', np),
                         ('pandas :', pd),
                         ('sklearn :', sklearn),
                         ('tensorflow :', tensorflow),
                         ('keras :', keras)]:
    print(_pkg_label, _pkg.__version__)

--Platform--
OS : Windows-10-10.0.19044-SP0
--Version--
python :  3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
numpy : 1.23.1
pandas : 1.4.3
sklearn : 1.1.1
tensorflow : 2.9.1
keras : 2.9.0


# Preparation

## Define the model class

### Without transfer

In [3]:
class cls_WithoutTL_NN(BaseEstimator, RegressorMixin):
    """
    Neural network regressor trained on the target data only (no transfer).

    Network : Input(dim_x) -> Dense(layer1) -> LeakyReLU -> Dropout
              -> Dense(layer2) -> LeakyReLU -> Dropout -> Dense(1)

    Required global variables
    -------------------------
        dim_x    : dimension of the input
        b_size   : batch size
        fix_seed : function that seeds the random number generators
    """
    def __init__(self, dr_rate=0, layer1=4, decay_rate1=0.5, decay_rate2=0.5, l2_lambda=0.01, learning_rate=0.01, epochs=10):
        """
        Define the neural network model without transfer.
        
        Parameters
        ----------
            dr_rate       : dropout rate
            layer1        : width of the 1st layer (overwritten in fit by floor(dim_x*decay_rate1))
            decay_rate1   : rate of the width decay from the input to the 1st layer
            decay_rate2   : rate of the width decay from the 1st layer to the 2nd layer
            l2_lambda     : l2-regularization parameter
            learning_rate : learning rate for Adam
            epochs        : learning epochs            
        """
        self.dr_rate = dr_rate
        self.layer1 = layer1
        self.decay_rate1 = decay_rate1
        self.decay_rate2 = decay_rate2
        self.l2_lambda = l2_lambda
        self.learning_rate = learning_rate
        self.epochs = epochs

    def prop_make_model(self):
        """
        Build and compile the neural network model.

        Reads self.layer1 and self.layer2 (both set in fit) and stores the
        compiled Keras model in self.model.
        """
        input1 = Input(shape=(dim_x,))

        h = Dense(units=self.layer1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda))(input1)
        h = LeakyReLU(alpha=0.01)(h)
        h = Dropout(self.dr_rate)(h)
        
        h = Dense(units=self.layer2, kernel_regularizer=regularizers.L2(l2=self.l2_lambda))(h)
        h = LeakyReLU(alpha=0.01)(h)
        h = Dropout(self.dr_rate)(h)
        
        out = Dense(units=1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda))(h)

        self.model = Model(inputs=input1, outputs=out)
        optimizer = keras.optimizers.Adam(self.learning_rate)
        self.model.compile(loss='mse',
                     optimizer=optimizer,
                     metrics=['mae', 'mse'])
        return self
    
    def fit(self, X, y=None):
        """
        Model fitting
        
        Required global variables
        -------------------------
            dim_x  : dimension of the input
            b_size : batch size
        
        Attributes set
        --------------
            layer1  : width of the 1st layer, floor(dim_x*decay_rate1)
            layer2  : width of the 2nd layer, floor(layer1*decay_rate2)
            model   : neural network model
            history : training history

        Returns
        -------
            self
        """
        # The layer widths are derived from the input dimension; the value of
        # layer1 given to the constructor is replaced here.
        self.layer1 = int(np.floor(dim_x*self.decay_rate1))
        self.layer2 = int(np.floor(self.layer1*self.decay_rate2))
        # Fixed seed so that every fit starts from the same initialization
        fix_seed(373)
        self.prop_make_model()
        self.history = self.model.fit(
            X,
            y,
            batch_size=b_size,
            epochs=self.epochs,
            validation_split = 0,
            verbose=0
        )
        K.clear_session()
        return self
    
    def predict(self, X):
        """
        Prediction (returns the raw output of the Keras model)
        """
        return self.model.predict(X, verbose=0)         
                             
    def score(self, X, y=None):
        """
        Score function for cross-validation : negative sum of squared errors.

        y may be a pandas Series, a one-column DataFrame or a numpy array; both
        the targets and the predictions are flattened before the comparison so
        that no (n, n) broadcasting can occur.
        """
        y_true = np.asarray(y).reshape(-1)
        y_pred = np.asarray(self.predict(X)).reshape(-1)
        return -np.sum((y_true - y_pred)**2)
    
    def get_params(self, deep=True):
        """
        Create parameter dictionary for cross-validation
        """
        return {
            'dr_rate' : self.dr_rate,
            'layer1' : self.layer1,
            'decay_rate1' : self.decay_rate1,
            'decay_rate2' : self.decay_rate2,
            'l2_lambda' : self.l2_lambda,
            'learning_rate' : self.learning_rate,
            'epochs' : self.epochs
        }
    
    def set_params(self, **parameters):
        """
        Set the given parameters as attributes (for cross-validation)
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self  

### Feature extraction

In [4]:
class cls_FeatureExt_NN(BaseEstimator, RegressorMixin):
    """
    Neural network regressor whose input is the feature vector extracted from
    a source model (dimension num_SourceTasks).

    Network : Input(num_SourceTasks) -> Dense(layer1) -> LeakyReLU -> Dropout
              -> Dense(layer2) -> LeakyReLU -> Dropout -> Dense(1)

    Required global variables
    -------------------------
        num_SourceTasks : dimension of the extracted features
        b_size          : batch size
        fix_seed        : function that seeds the random number generators
    """
    def __init__(self, dr_rate=0, layer1=4, decay_rate1=0.5, decay_rate2=0.5, l1_lambda=0.01, l2_lambda=0.01, learning_rate=0.01, epochs=10):
        """
        Define the neural network model for the feature extraction.
        
        Parameters
        ----------
            dr_rate       : dropout rate
            layer1        : width of the 1st layer (overwritten in fit by floor(num_SourceTasks*decay_rate1))
            decay_rate1   : rate of the width decay from the input to the 1st layer
            decay_rate2   : rate of the width decay from the 1st layer to the 2nd layer
            l1_lambda     : l1-regularization parameter (stored, but the network only applies L2 regularizers)
            l2_lambda     : l2-regularization parameter
            learning_rate : learning rate for Adam
            epochs        : learning epochs            
        """
        self.dr_rate       = dr_rate
        self.layer1        = layer1
        self.decay_rate1   = decay_rate1
        self.decay_rate2   = decay_rate2
        self.l1_lambda     = l1_lambda
        self.l2_lambda     = l2_lambda
        self.learning_rate = learning_rate
        self.epochs        = epochs

    def prop_make_model(self):
        """
        Build and compile the neural network model.

        Reads self.layer1 and self.layer2 (both set in fit) and stores the
        compiled Keras model in self.model.
        """
        input1 = Input(shape=(num_SourceTasks,))

        h = Dense(units=self.layer1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda))(input1)
        h = LeakyReLU(alpha=0.01)(h)
        h = Dropout(self.dr_rate)(h)
        
        h = Dense(units=self.layer2, kernel_regularizer=regularizers.L2(l2=self.l2_lambda))(h)
        h = LeakyReLU(alpha=0.01)(h)
        h = Dropout(self.dr_rate)(h)
        
        out = Dense(units=1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda))(h)

        self.model = Model(inputs=input1, outputs=out)
        optimizer = keras.optimizers.Adam(self.learning_rate)
        self.model.compile(loss='mse',
                     optimizer=optimizer,
                     metrics=['mae', 'mse'])
        return self
    
    def fit(self, X, y=None):
        """
        Model fitting
        
        Required global variables
        -------------------------
            num_SourceTasks : dimension of the extracted features
            b_size          : batch size
        
        Attributes set
        --------------
            layer1  : width of the 1st layer, floor(num_SourceTasks*decay_rate1)
            layer2  : width of the 2nd layer, floor(layer1*decay_rate2)
            model   : neural network model
            history : training history

        Returns
        -------
            self
        """
        # The layer widths are derived from the feature dimension; the value of
        # layer1 given to the constructor is replaced here.
        self.layer1 = int(np.floor(num_SourceTasks*self.decay_rate1))
        self.layer2 = int(np.floor(self.layer1*self.decay_rate2))
        # Fixed seed so that every fit starts from the same initialization
        fix_seed(373)
        self.prop_make_model()
        self.history = self.model.fit(
            X,
            y,
            batch_size = b_size,
            epochs = self.epochs,
            validation_split = 0,
            verbose = 0
        )
        K.clear_session()
        return self
    
    def predict(self, X):
        """
        Prediction (returns the raw output of the Keras model)
        """
        return self.model.predict(X, verbose=0)         
                             
    def score(self, X, y=None):
        """
        Score function for cross-validation : negative sum of squared errors.

        y may be a pandas Series, a one-column DataFrame or a numpy array; both
        the targets and the predictions are flattened before the comparison so
        that no (n, n) broadcasting can occur.
        """
        y_true = np.asarray(y).reshape(-1)
        y_pred = np.asarray(self.predict(X)).reshape(-1)
        return -np.sum((y_true - y_pred)**2)
    
    def get_params(self, deep=True):
        """
        Create parameter dictionary for cross-validation.

        Every constructor argument is listed (including l1_lambda) so that
        sklearn.base.clone reproduces the estimator faithfully.
        """
        return {
            'dr_rate'       : self.dr_rate,
            'layer1'        : self.layer1,
            'decay_rate1'   : self.decay_rate1,
            'decay_rate2'   : self.decay_rate2,
            'l1_lambda'     : self.l1_lambda,
            'l2_lambda'     : self.l2_lambda,
            'learning_rate' : self.learning_rate,
            'epochs'        : self.epochs
        }
    
    def set_params(self, **parameters):
        """
        Set the given parameters as attributes (for cross-validation)
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self     

### Proposed method

In [5]:
class cls_InvTrans_NN(BaseEstimator, RegressorMixin):
    """
    Neural network regressor for the affine transfer model (proposed method).
        y = g_1(f_s(x)) + g_2(f_s(x)) g_3(x)

    The input X must be a pandas DataFrame whose first dim_x columns are the
    original descriptors x and whose remaining columns are the source
    features f_s(x).

    Required global variables
    -------------------------
        dim_x           : dimension of the original input x
        num_SourceTasks : dimension of the source features f_s(x)
        b_size          : batch size
        fix_seed        : function that seeds the random number generators
    """
    def __init__(self, dr_rate=0, layer1=4, layer2=4, layer3=4, 
                 decay_rate1=0.5, decay_rate2=0.5, decay_rate3=0.5,
                 l2_lambda1=0.01, l2_lambda2=0.01, learning_rate=0.01, epochs=10):
        """
        Define the neural network model for affine transfer model.
            y = g_1(f_s(x)) + g_2(f_s(x)) g_3(x)
        
        Parameters
        ----------
            dr_rate       : dropout rate
            layer1        : width of the hidden layer of g1 (overwritten in fit)
            layer2        : width of the hidden layer of g2 (overwritten in fit)
            layer3        : width of the hidden layer of g3 (overwritten in fit)
            decay_rate1   : rate of the width decay for the g1 network
            decay_rate2   : rate of the width decay for the g2 network
            decay_rate3   : rate of the width decay for the g3 network
            l2_lambda1    : l2-regularization parameter for g1 and g2 network
            l2_lambda2    : l2-regularization parameter for g3 network
            learning_rate : learning rate for Adam
            epochs        : learning epochs            
        """
        self.dr_rate       = dr_rate
        self.layer1        = layer1
        self.layer2        = layer2
        self.layer3        = layer3
        self.decay_rate1   = decay_rate1
        self.decay_rate2   = decay_rate2
        self.decay_rate3   = decay_rate3
        self.l2_lambda1    = l2_lambda1
        self.l2_lambda2    = l2_lambda2
        self.learning_rate = learning_rate
        self.epochs        = epochs

    def prop_make_model(self):
        """
        Build and compile the neural network model.

        input1 (source features) feeds g1 and g2, input2 (original descriptors)
        feeds g3; the output is g1 + g2*g3. The compiled Keras model is stored
        in self.model.
        """
        input1 = Input(shape=(num_SourceTasks,))
        input2 = Input(shape=(dim_x,))

        # g1 : source features -> scalar
        h1 = Dense(units=self.layer1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda1))(input1)
        h1 = LeakyReLU(alpha=0.01)(h1)
        h1 = Dropout(self.dr_rate)(h1)
        h1 = Dense(units=1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda1))(h1)
        
        # g2 : source features -> scalar
        h2 = Dense(units=self.layer2, kernel_regularizer=regularizers.L2(l2=self.l2_lambda1))(input1)
        h2 = LeakyReLU(alpha=0.01)(h2)
        h2 = Dropout(self.dr_rate)(h2)
        h2 = Dense(units=1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda1))(h2)
        
        # g3 : original descriptors -> scalar
        h = Dense(units=self.layer3, kernel_regularizer=regularizers.L2(l2=self.l2_lambda2))(input2)
        h = LeakyReLU(alpha=0.01)(h)
        h = Dropout(self.dr_rate)(h)
        h = Dense(units=1, kernel_regularizer=regularizers.L2(l2=self.l2_lambda2))(h)

        # out = g1 + g2*g3
        h = Multiply()([h2, h])
        out = Add()([h1, h])

        self.model = Model(inputs=[input1, input2], outputs=out)

        optimizer = keras.optimizers.Adam(self.learning_rate)

        self.model.compile(
            loss='mse',
            optimizer=optimizer,
            metrics=['mae', 'mse'])
        return self
    
    def fit(self, X, y=None):
        """
        Model fitting

        Parameters
        ----------
            X : DataFrame, columns [:dim_x] are the original descriptors and
                columns [dim_x:] are the source features
            y : target values

        Attributes set
        --------------
            X_train, X_source : the two column blocks of X
            layer1, layer2    : hidden widths of g1 and g2,
                                floor(num_SourceTasks*decay_rate1) and
                                floor(num_SourceTasks*decay_rate2)
            layer3            : hidden width of g3, floor(dim_x*decay_rate3)
            model             : neural network model
            history           : training history

        Returns
        -------
            self
        """
        self.X_train = X.iloc[:,:dim_x]
        self.X_source = X.iloc[:,dim_x:]
        
        # Not used by the network; kept so that existing code reading these
        # attributes keeps working.
        self.layer1_2 = int(np.floor(self.layer1*self.decay_rate1))
        self.layer2_2 = int(np.floor(self.layer2*self.decay_rate2))
        self.layer3_2 = int(np.floor(self.layer3*self.decay_rate3))
        # Hidden widths are derived from the input dimensions; the values given
        # to the constructor are replaced here.
        self.layer1   = int(np.floor(num_SourceTasks*self.decay_rate1))
        # g2 uses its own decay rate (decay_rate1 was used here before, which
        # made decay_rate2 ineffective).
        self.layer2   = int(np.floor(num_SourceTasks*self.decay_rate2))
        self.layer3   = int(np.floor(dim_x*self.decay_rate3))
        
        # Fixed seed so that every fit starts from the same initialization
        fix_seed(373)
        self.prop_make_model()
        self.history = self.model.fit(
            [self.X_source, self.X_train],
            y,
            batch_size = b_size,
            epochs = self.epochs,
            validation_split = 0,
            verbose = 0
        )
        K.clear_session()
        return self
    
    def predict(self, X):
        """
        Prediction. X has the same column layout as in fit.
        """
        X_source_pred = X.iloc[:,dim_x:]
        X_train_pred = X.iloc[:,:dim_x]
        return self.model.predict([X_source_pred, X_train_pred], verbose=0)

    def score(self, X, y=None):
        """
        Score function for cross-validation : negative sum of squared errors.

        y may be a pandas Series, a one-column DataFrame or a numpy array; both
        the targets and the predictions are flattened before the comparison so
        that no (n, n) broadcasting can occur.
        """
        y_true = np.asarray(y).reshape(-1)
        y_pred = np.asarray(self.predict(X)).reshape(-1)
        return -np.sum((y_true - y_pred)**2)
    
    def get_params(self, deep=True):
        """
        Create parameter dictionary for cross-validation
        """
        return {
            'dr_rate'       : self.dr_rate,
            'layer1'        : self.layer1,
            'layer2'        : self.layer2,
            'layer3'        : self.layer3,
            'decay_rate1'   : self.decay_rate1,
            'decay_rate2'   : self.decay_rate2,
            'decay_rate3'   : self.decay_rate3,
            'l2_lambda1'    : self.l2_lambda1,
            'l2_lambda2'    : self.l2_lambda2,
            'learning_rate' : self.learning_rate,
            'epochs'        : self.epochs
        }
    
    def set_params(self, **parameters):
        """
        Set the given parameters as attributes (for cross-validation)
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self 

### Fine-tuning

In [6]:
class cls_FineTune_NN(BaseEstimator, RegressorMixin):
    """
    Regressor that fine-tunes a pre-trained source model on the target data.

    Required global variables
    -------------------------
        source_model_path_json : path of the source model architecture (JSON)
        source_model_path_hdf5 : path of the source model weights (HDF5)
        fix_seed               : function that seeds the random number generators
    """
    def __init__(self, learning_rate=0.01, epochs=10, n_frozen=0):
        """
        Define the neural network model for fine-tuning model.
        
        Parameters
        ----------
            learning_rate : learning rate for Adam
            epochs        : learning epochs            
            n_frozen      : number of frozen layers (counted from the input side)
        """
        self.learning_rate = learning_rate
        self.epochs        = epochs
        self.n_frozen      = n_frozen
    
    def fit(self, X, y=None):
        """
        Model fitting
        
        Required global variables
        -------------------------
            source_model_path_json : path of the source model architecture (JSON)
            source_model_path_hdf5 : path of the source model weights (HDF5)

        The batch size is fixed to 2 in this class.
        
        Attributes set
        --------------
            model   : neural network model (source model after fine-tuning)
            history : training history

        Returns
        -------
            self
        """
        # Rebuild the source model; the context manager closes the JSON file.
        with open(source_model_path_json, 'r') as f_json:
            self.model = model_from_json(f_json.read())
        self.model.load_weights(source_model_path_hdf5)
        # presumably layers 1, 3 and 5 are the Dense layers of the source model -- TODO confirm
        name_layers_dense = [self.model.layers[1].name, self.model.layers[3].name, self.model.layers[5].name]
        if self.n_frozen > 0:
            # Freeze the first n_frozen layers of the list one by one
            # (get_layer expects a single layer name, not the whole list).
            for name_layer in name_layers_dense[:int(self.n_frozen)]:
                self.model.get_layer(name_layer).trainable = False

        # Compile after changing `trainable` so that the freezing is applied
        self.model.compile(loss='mse',
                     optimizer=keras.optimizers.Adam(self.learning_rate),
                     metrics=['mae', 'mse'])
        
        fix_seed(373)
        self.history = self.model.fit(
            X,
            y,
            batch_size = 2,
            epochs = self.epochs,
            validation_split = 0,
            verbose = 0
        )
        K.clear_session()
        return self
    
    def predict(self, X):
        """
        Prediction (returns the raw output of the Keras model)
        """
        return self.model.predict(X, verbose=0)

    def score(self, X, y=None):
        """
        Score function for cross-validation : negative sum of squared errors.

        y may be a pandas Series, a one-column DataFrame or a numpy array; both
        the targets and the predictions are flattened before the comparison so
        that no (n, n) broadcasting can occur.
        """
        y_true = np.asarray(y).reshape(-1)
        y_pred = np.asarray(self.predict(X)).reshape(-1)
        return -np.sum((y_true - y_pred)**2)
    
    def get_params(self, deep=True):
        """
        Create parameter dictionary for cross-validation.

        Every constructor argument is listed (including n_frozen) so that
        sklearn.base.clone reproduces the estimator faithfully.
        """
        return {
            'learning_rate' : self.learning_rate,
            'epochs'        : self.epochs,
            'n_frozen'      : self.n_frozen
        }
    
    def set_params(self, **parameters):
        """
        Set the given parameters as attributes (for cross-validation)
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self  

## fix_seed function

In [7]:
def fix_seed(seed):
    """
    Seed the random number generators used in this notebook.

    Parameters
    ----------
        seed : seed value passed to Numpy, Tensorflow and the built-in random
               module, and exported as the PYTHONHASHSEED environment variable
    """
    # Numpy, Tensorflow, built-in random (in this order)
    for set_seed in (np.random.seed, tensorflow.random.set_seed, random.seed):
        set_seed(seed)
    # hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)

## Make figure function

In [8]:
def _draw_obs_vs_pred_panel(fig, plt_row, plt_col, plt_pos, title,
                            y_fits, y_train, y_pred, y_test):
    """
    Draw one prediction-vs-observation panel on `fig` and return its axes.

    Training points (y_fits, y_train) are drawn in blue, test points
    (y_pred, y_test) in orange. The correlation, MSE and MAE written in the
    panel are computed on the test points only.
    """
    ax = fig.add_subplot(plt_row, plt_col, plt_pos,
                         title=title,
                         xlabel='Prediction',
                         ylabel='Observation')
    ax.scatter(y_fits, y_train, color='steelblue', alpha=0.7)
    ax.scatter(y_pred, y_test, color='darkorange', alpha=1)
    # Common range for both axes, taken from the auto-scaled limits
    xy_min = min(ax.get_xlim()[0], ax.get_ylim()[0])
    xy_max = max(ax.get_xlim()[1], ax.get_ylim()[1])
    ax.axis('equal')
    ax.axis('square')
    ax.set_xlim([xy_min, xy_max])
    ax.set_ylim([xy_min, xy_max])
    ax.grid(color='gray', linestyle='dotted', linewidth=1, alpha=0.5)
    ax.text(0.53, 0.14, 'Corr : '+str(round(np.corrcoef(y_pred, y_test)[0,1], 4)), size=15, transform=ax.transAxes)
    ax.text(0.53, 0.08, 'MSE : '+str(round(np.mean((y_pred-y_test)**2), 4)), size=15, transform=ax.transAxes)
    ax.text(0.53, 0.02, 'MAE : '+str(round(np.mean(np.abs(y_pred-y_test)), 4)), size=15, transform=ax.transAxes)
    # Diagonal reference line (prediction == observation)
    _ = ax.plot([-30000, 30000], [-30000, 30000], color='gray', linewidth=0.5)
    return ax


def MakeFigure_Train(figsize,
                     dict_result,
                     savename,
                     plt_show=True):
    """
    Plot prediction vs. observation for every model on a 2 x 4 grid and save it.

    Layout : row 1 = without transfer, feature extractor (layers 1-3)
             row 2 = fine-tuning,      proposed method   (layers 1-3)

    Parameters
    ----------
        figsize     : figure size passed to plt.figure
        dict_result : dictionary with the keys 'y_train', 'y_test',
                      'y_fits_<m>' and 'y_pred_<m>' for <m> in
                      Wotl, Fine, Ext1-3 and InvNN1-3
        savename    : path of the saved figure
        plt_show    : if false, the figure is closed after saving
    """
    plt_row = 2
    plt_col = 4
    
    fig = plt.figure(figsize=figsize)
    y_train = dict_result['y_train']
    y_test  = dict_result['y_test']

    def draw(plt_pos, title, key):
        # `key` is the suffix of the 'y_fits_' / 'y_pred_' entries
        return _draw_obs_vs_pred_panel(
            fig, plt_row, plt_col, plt_pos, title,
            dict_result['y_fits_'+key], y_train,
            dict_result['y_pred_'+key], y_test)

    draw(1, 'Without transfer', 'Wotl')

    for plt_layer in [1,2,3]:
        draw(1+plt_layer,
             'Feature extractor (neural network) : Layer '+str(plt_layer),
             'Ext'+str(plt_layer))
        draw(5+plt_layer,
             'Proposed method (neural network) : Layer '+str(plt_layer),
             'InvNN'+str(plt_layer))

    draw(5, 'Fine-tuning', 'Fine')

    # Leaves the top 4% of the figure free
    fig.tight_layout(rect=[0,0,1,0.96])

    fig.savefig(savename)
    if not plt_show:
        plt.close(fig)

## Search parameters in cross-validation

In [9]:
# Grids searched by cross-validation, one dictionary per model class.
# The 'layer*' entries are single-valued: the fit methods recompute the
# layer widths from the decay rates.

# For without transfer
SearchParams_WithoutTL_NN = dict(
    dr_rate       = [0.25, 0.5],
    layer1        = [1],
    decay_rate1   = [0.5, 0.75],
    decay_rate2   = [0.5, 0.75],
    l2_lambda     = [1e-2],
    learning_rate = [1e-3],
    epochs        = [50, 75, 100],
)

# For feature extractor
SearchParams_FeatureExt_NN = dict(
    dr_rate       = [0.25, 0.5],
    layer1        = [1],
    decay_rate1   = [0.5, 0.75],
    decay_rate2   = [0.5, 0.75],
    l2_lambda     = [1e-2],
    learning_rate = [1e-3],
    epochs        = [50, 75, 100],
)

# For proposed method
SearchParams_InvTrans_NN = dict(
    dr_rate       = [0.5],
    layer1        = [1],
    layer2        = [1],
    layer3        = [1],
    decay_rate1   = [0.25, 0.5, 0.75],
    decay_rate2   = [1],
    decay_rate3   = [0.25, 0.5, 0.75],
    l2_lambda1    = [1e-2],
    l2_lambda2    = [1e-2],
    learning_rate = [1e-3],
    epochs        = [50, 75, 100],
)

# For fine-tuning
SearchParams_FineTune_NN = dict(
    learning_rate = [1e-3],
    epochs        = [0, 1, 5, 10, 20, 30, 40, 50],
    n_frozen      = [0],
)

# Main code

## Load data

In [10]:
# Descriptors removed from the input
drop_list = [
    'min:gs_mag_moment',
    'min:num_d_unfilled',
    'min:num_f_unfilled',
    'min:num_f_valence',
    'ave:num_f_unfilled',
    'ave:num_f_valence',
    'sum:num_f_unfilled',
    'sum:num_f_valence',
    'var:num_f_unfilled',
    'var:num_f_valence',
    'max:num_f_unfilled',
    'max:num_f_valence',
]
# NOTE(review): joblib.load unpickles the file -- only load data from a trusted source.
data_all = joblib.load('../10_Data/SPSTC_290.pkl')
x_all = data_all['desc'].drop(drop_list, axis=1)
y_all = data_all['data']

# Keep only the samples whose 'TC (W/mK)' value is observed
has_TC = y_all['TC (W/mK)'].notna()
y_LTC = y_all.loc[has_TC, 'TC (W/mK)']
x_LTC = x_all[has_TC]

# Input dimension = number of descriptors remaining after the drop
dim_x = x_all.shape[1]
# Batch size used by the model classes
b_size = 2

In [11]:
## Scaling parameters
# Inputs : column-wise mean / standard deviation over all samples in x_all
x_mean, x_std = x_all.mean(), x_all.std()
# Output : mean / standard deviation of the log-transformed target
y_LogMean, y_LogStd = np.log(y_LTC).mean(), np.log(y_LTC).std()

In [12]:
# Data scaling
## Input (the same standardized descriptors are stored under two names:
## x_* is the input of the target models, xs_* the input of the source model)
x_LTC_scal = x_LTC.sub(x_mean).div(x_std)
xs_LTC_scal = x_LTC.sub(x_mean).div(x_std)
## Output (standardized log-target)
y_LTC_scal = np.log(y_LTC).sub(y_LogMean).div(y_LogStd)

## User parameter setting

In [13]:
num_itr = 10   # Number of iterations (one train/test split per iteration)
n_fold = 5     # Number of folds of the cross-validation
SEED = 373     # Seed for the fix_seed function
# Source model ids
i_list_source = [66, 83, 39, 36, 70, 95, 56, 72, 69, 42]

# Main

In [14]:
# Containers for the results
list_result = []
# One column per (data split, method) pair, after the three identifier columns
df_result = pd.DataFrame(
    [],
    columns=['No. source model', 'Itr', 'Count'] + [
        'MSE (' + split + ', ' + method + ')'
        for split in ('train', 'test')
        for method in ('Wotl', 'Fine', 'Ext-1', 'Ext-2', 'Ext-3', 'Inv-1', 'Inv-2', 'Inv-3')
    ]
)
# Running counter
i_count = 0

In [None]:
t1 = time.time()
# Repeat for different splittings of samples
for i_itr in range(num_itr):
    print(i_itr)
    x_train_scal, x_test_scal, xs_train_scal, xs_test_scal, y_train_scal, y_test_scal = train_test_split(x_LTC_scal, xs_LTC_scal, y_LTC_scal, train_size=40, random_state=int(i_itr))
    y_train = np.exp(y_train_scal*y_LogStd + y_LogMean)
    y_test = np.exp(y_test_scal*y_LogStd + y_LogMean)
    
    # Model training
    ## Without transfer
    gsr_Wotl = GridSearchCV(
        cls_WithoutTL_NN(),
        SearchParams_WithoutTL_NN,
        scoring = 'neg_mean_squared_error',
        cv = n_fold,
        n_jobs = -1,
        verbose = False
    )
    t_tmp = time.time()
    fix_seed(SEED)
    gsr_Wotl.fit(x_train_scal, y_train_scal)

    model_Wotl = cls_WithoutTL_NN(
        dr_rate       = gsr_Wotl.best_params_['dr_rate'],
        layer1        = gsr_Wotl.best_params_['layer1'],
        decay_rate1   = gsr_Wotl.best_params_['decay_rate1'],
        decay_rate2   = gsr_Wotl.best_params_['decay_rate2'],
        l2_lambda     = gsr_Wotl.best_params_['l2_lambda'],
        learning_rate = gsr_Wotl.best_params_['learning_rate'],
        epochs        = gsr_Wotl.best_params_['epochs']
    )
    fix_seed(SEED)
    model_Wotl.fit(x_train_scal, y_train_scal)

    y_fits_Wotl_scal = model_Wotl.predict(x_train_scal)
    y_pred_Wotl_scal = model_Wotl.predict(x_test_scal)
    y_fits_Wotl      = np.exp(y_fits_Wotl_scal*y_LogStd+y_LogMean)
    y_pred_Wotl      = np.exp(y_pred_Wotl_scal*y_LogStd+y_LogMean)
    y_fits_Wotl      = pd.Series(y_fits_Wotl.reshape(-1), index=y_train.index)
    y_pred_Wotl      = pd.Series(y_pred_Wotl.reshape(-1), index=y_test.index)
    train_MSE_Wotl   = np.mean((y_fits_Wotl - y_train)**2)
    test_MSE_Wotl    = np.mean((y_pred_Wotl - y_test)**2)
    print('  Without transfer has been done.    '+str(time.time()-t_tmp))
    
    # Repeat for different source models
    for i_source in i_list_source:
        if not os.path.isdir('../30_Output/20_Plot/200_TransferLearning/Source'+str(i_source)):
            os.makedirs('../30_Output/20_Plot/200_TransferLearning/Source'+str(i_source))
        if not os.path.isdir('../30_Output/30_csv/200_TransferLearning'):
            os.makedirs('../30_Output/30_csv/200_TransferLearning')
        if not os.path.isdir('../30_Output/40_pkl/200_TransferLearning/Source'+str(i_source)):
            os.makedirs('../30_Output/40_pkl/200_TransferLearning/Source'+str(i_source))

        # Load source model
        with open('../30_Output/40_pkl/100_MakeSourceModel/100_Model_'+str(i_source)+'.pkl', 'rb') as f:
            data_list = pickle.load(f)
        source_model_path_json = '../30_Output/10_Model/100_MakeSourceModel/100_Model_'+str(i_source)+'.json'
        source_model_path_hdf5 = '../30_Output/10_Model/100_MakeSourceModel/100_Model_'+str(i_source)+'.hdf5'
        num_SourceTasks = int(data_list['width_layers'][0])
        source_model = model_from_json(open(source_model_path_json, 'r').read())
        source_model.load_weights(source_model_path_hdf5)

        print(i_source, ' : ', i_itr)
        dict_result = {}
        # Data splitting
        dict_result['x_train_scal']     = x_train_scal
        dict_result['x_test_scal']      = x_test_scal
        dict_result['xs_train_scal']    = xs_train_scal
        dict_result['xs_test_scal']     = xs_test_scal
        dict_result['y_train_scal']     = y_train_scal
        dict_result['y_test_scal']      = y_test_scal
        dict_result['y_train']          = y_train
        dict_result['y_test']           = y_test
        dict_result['y_fits_Wotl']      = y_fits_Wotl
        dict_result['y_pred_Wotl']      = y_pred_Wotl
        dict_result['best_params_Wotl'] = gsr_Wotl.best_params_

        # Model training
        ## Fine-tuning
        gsr_Fine = GridSearchCV(
            cls_FineTune_NN(),
            SearchParams_FineTune_NN,
            scoring = 'neg_mean_squared_error',
            cv = n_fold,
            n_jobs = -1,
            verbose = False
        )
        t_tmp = time.time()
        fix_seed(SEED)
        gsr_Fine.fit(x_train_scal, y_train_scal)

        model_Fine = cls_FineTune_NN(
            learning_rate = gsr_Fine.best_params_['learning_rate'],
            epochs        = gsr_Fine.best_params_['epochs'],
            n_frozen      = gsr_Fine.best_params_['n_frozen']
        )
        fix_seed(SEED)
        model_Fine.fit(x_train_scal, y_train_scal)

        y_fits_Fine_scal = model_Fine.predict(x_train_scal)
        y_pred_Fine_scal = model_Fine.predict(x_test_scal)
        y_fits_Fine      = np.exp(y_fits_Fine_scal*y_LogStd+y_LogMean)
        y_pred_Fine      = np.exp(y_pred_Fine_scal*y_LogStd+y_LogMean)
        y_fits_Fine      = pd.Series(y_fits_Fine.reshape(-1), index=y_train.index)
        y_pred_Fine      = pd.Series(y_pred_Fine.reshape(-1), index=y_test.index)
        dict_result['y_fits_Fine']      = y_fits_Fine
        dict_result['y_pred_Fine']      = y_pred_Fine
        dict_result['best_params_Fine'] = gsr_Fine.best_params_
        train_MSE_Fine = np.mean((y_fits_Fine - y_train)**2)
        test_MSE_Fine  = np.mean((y_pred_Fine - y_test )**2)
        print('  Fine-tuning has been done.    '+str(time.time()-t_tmp))

        # Per-layer MSE collectors; each receives one entry per pass of the loop below.
        list_train_MSE_Ext, list_test_MSE_Ext = [], []
        list_train_MSE_Inv, list_test_MSE_Inv = [], []

        # Run both transfer approaches once for each tapped source-model layer
        for i_layer in (1, 2, 3):
            layer_tag = str(i_layer)  # suffix shared by result keys and progress messages

            # Sub-model that ends at source_model.layers[2*i_layer]
            name_layer = source_model.layers[2*i_layer].name
            source_model_Ext = Model(
                inputs=source_model.input,
                outputs=source_model.get_layer(name_layer).output,
            )

            ## Feature extract: outputs of the truncated source model, indexed like its inputs
            ll_train = pd.DataFrame(source_model_Ext.predict(xs_train_scal), index=xs_train_scal.index)
            ll_test = pd.DataFrame(source_model_Ext.predict(xs_test_scal), index=xs_test_scal.index)

            # With mean 0 and std 1 this scaling step leaves the values as they are
            ll_mean, ll_std = 0, 1
            ll_train_scal = (ll_train - ll_mean)/ll_std
            ll_test_scal = (ll_test - ll_mean)/ll_std

            ## Merged data: target-side inputs joined with the extracted features on the index
            x_train_adds = pd.merge(left=x_train_scal, right=ll_train_scal, left_index=True, right_index=True)
            x_test_adds = pd.merge(left=x_test_scal, right=ll_test_scal, left_index=True, right_index=True)

            ## Feature extractor
            gsr_Ext = GridSearchCV(
                estimator=cls_FeatureExt_NN(),
                param_grid=SearchParams_FeatureExt_NN,
                scoring='neg_mean_squared_error',
                cv=n_fold,
                n_jobs=-1,
                verbose=False,
            )
            t_tmp = time.time()
            fix_seed(SEED)
            gsr_Ext.fit(ll_train_scal, y_train_scal)

            # Build a fresh estimator from the selected hyper-parameters and fit it
            # on the full training features under the same seed.
            best_Ext = gsr_Ext.best_params_
            model_Ext = cls_FeatureExt_NN(**{
                hp_name: best_Ext[hp_name]
                for hp_name in (
                    'dr_rate', 'layer1', 'decay_rate1', 'decay_rate2',
                    'l2_lambda', 'learning_rate', 'epochs',
                )
            })
            fix_seed(SEED)
            model_Ext.fit(ll_train_scal, y_train_scal)

            # Predict, then undo the target transform (scale, shift, exponentiate)
            y_fits_Ext_scal = model_Ext.predict(ll_train_scal)
            y_pred_Ext_scal = model_Ext.predict(ll_test_scal)
            fits_Ext_raw = np.exp(y_fits_Ext_scal*y_LogStd + y_LogMean)
            pred_Ext_raw = np.exp(y_pred_Ext_scal*y_LogStd + y_LogMean)
            y_fits_Ext = pd.Series(fits_Ext_raw.reshape(-1), index=y_train.index)
            y_pred_Ext = pd.Series(pred_Ext_raw.reshape(-1), index=y_test.index)

            dict_result['y_fits_Ext'+layer_tag] = y_fits_Ext
            dict_result['y_pred_Ext'+layer_tag] = y_pred_Ext
            dict_result['best_params_Ext'+layer_tag] = best_Ext
            list_train_MSE_Ext.append(np.mean((y_fits_Ext - y_train)**2))
            list_test_MSE_Ext.append(np.mean((y_pred_Ext - y_test)**2))
            print('     Feature extractor (neural network, layer-'+layer_tag+') has been done.    '+str(time.time()-t_tmp))

            ## Proposed method: trained on the merged inputs (x_*_adds)
            gsr_InvNN = GridSearchCV(
                estimator=cls_InvTrans_NN(),
                param_grid=SearchParams_InvTrans_NN,
                scoring='neg_mean_squared_error',
                cv=n_fold,
                n_jobs=-1,
                verbose=False,
            )
            t_tmp = time.time()
            fix_seed(SEED)
            gsr_InvNN.fit(x_train_adds, y_train_scal)

            # Same pattern as above: fresh estimator from the selected hyper-parameters
            best_InvNN = gsr_InvNN.best_params_
            model_InvNN = cls_InvTrans_NN(**{
                hp_name: best_InvNN[hp_name]
                for hp_name in (
                    'dr_rate', 'layer1', 'layer2', 'layer3',
                    'decay_rate1', 'decay_rate2', 'decay_rate3',
                    'l2_lambda1', 'l2_lambda2', 'learning_rate', 'epochs',
                )
            })
            fix_seed(SEED)
            model_InvNN.fit(x_train_adds, y_train_scal)

            y_fits_InvNN_scal = model_InvNN.predict(x_train_adds)
            y_pred_InvNN_scal = model_InvNN.predict(x_test_adds)
            fits_InvNN_raw = np.exp(y_fits_InvNN_scal*y_LogStd + y_LogMean)
            pred_InvNN_raw = np.exp(y_pred_InvNN_scal*y_LogStd + y_LogMean)
            y_fits_InvNN = pd.Series(fits_InvNN_raw.reshape(-1), index=y_train.index)
            y_pred_InvNN = pd.Series(pred_InvNN_raw.reshape(-1), index=y_test.index)

            dict_result['y_fits_InvNN'+layer_tag] = y_fits_InvNN
            dict_result['y_pred_InvNN'+layer_tag] = y_pred_InvNN
            dict_result['best_params_InvNN'+layer_tag] = best_InvNN
            list_train_MSE_Inv.append(np.mean((y_fits_InvNN - y_train)**2))
            list_test_MSE_Inv.append(np.mean((y_pred_InvNN - y_test)**2))
            print('     Proposed method (neural network, layer-'+layer_tag+') has been done.    '+str(time.time()-t_tmp))

            # Drop the truncated model and reset the Keras session before the next layer
            del source_model_Ext
            K.clear_session()
            
        # Export the figure for this (source, iteration) pair.
        # NOTE(review): plt_show=False presumably suppresses inline display — confirm in MakeFigure_Train.
        src_tag, itr_tag = str(i_source), str(i_itr)
        fig_path = '../30_Output/20_Plot/200_TransferLearning/Source'+src_tag+'/dict_result_s'+src_tag+'-i'+itr_tag+'.png'
        MakeFigure_Train(figsize=(25,10), dict_result=dict_result, savename=fig_path, plt_show=False)

        # One row per run: identifiers, then the train MSEs, then the test MSEs.
        # Within each group the order is without-transfer, fine-tuning,
        # feature extractor (layers 1-3), proposed method (layers 1-3).
        df_result.loc[str(i_count)] = [
            i_source, i_itr, i_count,
            train_MSE_Wotl, train_MSE_Fine,
            *(list_train_MSE_Ext[k] for k in range(3)),
            *(list_train_MSE_Inv[k] for k in range(3)),
            test_MSE_Wotl, test_MSE_Fine,
            *(list_test_MSE_Ext[k] for k in range(3)),
            *(list_test_MSE_Inv[k] for k in range(3)),
        ]
        df_result.to_csv('../30_Output/30_csv/200_TransferLearning/300_TransferLearning_Result.csv')
        f = open('../30_Output/40_pkl/200_TransferLearning/Source'+str(i_source)+'/dict_result_s'+str(i_source)+'-i'+str(i_itr)+'.pkl','wb')
        pickle.dump(dict_result,f)
        f.close
        i_count += 1
        clear_output(True)
        print(df_result.mean())
print(time.time()-t1)