### Python packages used in this code

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import random
import os
import pickle
import time
import sklearn
import platform
import sys
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, LinearRegression, Lasso, ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.linear_model._base import LinearModel
from spmimage.linear_model import admm

%matplotlib inline

In [2]:
"""
Environments

--Platform--
OS : Windows-10-10.0.19044-SP0
--Version--
python :  3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
numpy : 1.23.1
pandas : 1.4.3
sklearn : 1.1.1
"""

# Report the platform and the library versions the notebook is executed with,
# so they can be compared against the reference environment recorded above.
print('--Platform--')
print('OS :', platform.platform())
print('--Version--')
for label, version in (
    ('python : ', sys.version),
    ('numpy :', np.__version__),
    ('pandas :', pd.__version__),
    ('sklearn :', sklearn.__version__),
):
    print(label, version)

--Platform--
OS : Windows-10-10.0.19044-SP0
--Version--
python :  3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
numpy : 1.23.1
pandas : 1.4.3
sklearn : 1.1.1


# Preparation

## Define the model class for the log-difference model

### Fused ridge

In [3]:
class cls_FusedRidge(BaseEstimator, RegressorMixin):
    """
    Linear model with a ridge penalty and a fused (first-difference) penalty,
    used in this notebook as the log-difference model.

    The coefficients are obtained in closed form in `fit`:
        theta = pinv(X^T X + D^T D) X^T (y - mean(y))
    where D is the penalty matrix built by `make_D`.  Because D itself is
    scaled by the lambdas, they enter the objective squared.
    """

    # Rows of the first-difference matrix that are zeroed out in `make_D`.
    # Row i of that matrix is (theta[i+1] - theta[i]), so zeroing row i removes
    # the smoothness penalty between coefficient i and i+1.
    # NOTE(review): presumably these are the borders between descriptor groups
    # of the dataset used in this notebook -- confirm.
    _BOUNDARY_ROWS = (9, 29, 49, 69, 89, 109, 129, 149, 169)

    def __init__(self, lambda_scale=1, lambda_trend=1):
        """
        Define the model class for log-difference model

        Parameters
        ----------
            lambda_scale : regularization parameter for the scale of theta
            lambda_trend : regularization parameter for the trend of theta
        """
        self.lambda_scale = lambda_scale
        self.lambda_trend = lambda_trend

    def make_D(self, n_features):
        """
        Build the penalty matrix D.

        Parameters
        ----------
            n_features : number of columns of the design matrix

        Returns
        -------
            lambda_trend * (first-difference matrix) if lambda_scale == 0,
            lambda_scale * identity                  if lambda_trend == 0,
            otherwise both stacked vertically (identity part first).
        """
        trend_matrix = np.eye(n_features - 1, n_features, k=1) - np.eye(n_features - 1, n_features)
        # Only zero the boundary rows that actually exist; the unconditional
        # fancy-index assignment raised IndexError whenever n_features < 171.
        boundary_rows = [row for row in self._BOUNDARY_ROWS if row < n_features - 1]
        if boundary_rows:
            trend_matrix[boundary_rows] = 0.0

        if self.lambda_scale == 0:
            return self.lambda_trend * trend_matrix
        elif self.lambda_trend == 0:
            return self.lambda_scale * np.identity(n_features)
        else:
            generated = np.vstack([self.lambda_scale * np.identity(n_features),
                                   self.lambda_trend * trend_matrix])
            return generated

    def fit(self, X, y=None):
        """
        Model fitting

        Parameters
        ----------
            X : design matrix of shape (n_sample, dim_X); must provide
                `.shape`, `.T` and `.dot` (pandas DataFrame or numpy array)
            y : target values (required despite the default of None)

        Returns
        -------
            self, with `intercept` (mean of y) and `theta` (coefficients) set
        """
        # dataset
        self.X = X
        self.y = y

        # dimension
        self.n_sample, self.dim_X = self.X.shape

        # Matrices
        self.D = self.make_D(self.dim_X)
        self.D2 = np.transpose(self.D).dot(self.D)

        # Fit: the intercept is the mean of y; theta is the penalized
        # least-squares solution for the centred target.
        self.intercept = np.mean(self.y)
        self.theta = np.linalg.pinv(self.X.T.dot(self.X) + self.D2).dot(self.X.T).dot(self.y-self.intercept)

        return self

    def predict(self, X):
        """
        Prediction function

        Returns
        -------
            X.dot(theta) + intercept
        """
        return X.dot(self.theta) + self.intercept

    def score(self, X, y=None):
        """
        Score function for cross-validation

        Returns
        -------
            -mean((y - y_hat)^2) over the samples passed in
            (negated because 'GridSearchCV' maximizes the score).
        """
        # Normalise by the number of evaluated samples, not by the number of
        # training samples stored in `fit`; np.asarray also lets the method
        # accept plain arrays as well as pandas objects.
        y_true = np.asarray(y, dtype=float).reshape(-1)
        y_hat = np.asarray(self.predict(X), dtype=float).reshape(-1)
        return -np.mean((y_true - y_hat)**2)

    def get_params(self, deep=True):
        """
        Create parameter dictionary for cross-validation

        Returns
        -------
            {'lambda_scale', 'lambda_trend'}
        """
        return {'lambda_scale' : self.lambda_scale,
                'lambda_trend' : self.lambda_trend}

    def set_params(self, **parameters):
        """
        For cross-validation: set each given parameter as an attribute.
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

## Define the model class proposed in the paper

### Fused ridge

In [4]:
class cls_InvTrans_FR(BaseEstimator, RegressorMixin):
    """
    Model class proposed in the paper, fitted by alternating closed-form
    updates of alpha, beta and gamma (see `fit`).

    As implemented in `predict`, the fitted function is
        h(x) = alpha_1*ys + alpha_0 - (beta*ys + 1)*<gamma, x>
            x  : descriptor
            ys : CP(MD)
    i.e. `gamma` is stored with the opposite sign relative to the form
    "alpha_1*ys + alpha_0 + (beta*ys + 1)*<gamma, x>".

    `fit` and `predict` read the module-level names `dim_x`, `ini_alpha`,
    `ini_intercept`, `ini_beta`, `ini_gamma` and `fix_seed`; they must be
    defined before the model is fitted.
    """

    # Rows of the first-difference matrix that are zeroed out in `make_D`.
    # Row i of that matrix is (gamma[i+1] - gamma[i]), so zeroing row i removes
    # the smoothness penalty between coefficient i and i+1.
    # NOTE(review): presumably these are the borders between descriptor groups
    # of the dataset used in this notebook -- confirm.
    _BOUNDARY_ROWS = (9, 29, 49, 69, 89, 109, 129, 149, 169)

    def __init__(self, lambda1=1, lambda2=1, lambda3_scale=1, lambda3_trend=1):
        """
        Parameters
        ----------
            lambda1       : regularization parameter for alpha
            lambda2       : regularization parameter for beta
            lambda3_scale : regularization parameter for the scale of gamma
            lambda3_trend : regularization parameter for the trend of gamma
        """
        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.lambda3_scale = lambda3_scale
        self.lambda3_trend = lambda3_trend

    def estimation_alpha(self):
        """
        Optimization with respect to alpha
            [alpha_1_hat, alpha_0_hat]^T
                = (Ys^T Ys + n*Lambda_1)^{-1} Ys^T (y + (beta*ys + 1)*<gamma, x>)

                    Ys = | ys_1  1 | in R^{n*2},  Lambda_1 = | lambda1  0 |,  ys = | ys_1 |
                         | ys_2  1 |                         |       0  0 |        | ys_2 |
                              :                                                    |   :  |
                         | ys_n  1 |                                               | ys_n |

        Note that the regularization applies only to alpha_1 and not to the intercept alpha_0.
        """
        self.alpha = self.InvMat.dot(self.X_source1.T).dot(self.y + (self.Mat2+1)*(self.Mat3))
        self.Mat1 = self.X_source1.dot(self.alpha)
        self.result_alpha[self.i_count] = self.alpha
        return self

    def estimation_beta(self):
        """
        Optimization with respect to beta
            beta_hat
                = -(ys^T Diag(X gamma)^2 ys + n*lambda2)^{-1} ys^T Diag(X gamma) (y - Ys alpha + X gamma)
        """
        diag_xg = np.diag(self.Mat3)
        gram = self.X_source2.T.dot(diag_xg).dot(diag_xg).values.dot(self.X_source2.values)
        ridge = self.n_sample*self.lambda2*np.diag(np.ones(self.dim_X_source2))
        tmp_mat1 = np.linalg.pinv(gram + ridge, hermitian=True)
        self.beta = -tmp_mat1.dot(self.X_source2.T).dot(diag_xg).dot(self.y-self.Mat1+self.Mat3)
        self.Mat2 = self.X_source2.dot(self.beta)
        self.result_beta[self.i_count] = self.beta
        return self

    def estimation_gamma(self):
        """
        Optimization with respect to gamma

        Regresses (y - Ys alpha) on the rows of X scaled by (beta*ys + 1)
        with the penalty D^T D, and stores the NEGATED solution as gamma.
        """
        # Each row of X is multiplied by (beta*ys + 1); reshape(-1) flattens
        # the source-feature block into one value per sample.
        tmp_x = pd.DataFrame(np.diag(self.X_source2.values.reshape(-1)*self.beta+1).dot(self.X), index=self.X.index, columns=self.X.columns)
        tmp_y = self.y-self.Mat1

        # Kept on the instance for inspection after fitting.
        self.tmp_x = tmp_x
        self.tmp_y = tmp_y

        # NOTE(review): nothing below draws random numbers, so this re-seeding
        # looks unnecessary; it is kept to preserve the global RNG state that
        # callers currently observe.
        fix_seed(373)
        self.gamma = -np.linalg.pinv(tmp_x.T.dot(tmp_x) + self.D2).dot(tmp_x.T).dot(tmp_y)

        self.Mat3 = self.X.dot(self.gamma)
        self.result_gamma[self.i_count] = self.gamma
        return self

    def make_diff(self, w_new, w_old):
        """
        Function to calculate parameter changes for algorithm convergence determination
        We use max{|w_new - w_old|}/max{|w_old|} for determining the convergence.
        This criterion is used in some algorithms in scikit-learn, for example, see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html
        We apply this criterion to each of alpha, beta and gamma separately and use their maximum value for the convergence decision.

        Returns 0 when max{|w_old|} < 1e-10 (avoids division by zero).
        """
        diff1 = np.max(np.abs(w_new-w_old))
        diff2 = np.max(np.abs(w_old))
        if diff2 < 1e-10: #Avoiding division by zero
            out = 0
        else:
            out = diff1/diff2
        return out

    def make_D(self, n_features):
        """
        Build the penalty matrix D for gamma.

        Returns
        -------
            lambda3_trend * (first-difference matrix) if lambda3_scale == 0,
            lambda3_scale * identity                  if lambda3_trend == 0,
            otherwise both stacked vertically (identity part first).
        """
        trend_matrix = np.eye(n_features - 1, n_features, k=1) - np.eye(n_features - 1, n_features)
        # Only zero the boundary rows that actually exist; the unconditional
        # fancy-index assignment raised IndexError whenever n_features < 171.
        boundary_rows = [row for row in self._BOUNDARY_ROWS if row < n_features - 1]
        if boundary_rows:
            trend_matrix[boundary_rows] = 0.0

        if self.lambda3_scale == 0:
            return self.lambda3_trend * trend_matrix
        elif self.lambda3_trend == 0:
            return self.lambda3_scale * np.identity(n_features)
        else:
            generated = np.vstack([self.lambda3_scale * np.identity(n_features),
                                   self.lambda3_trend * trend_matrix])
            return generated

    def fit(self, X, y=None):
        """
        Model fitting

        Required global variables
        -------------------------
            dim_x         : dimension of the descriptor
            ini_alpha     : initial value for alpha_1
            ini_intercept : initial value for alpha_0
            ini_beta      : initial value for beta
            ini_gamma     : initial value for gamma (stored negated)

        Attributes set
        --------------
            i_count       : counter for the iterations
            n_loop        : maximum number of iterations
            convergence   : flag indicating whether the algorithm has converged before the maximum iteration
            error         : flag indicating whether the algorithm has terminated with an error
                            (currently never set to True; the try/except is commented out)

            Input         : X=[descriptors, source features] (pandas DataFrame), y=output
            X             : descriptors (first dim_x columns of the input)
            X_source1     : source features + all-one column 'Intercept'
            X_source2     : source features
            y             : output
            n_sample      : number of samples
            dim_X         : dimension of the descriptor
            dim_X_source1 : dimension of the source features + intercept
            dim_X_source2 : dimension of the source features

            result_alpha  : array storing alpha in all iterations
            result_beta   : array storing beta in all iterations
            result_gamma  : array storing gamma in all iterations
            diff_i        : array storing the relative change of alpha_0 per iteration
            diff_a        : array storing the relative change of alpha_1 per iteration
            diff_b        : array storing the relative change of beta per iteration
            diff_c        : array storing the relative change of gamma per iteration
            diff          : array storing the maximum of the four changes per iteration

            InvMat        : (Ys^T Ys + n*Lambda_1)^{-1}
            Mat1          : Ys alpha (updated with every update of alpha)
            Mat2          : ys beta  (updated with every update of beta)
            Mat3          : X gamma  (updated with every update of gamma)

        Returns
        -------
            self
        """
        # setting
        self.i_count = 0
        self.n_loop = 1000
        self.convergence = False
        self.error = False

        # dataset
        self.X = X.iloc[:,:dim_x]
        self.X_source1 = X.iloc[:,dim_x:].copy()
        self.X_source1['Intercept'] = 1
        self.X_source2 = X.iloc[:,dim_x:].copy()
        self.y = y

        # dimension
        self.n_sample, self.dim_X = self.X.shape
        self.dim_X_source1 = self.X_source1.shape[1]
        self.dim_X_source2 = self.X_source2.shape[1]

        # for storing the results (row 0 holds the initial values)
        self.result_alpha = np.zeros([self.n_loop+1, self.dim_X_source1])
        self.result_beta = np.zeros([self.n_loop+1, self.dim_X_source2])
        self.result_gamma = np.zeros([self.n_loop+1, self.dim_X])
        self.diff_i = np.zeros(self.n_loop+1)
        self.diff_a = np.zeros(self.n_loop+1)
        self.diff_b = np.zeros(self.n_loop+1)
        self.diff_c = np.zeros(self.n_loop+1)
        self.diff = np.zeros(self.n_loop+1)

        # initialize the parameters
        self.alpha = np.array([ini_alpha, ini_intercept])
        self.beta = np.array([ini_beta])
        self.gamma = -ini_gamma
        self.result_alpha[0] = self.alpha
        self.result_beta[0] = self.beta
        self.result_gamma[0] = self.gamma
        self.diff[0] = np.nan

        # Matrices
        # The trailing 0 on the diagonal leaves the intercept column unpenalized.
        self.InvMat = np.linalg.pinv(self.X_source1.T.dot(self.X_source1) + self.n_sample*self.lambda1*np.diag(np.ones(self.dim_X_source1-1).tolist()+[0]), hermitian=True)
        self.Mat1 = self.X_source1.dot(self.alpha)
        self.Mat2 = self.X_source2.dot(self.beta)
        self.Mat3 = self.X.dot(self.gamma)
        self.D = self.make_D(self.dim_X)
        self.D2 = np.transpose(self.D).dot(self.D)

        # try:
        # Repeat until convergence
        for i_loop in range(self.n_loop):
            self.i_count += 1

            # Update (the order matters: each step uses the latest Mat1/Mat2/Mat3)
            self.estimation_alpha()
            self.estimation_beta()
            self.estimation_gamma()

            # Compute changes of parameters
            diff_i = self.make_diff(w_new=self.result_alpha[self.i_count][self.dim_X_source1-1], w_old=self.result_alpha[self.i_count-1][self.dim_X_source1-1])
            diff_a = self.make_diff(w_new=self.result_alpha[self.i_count][:(self.dim_X_source1-1)], w_old=self.result_alpha[self.i_count-1][:(self.dim_X_source1-1)])
            diff_b = self.make_diff(w_new=self.result_beta[self.i_count], w_old=self.result_beta[self.i_count-1])
            diff_c = self.make_diff(w_new=self.result_gamma[self.i_count], w_old=self.result_gamma[self.i_count-1])
            diff = np.max([diff_i, diff_a, diff_b, diff_c]) # We use the maximum value of {diff_i, diff_a, diff_b, diff_c}.
            # Store
            self.diff_i[self.i_count] = diff_i
            self.diff_a[self.i_count] = diff_a
            self.diff_b[self.i_count] = diff_b
            self.diff_c[self.i_count] = diff_c
            self.diff[self.i_count] = diff

            # Check the convergence
            if diff < 1e-3:
                self.convergence = True
                break
        # except:
            # self.error = True

        # Cut off the unused parts of the result arrays
        self.result_alpha = self.result_alpha[:(self.i_count+1),:]
        self.result_beta = self.result_beta[:(self.i_count+1),:]
        self.result_gamma = self.result_gamma[:(self.i_count+1),:]
        self.diff_i = self.diff_i[:(self.i_count+1)]
        self.diff_a = self.diff_a[:(self.i_count+1)]
        self.diff_b = self.diff_b[:(self.i_count+1)]
        self.diff_c = self.diff_c[:(self.i_count+1)]
        self.diff = self.diff[:(self.i_count+1)]

        return self

    def predict(self, X):
        """
        Prediction function
            h(x) = alpha_1*ys + alpha_0 - (beta*ys + 1)*<gamma, x>
            (gamma is stored with the opposite sign, hence the minus)

        Parameters
        ----------
            X : pandas DataFrame laid out as in `fit`
                (first dim_x columns = descriptors, remaining = source features)

        Returns
        -------
            y_pred : pred1 - pred2*pred3, where the following are also stored on self
                pred1  : alpha_1*ys + alpha_0
                pred2  : beta*ys + 1
                pred3  : <gamma, x>
        """
        # dataset
        X_source_pred1 = X.iloc[:,dim_x:].copy()
        X_source_pred1['Intercept'] = 1
        X_source_pred2 = X.iloc[:,dim_x:].copy()
        X_pred = X.iloc[:,:dim_x]

        # Compute each term
        self.pred1 = X_source_pred1.dot(self.alpha)
        self.pred2 = X_source_pred2.dot(self.beta) + 1
        self.pred3 = X_pred.dot(self.gamma)
        y_pred = self.pred1 - self.pred2*self.pred3

        return y_pred

    def score(self, X, y=None):
        """
        Score function for cross-validation

        Returns
        -------
            -mean((y - y_hat)^2) over the samples passed in
            (negated because 'GridSearchCV' maximizes the score).
        """
        # Normalise by the number of evaluated samples, not by the number of
        # training samples stored in `fit`; np.asarray also lets the method
        # accept plain arrays as well as pandas objects for y.
        y_true = np.asarray(y, dtype=float).reshape(-1)
        y_hat = np.asarray(self.predict(X), dtype=float).reshape(-1)
        return -np.mean((y_true - y_hat)**2)

    def get_params(self, deep=True):
        """
        Create parameter dictionary for cross-validation

        Returns
        -------
            {'lambda1', 'lambda2', 'lambda3_scale', 'lambda3_trend'}
        """
        return {'lambda1' : self.lambda1,
                'lambda2' : self.lambda2,
                'lambda3_scale' : self.lambda3_scale,
                'lambda3_trend' : self.lambda3_trend}

    def set_params(self, **parameters):
        """
        For cross-validation: set each given parameter as an attribute.
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

## fix_seed function

In [5]:
def fix_seed(seed):
    """
    Seed the global random number generators used in this notebook.

    Parameters
    ----------
        seed : value passed to NumPy's and the built-in `random` seeders and
               exported (as a string) in the PYTHONHASHSEED environment variable
    """
    # NumPy's legacy global generator first, then the built-in `random` module.
    for seed_generator in (np.random.seed, random.seed):
        seed_generator(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # this presumably only influences child processes -- confirm.
    os.environ.update(PYTHONHASHSEED=str(seed))

## Search parameters in cross-validation

In [6]:
# For difference-learning
# Grid for the log-difference model (cls_FusedRidge):
# 25 log-spaced scale penalties between 1e-2 and 1e2, three trend penalties.
SearchParams_diff_FR = dict(
    lambda_scale=10**np.linspace(-2, 2, 25),
    lambda_trend=[50, 100, 150],
)

# Grid for the proposed method (cls_InvTrans_FR):
# lambda1 and lambda2 are held fixed; only the gamma penalties are searched.
SearchParams_InvTrans_FR = dict(
    lambda1=[0],
    lambda2=[1],
    lambda3_scale=10**np.linspace(-2, 2, 25),
    lambda3_trend=[50, 100, 150],
)

## Create output directories

In [7]:
# Create every output directory this notebook may write to, unless it already exists.
for output_dir in (
    '../30_Output/10_Model/300_TransferLearning',
    '../30_Output/20_Plot/300_TransferLearning',
    '../30_Output/30_csv/300_TransferLearning',
    '../30_Output/40_pkl/300_TransferLearning',
):
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

# Main codes

## Load data

In [8]:
# Read the dataset prepared by an earlier step of the pipeline.
# NOTE(review): pickle.load executes arbitrary code from the file; only load trusted files.
# with open('../30_Output/40_pkl/100_CheckData/100_Data_masked.pkl', 'rb') as f:
with open('../30_Output/40_pkl/100_CheckData/100_Data.pkl', 'rb') as f:
    data_list = pickle.load(f)

# Unpack the entries used below.
x, y, ys, PID = (data_list[key] for key in ('x', 'y', 'ys', 'PID'))
dim_x = x.shape[1]  # number of descriptor columns

# Scaling parameter
#     For stability of the estimation, scaling parameters are calculated using all data.
#     x is standardized directly; y and ys are standardized on the log scale.
#     (y_LogMean / y_LogStd are reassigned to 0 / 1 inside the main loop.)
x_Mean, x_Std = x.mean(), x.std()
y_LogMean, y_LogStd = np.log(y).mean(), np.log(y).std()
ys_LogMean, ys_LogStd = np.log(ys).mean(), np.log(ys).std()

## User parameter setting

In [9]:
# User-tunable settings, in order:
#   num_itr : number of iterations (repeated train/test splittings)
#   n_fold  : number of folds of cross-validation
#   SEED    : seed for the fix_seed function
num_itr, n_fold, SEED = 20, 5, 373

In [10]:
# Repeat for different splittings of samples
# For each random train/test split: fit the three models (simple linear
# regression, log-difference model, proposed method) and pickle the results.
for i_itr in range(num_itr):
    print('Itr : ' + str(i_itr))
    
    # Data splitting (the iteration index doubles as the split's random_state)
    PID_train, PID_test = train_test_split(PID, train_size=60, random_state=int(i_itr))
    x_train  = x.loc[PID_train]
    y_train  = y.loc[PID_train]
    ys_train = ys.loc[PID_train]
    x_test   = x.loc[PID_test]
    y_test   = y.loc[PID_test]
    ys_test  = ys.loc[PID_test]

    # Data scaling
    x_train_scal  = (x_train - x_Mean)/x_Std
    x_test_scal   = (x_test - x_Mean)/x_Std
    # NOTE: this overrides the y_LogMean / y_LogStd computed in the data-loading
    # cell, so log(y) is effectively left unscaled in everything below.
    y_LogMean     = 0
    y_LogStd      = 1
    y_train_scal  = (np.log(y_train)-y_LogMean)/y_LogStd
    ys_train_scal = (np.log(ys_train)-ys_LogMean)/ys_LogStd
    ys_test_scal  = (np.log(ys_test)-ys_LogMean)/ys_LogStd

    # Model training
    ## Simple linear regression
    # NOTE(review): the regressor here is the raw ys (neither log-transformed
    # nor scaled), while the target is log(y) -- confirm this is intended.
    t_tmp = time.time()
    model_slr = LinearRegression(fit_intercept=True)
    model_slr.fit(ys_train.values.reshape(-1,1), y_train_scal.values)

    y_fits_slr_scal = model_slr.predict(ys_train.values.reshape(-1,1))
    y_pred_slr_scal = model_slr.predict(ys_test.values.reshape(-1,1))
    y_fits_slr = y_fits_slr_scal*y_LogStd + y_LogMean
    y_pred_slr = y_pred_slr_scal*y_LogStd + y_LogMean
    print('  Simple linear regression has been done.    '+str(time.time()-t_tmp))

    ## Learn the difference
    # Target is log(y) - log(ys); mean 0 / std 1 means no additional scaling.
    y_train_diff      = np.log(y_train) - np.log(ys_train)
    y_DiffMean        = 0
    y_DiffStd         = 1
    y_train_diff_scal = (y_train_diff - y_DiffMean)/y_DiffStd
    
    gsr_diff = GridSearchCV(
        cls_FusedRidge(),
        SearchParams_diff_FR,
        cv      = n_fold,
        n_jobs  = -1,
        scoring = 'neg_mean_squared_error',
        verbose = False
    )
    t_tmp = time.time()
    fix_seed(SEED)
    gsr_diff.fit(x_train_scal, y_train_diff_scal)

    # Refit on the full training split with the selected hyper-parameters.
    model_diff = cls_FusedRidge(
        lambda_scale=gsr_diff.best_params_['lambda_scale'],
        lambda_trend=gsr_diff.best_params_['lambda_trend']
    )

    fix_seed(SEED)
    model_diff.fit(x_train_scal, y_train_diff_scal)

    y_fits_diff_scal = model_diff.predict(x_train_scal)*y_DiffStd + y_DiffMean
    y_pred_diff_scal = model_diff.predict(x_test_scal)*y_DiffStd + y_DiffMean
    y_fits_diff      = (y_fits_diff_scal+np.log(ys_train))
    y_pred_diff      = (y_pred_diff_scal+np.log(ys_test))
    print('  Learn the difference has been done.    '+str(time.time()-t_tmp))

    ## Proposed method
    # Append the scaled source feature as the last column; cls_InvTrans_FR
    # splits the frame back apart at column dim_x.
    s_train_scal = pd.DataFrame(ys_train_scal)
    s_test_scal  = pd.DataFrame(ys_test_scal)
    x_train_adds = pd.merge(x_train_scal, s_train_scal, left_index=True, right_index=True)
    x_test_adds  = pd.merge(x_test_scal, s_test_scal, left_index=True, right_index=True)
    gsr_InvTrans = GridSearchCV(
        cls_InvTrans_FR(),
        SearchParams_InvTrans_FR,
        cv      = n_fold,
        n_jobs  = -1,
        scoring = 'neg_mean_squared_error',
        verbose = False
    )
    # Initial values read as globals by cls_InvTrans_FR.fit.
    # NOTE(review): model_slr was fitted on raw ys, yet its slope is rescaled
    # by ys_LogStd here -- confirm the intended initial value for alpha_1.
    ini_intercept = model_slr.intercept_
    ini_alpha     = model_slr.coef_[0]*ys_LogStd
    ini_beta      = 0
    fix_seed(SEED)
    ini_gamma = model_diff.theta

    t_tmp = time.time()
    fix_seed(SEED)
    gsr_InvTrans.fit(X=x_train_adds, y=y_train_scal)
    # Refit on the full training split with the selected hyper-parameters.
    model_InvTrans = cls_InvTrans_FR(
        lambda1       = gsr_InvTrans.best_params_['lambda1'],
        lambda2       = gsr_InvTrans.best_params_['lambda2'],
        lambda3_scale = gsr_InvTrans.best_params_['lambda3_scale'],    
        lambda3_trend = gsr_InvTrans.best_params_['lambda3_trend']
    )
    fix_seed(SEED)
    model_InvTrans.fit(X=x_train_adds, y=y_train_scal)

    y_fits_inv_scal = model_InvTrans.predict(x_train_adds)
    y_pred_inv_scal = model_InvTrans.predict(x_test_adds)
    y_fits_inv      = y_fits_inv_scal*y_LogStd + y_LogMean
    y_pred_inv      = y_pred_inv_scal*y_LogStd + y_LogMean
    print('  Proposed method has been done.    '+str(time.time()-t_tmp))

    # Make figures
#     fig = plt.figure(figsize=(15,5))

    ## Simple linear regression
#     y_prd1 = y_fits_slr
#     y_obs1 = np.log(y_train).values
#     y_prd2 = y_pred_slr
#     y_obs2 = np.log(y_test).values
#     ax = fig.add_subplot(1, 3, 1, 
#                          title='Simple linear regression', 
#                          xlabel='Prediction', 
#                          ylabel='Observation')
#     ax.scatter(y_prd1, y_obs1, color='steelblue', alpha=0.7)
#     ax.scatter(y_prd2, y_obs2, color='darkorange', alpha=1)
#     xy_min = min(ax.get_xlim()[0], ax.get_ylim()[0])
#     xy_max = max(ax.get_xlim()[1], ax.get_ylim()[1])
#     ax.axis('equal')
#     ax.axis('square')
#     ax.set_xlim([xy_min, xy_max])
#     ax.set_ylim([xy_min, xy_max])
#     ax.grid(color='gray', linestyle='dotted', linewidth=1, alpha=0.5)
#     ax.text(0.6, 0.14, 'Corr : '+str(round(np.corrcoef(y_prd2, y_obs2)[0,1], 4)), size=15, transform=ax.transAxes)
#     ax.text(0.6, 0.08, 'MSE : '+str(round(np.mean((y_prd2-y_obs2)**2), 4)), size=15, transform=ax.transAxes)
#     ax.text(0.6, 0.02, 'MAE : '+str(round(np.mean(np.abs(y_prd2-y_obs2)), 4)), size=15, transform=ax.transAxes)
#     _ = ax.plot([-30000, 30000], [-30000, 30000], color='gray', linewidth=0.5)

#     ## log-difference
#     y_prd1 = y_fits_diff
#     y_obs1 = np.log(y_train)
#     y_prd2 = y_pred_diff
#     y_obs2 = np.log(y_test)
#     ax = fig.add_subplot(1, 3, 2, 
#                          title='Learning the log-difference', 
#                          xlabel='Prediction', 
#                          ylabel='Observation')
#     ax.scatter(y_prd1, y_obs1, color='steelblue', alpha=0.7)
#     ax.scatter(y_prd2, y_obs2, color='darkorange', alpha=1)
#     xy_min = min(ax.get_xlim()[0], ax.get_ylim()[0])
#     xy_max = max(ax.get_xlim()[1], ax.get_ylim()[1])
#     ax.axis('equal')
#     ax.axis('square')
#     ax.set_xlim([xy_min, xy_max])
#     ax.set_ylim([xy_min, xy_max])
#     ax.grid(color='gray', linestyle='dotted', linewidth=1, alpha=0.5)
#     ax.text(0.6, 0.14, 'Corr : '+str(round(np.corrcoef(y_prd2, y_obs2)[0,1], 4)), size=15, transform=ax.transAxes)
#     ax.text(0.6, 0.08, 'MSE : '+str(round(np.mean((y_prd2-y_obs2)**2), 4)), size=15, transform=ax.transAxes)
#     ax.text(0.6, 0.02, 'MAE : '+str(round(np.mean(np.abs(y_prd2-y_obs2)), 4)), size=15, transform=ax.transAxes)
#     _ = ax.plot([-30000, 30000], [-30000, 30000], color='gray', linewidth=0.5)

#     ## Proposed method
#     y_prd1 = y_fits_inv.values
#     y_obs1 = np.log(y_train).values
#     y_prd2 = y_pred_inv.values
#     y_obs2 = np.log(y_test).values
#     ax = fig.add_subplot(1, 3, 3, 
#                          title='Proposed method', 
#                          xlabel='Prediction', 
#                          ylabel='Observation')
#     ax.scatter(y_prd1, y_obs1, color='steelblue', alpha=0.7)
#     ax.scatter(y_prd2, y_obs2, color='darkorange', alpha=1)
#     xy_min = min(ax.get_xlim()[0], ax.get_ylim()[0])
#     xy_max = max(ax.get_xlim()[1], ax.get_ylim()[1])
#     ax.axis('equal')
#     ax.axis('square')
#     ax.set_xlim([xy_min, xy_max])
#     ax.set_ylim([xy_min, xy_max])
#     ax.grid(color='gray', linestyle='dotted', linewidth=1, alpha=0.5)
#     ax.text(0.6, 0.14, 'Corr : '+str(round(np.corrcoef(y_prd2, y_obs2)[0,1], 4)), size=15, transform=ax.transAxes)
#     ax.text(0.6, 0.08, 'MSE : '+str(round(np.mean((y_prd2-y_obs2)**2), 4)), size=15, transform=ax.transAxes)
#     ax.text(0.6, 0.02, 'MAE : '+str(round(np.mean(np.abs(y_prd2-y_obs2)), 4)), size=15, transform=ax.transAxes)
#     _ = ax.plot([-30000, 30000], [-30000, 30000], color='gray', linewidth=0.5)

#     plt.tight_layout()
#     plt.savefig('../30_Output/20_Plot/300_TransferLearning/300_Plot_'+str(i_itr)+'.png')
#     plt.close()

    # Save results
    result_list = {
        'x_train'     : x_train,
        'y_train'     : y_train,
        'ys_train'    : ys_train,
        'x_test'      : x_test,
        'y_test'      : y_test,
        'ys_test'     : ys_test,
        'PID_train'   : PID_train,
        'PID_test'    : PID_test,
        'y_fits_slr'  : y_fits_slr,
        'y_pred_slr'  : y_pred_slr,
        'model_slr'   : model_slr,
        'y_fits_diff' : y_fits_diff,
        'y_pred_diff' : y_pred_diff,
        'model_diff'  : model_diff,
        'y_fits_inv'  : y_fits_inv,
        'y_pred_inv'  : y_pred_inv,
        'model_inv'   : model_InvTrans
    }

    # The context manager closes the file even if pickling fails.
    with open('../30_Output/40_pkl/300_TransferLearning/300_Results_'+str(i_itr)+'.pkl','wb') as f:
        pickle.dump(result_list,f)

Itr : 0
  Simple linear regression has been done.    0.014689445495605469
  Learn the difference has been done.    5.148162364959717
  Proposed method has been done.    46.25245118141174
Itr : 1
  Simple linear regression has been done.    0.0
  Learn the difference has been done.    1.5400893688201904
  Proposed method has been done.    109.04994463920593
Itr : 2
  Simple linear regression has been done.    0.0
  Learn the difference has been done.    2.371997117996216
  Proposed method has been done.    60.74292469024658
Itr : 3
  Simple linear regression has been done.    0.0
  Learn the difference has been done.    1.869391918182373
  Proposed method has been done.    106.45542120933533
Itr : 4
  Simple linear regression has been done.    0.0
  Learn the difference has been done.    1.8417932987213135
  Proposed method has been done.    110.22532820701599
Itr : 5
  Simple linear regression has been done.    0.0
  Learn the difference has been done.    1.8404974937438965
  Proposed 