In [34]:
######################## Header files ################################################
# from two_TrAdaBoostR2 import TwoStageTrAdaBoostR2 ##For STrAdaBoost.R2
# from TwoStageTrAdaBoostR2 import TwoStageTrAdaBoostR2 ## For two-stage TrAdaBoost.R2

import pandas as pd
import sys
import numpy as np
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

from keras.models import Sequential, load_model, Model
from keras.layers import Input, Dense, Activation, Conv2D, Dropout, Flatten
from keras import optimizers, utils, initializers, regularizers
import keras.backend as K

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler #Importing the StandardScaler

from itertools import combinations

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats.stats import pearsonr
from math import sqrt

#Geo plotting libraries
#import geopandas as gdp
#from matplotlib.colors import ListedColormap
#import geoplot as glpt

import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn import linear_model
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import KFold
import matplotlib.lines as mlines
import folium
import glob

import statistics
from sklearn.cluster import KMeans
from scipy.spatial import distance

pd.options.display.max_columns = None

from adapt.instance_based import (TrAdaBoost, TrAdaBoostR2, TwoStageTrAdaBoostR2)


print("Done uploading repositories")

from adapt.instance_based import TrAdaBoost, TrAdaBoostR2, TwoStageTrAdaBoostR2
from sklearn.model_selection import GridSearchCV
from adapt.instance_based import KMM

print("Second Upload Completed!!")

############################## UCI Italian dataset #######################################################
#################### Dataset Information: 2 years and single terrain #####################################
## Predictors: T, Ah, Rh, NMHC_GT, NOx_GT, CO_GT, C6H6_GT ,Target: O3
###########################################################################################################
#AQI_datasets/UCI_AQI
aqi_df = pd.read_csv('AQI_datasets/UCI_AQI/AirQualityUCI.csv', sep=',', delimiter=";", decimal=",", index_col = None, header=0)

def remove_outlier(col):
    aqi_df[col] = aqi_df.groupby('Date')[col].transform(lambda x: x.fillna(x.mean()))

################ Pre-processing ############################################
aqi_df.dropna(how = 'all', inplace = True) ## drop end rows with NaN values
drop_unamed = ['Unnamed: 15', 'Unnamed: 16']
aqi_df = aqi_df.drop(drop_unamed, axis = 1) ## drop unamed columns

drop_uw = ['Time', 'PT08_S1_CO', 'PT08_S2_NMHC', 'PT08_S3_NOx', 'NO2_GT', 'PT08_S4_NO2']
aqi_df = aqi_df.drop(drop_uw, axis = 1) ## Drop unwanted columns

aqi_df.replace(to_replace = -200, value = np.NaN, inplace = True) ## Replace the -200 values seen in the dataset with NaN

## Replace the NaN values with the column mean
col_list = aqi_df.columns[1:]
for i in col_list:
    remove_outlier(i)

aqi_df.fillna(method ='ffill', inplace= True)
aqi_df.dropna(axis = 0)

## Convert 'Date' column to datetime and then seperate out year and month into different columns.
aqi_df.Date = pd.to_datetime(aqi_df.Date)
aqi_df['Year'] = aqi_df['Date'].dt.year
aqi_df['Month'] = aqi_df['Date'].dt.month
drop_date = ['Date']
aqi_df = aqi_df.drop(drop_date, axis = 1)
aqi_df = aqi_df.reset_index(drop=True)

print("Dataset after pre-processing: ")
print(aqi_df.shape)

################ Observing data statistics ################################
# print(aqi_df.describe())

#Split the dataset according to the year.
drop_cols = ['Year', 'Month']
aqi_df_2004 = aqi_df[aqi_df['Year'] == 2004]
aqi_df_2004 = aqi_df_2004.drop(drop_cols, axis = 1)

aqi_df_2005 = aqi_df[aqi_df['Year'] == 2005]
aqi_df_2005 = aqi_df_2005.drop(drop_cols, axis = 1)

aqi_df_2004 = aqi_df_2004.reset_index(drop=True)
aqi_df_2005 = aqi_df_2005.reset_index(drop=True)

################ Divide the dataframe into target and the predictors. ################
target_uci_col = ['PT08_S5_O3']
aqi_df_2004_target = aqi_df_2004[target_uci_col]
aqi_df_2004_target.columns = ['O3']
aqi_df_2004_predictors = aqi_df_2004.drop(target_uci_col, axis = 1)
aqi_df_2004_predictors = aqi_df_2004_predictors.reset_index(drop=True)

aqi_df_2005_target = aqi_df_2005[target_uci_col]
aqi_df_2005_target.columns = ['O3']
aqi_df_2005_predictors = aqi_df_2005.drop(target_uci_col, axis = 1)
aqi_df_2005_predictors = aqi_df_2005_predictors.reset_index(drop=True)

################### 2004: Source Dataset, 2005: Training Set [Training and Testing Set] ###################
################ Standardize the dataset ################
ss = StandardScaler()

# columns_uci = aqi_df_2004_predictors.columns
# aqi_df_2004_predictors[columns_uci] = ss.fit_transform(aqi_df_2004_predictors[columns_uci])
# aqi_df_2005_predictors[columns_uci] = ss.fit_transform(aqi_df_2005_predictors[columns_uci])

#################### Renaming features and target ####################

italianAQ_train_df_y = aqi_df_2005_target
italianAQ_train_df_X = aqi_df_2005_predictors

italianAQ_source_df_y = aqi_df_2004_target
italianAQ_source_df_X = aqi_df_2004_predictors

################## Split into target and test dataset ###################
def TimeSeriesTrainTestSplit(X, y, test_size):

        test_index = int(len(X)*(1-test_size))

        X_train = X.iloc[:test_index]
        y_train = y.iloc[:test_index]
        X_test = X.iloc[test_index:]
        y_test = y.iloc[test_index:]
        return X_train, y_train, X_test, y_test

italianAQ_test_df_X, italianAQ_test_df_y, italianAQ_tgt_df_X, italianAQ_tgt_df_y = TimeSeriesTrainTestSplit(italianAQ_train_df_X,italianAQ_train_df_y, 0.02)

# italianAQ_tgt_df = italianAQ_tgt_df_X
# italianAQ_tgt_df = italianAQ_tgt_df.reset_index(drop=True)

italianAQ_tgt_df = pd.concat([italianAQ_tgt_df_X, italianAQ_tgt_df_y], axis=1, sort= False)
italianAQ_tgt_df = italianAQ_tgt_df.reset_index(drop=True)
# italianAQ_tgt_df.index = italianAQ_tgt_df.index + 1

italianAQ_test_df = pd.concat([italianAQ_test_df_X, italianAQ_test_df_y], axis=1, sort= False)
italianAQ_test_df = italianAQ_test_df.reset_index(drop=True)

italianAQ_source_df = pd.concat([italianAQ_source_df_X, italianAQ_source_df_y], axis=1, sort= False)
italianAQ_source_df = italianAQ_source_df.reset_index(drop=True)

################################## Importance Sampling ######################################################
italianAQ_source_df["ManDis"] = ""


italianAQ_tgt_df_mean = []
# prow = italianAQ_tgt_df.mean().tolist()
italianAQ_tgt_df_mean = italianAQ_tgt_df.mean().tolist()
# italianAQ_tgt_df_mean = [prow.CO_GT, prow.NMHC_GT, prow.C6H6_GT, prow.NOx_GT, prow.T, prow.RH, prow.AH, prow.O3]

rowidx = 0

for row in italianAQ_source_df.itertuples():
    row_list =[row.CO_GT, row.NMHC_GT, row.C6H6_GT, row.NOx_GT, row.T, row.RH, row.AH, row.O3]
    
    man_dis = 0
    for i in range(0, len(row_list)):
        tempval = italianAQ_tgt_df_mean[i] - row_list[i]
        man_dis = man_dis + abs(tempval)
    
#     print("Mandis Value:", man_dis)
    italianAQ_source_df.loc[rowidx,"ManDis"] = man_dis
    rowidx = rowidx + 1

italianAQ_source_df = italianAQ_source_df.sort_values(by =['ManDis'])
italianAQ_source_df = italianAQ_source_df.head(1778) 
italianAQ_source_df = italianAQ_source_df.drop(['ManDis'], axis =1)
italianAQ_source_df = italianAQ_source_df.reset_index(drop=True)

############################ Split again into target and features ############################

target_column_italianAQ = ['O3']

italianAQ_tgt_df_y = italianAQ_tgt_df[target_column_italianAQ]
italianAQ_tgt_df_X = italianAQ_tgt_df.drop(target_column_italianAQ, axis = 1)

italianAQ_test_df_y = italianAQ_test_df[target_column_italianAQ]
italianAQ_test_df_X = italianAQ_test_df.drop(target_column_italianAQ, axis = 1)

italianAQ_source_df_y = italianAQ_source_df[target_column_italianAQ]
italianAQ_source_df_X = italianAQ_source_df.drop(target_column_italianAQ, axis = 1)


columns_italianAQ = italianAQ_tgt_df_X.columns
italianAQ_tgt_df_X[columns_italianAQ] = ss.fit_transform(italianAQ_tgt_df_X[columns_italianAQ])
italianAQ_test_df_X[columns_italianAQ] = ss.fit_transform(italianAQ_test_df_X[columns_italianAQ])
italianAQ_source_df_X[columns_italianAQ] = ss.fit_transform(italianAQ_source_df_X[columns_italianAQ])

# italianAQ_source_df_X = italianAQ_source_df

print("Target: ",italianAQ_tgt_df_X.shape)
print("Test: ",italianAQ_test_df_X.shape)
print("Source: ",italianAQ_source_df_X.shape)

############### Merging the datasets ##########################################
italianAQ_X_df = pd.concat([italianAQ_tgt_df_X, italianAQ_source_df_X], ignore_index=True)
italianAQ_y_df = pd.concat([italianAQ_tgt_df_y, italianAQ_source_df_y], ignore_index=True)

italianAQ_np_train_X = italianAQ_X_df.to_numpy()
italianAQ_np_train_y = italianAQ_y_df.to_numpy()

italianAQ_np_test_X = italianAQ_test_df_X.to_numpy()
italianAQ_np_test_y = italianAQ_test_df_y.to_numpy()

italianAQ_np_train_y_list = italianAQ_np_train_y.ravel()
italianAQ_np_test_y_list = italianAQ_np_test_y.ravel()

src_size_italianAQ = len(italianAQ_source_df_y)
tgt_size_italianAQ = len(italianAQ_tgt_df_y)

src_idx = np.arange(start=0, stop=(src_size_italianAQ - 1), step=1)
tgt_idx = np.arange(start=src_size_italianAQ, stop=((src_size_italianAQ + tgt_size_italianAQ) - 1), step=1)

########################### Transfer Learning Italian AQ #####################################################
from sklearn.ensemble import AdaBoostRegressor

def get_estimator(**kwargs):
    return DecisionTreeRegressor(max_depth = 6)

kwargs_TwoTrAda = {'steps': 30,
                    'fold': 10,
                  'learning_rate': 0.1}



print("Transfer Learning (M + H, L)")
print("-------------------------------------------")

r2scorelist_AdaTL_italianAQ = []
rmselist_AdaTL_italianAQ = []

r2scorelist_Ada_italianAQ = []
rmselist_Ada_italianAQ = []

r2scorelist_KMM_italianAQ = []
rmselist_KMM_italianAQ = []

r2scorelist_GBRTL_italianAQ = []
rmselist_GBRTL_italianAQ = []

r2scorelist_GBR_italianAQ = []
rmselist_GBR_italianAQ = []

r2scorelist_TwoTrAda_italianAQ = []
rmselist_TwoTrAda_italianAQ = []

r2scorelist_stradaboost_italianAQ = []
rmselist_stradaboost_italianAQ = []


kfold = KFold(n_splits = 10, random_state = 42, shuffle=False)

for x in range(0, 10):

    ################### STrAdaBoost ###################
    print("STrAdaBoost")
    from two_TrAdaBoostR2 import TwoStageTrAdaBoostR2

#     sample_size = [len(italianAQ_tgt_df_X), len(italianAQ_source_df_X)]
    sample_size = [45, 1778]
    n_estimators = 100
    steps = 30
    fold = 10
    random_state = np.random.RandomState(1)


    model_stradaboost_italianAQ = TwoStageTrAdaBoostR2(DecisionTreeRegressor(max_depth = 6),
                        n_estimators = n_estimators, sample_size = sample_size,
                        steps = steps, fold = fold, random_state = random_state)


    model_stradaboost_italianAQ.fit(italianAQ_np_train_X, italianAQ_np_train_y_list)
    y_pred_stradaboost_italianAQ = model_stradaboost_italianAQ.predict(italianAQ_np_test_X)


    mse_stradaboost_italianAQ = sqrt(mean_squared_error(italianAQ_np_test_y, y_pred_stradaboost_italianAQ))
    rmselist_stradaboost_italianAQ.append(mse_stradaboost_italianAQ)

    r2_score_stradaboost_italianAQ = pearsonr(italianAQ_np_test_y_list, y_pred_stradaboost_italianAQ)
    r2_score_stradaboost_italianAQ = (r2_score_stradaboost_italianAQ[0])**2
    r2scorelist_stradaboost_italianAQ.append(r2_score_stradaboost_italianAQ)



with open('italianAQ_rmse_stradaboost.txt', 'w') as italianAQ_handle_rmse:
    italianAQ_handle_rmse.write("\n\nSTrAdaBoost Active Sampling:\n ")
    italianAQ_handle_rmse.writelines("%s\n" % ele for ele in rmselist_stradaboost_italianAQ)


with open('italianAQ_r2_stradaboost.txt', 'w') as italianAQ_handle_r2:
    italianAQ_handle_r2.write("\n\nSTrAdaBoost Active Sampling:\n ")
    italianAQ_handle_r2.writelines("%s\n" % ele for ele in r2scorelist_stradaboost_italianAQ)


######################################################################################

print("-------------------------------------------")


Done uploading repositories
Second Upload Completed!!
Dataset after pre-processing: 
(9357, 10)
Target:  (45, 7)
Test:  (2202, 7)
Source:  (1778, 7)
Transfer Learning (M + H, L)
-------------------------------------------
STrAdaBoost
Inside STrAdaBoost.R2




STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
STrAdaBoost
Inside STrAdaBoost.R2
-------------------------------------------


In [36]:
# from two_TrAdaBoostR2 import TwoStageTrAdaBoostR2 ##STrAdaBoost.R2
# from TwoStageTrAdaBoostR2 import TwoStageTrAdaBoostR2 ##two-stage TrAdaBoost.R2

import pandas as pd
import sys
import numpy as np
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

from keras.models import Sequential, load_model, Model
from keras.layers import Input, Dense, Activation, Conv2D, Dropout, Flatten
from keras import optimizers, utils, initializers, regularizers
import keras.backend as K

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler #Importing the StandardScaler

from itertools import combinations

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats.stats import pearsonr
from math import sqrt

#Geo plotting libraries
import geopandas as gdp
from matplotlib.colors import ListedColormap
# import geoplot as glpt

import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn import linear_model
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import KFold
import matplotlib.lines as mlines

import statistics

from scipy.stats import rv_continuous
from scipy.stats import *

from statistics import mean
from sklearn.cluster import KMeans
from scipy.spatial import distance
from sklearn.model_selection import KFold


######################## Instance Transfer repositories ###################################
from adapt.instance_based import TwoStageTrAdaBoostR2

print("Repositories uploaded!!")

from adapt.instance_based import TrAdaBoost, TrAdaBoostR2, TwoStageTrAdaBoostR2
from sklearn.model_selection import GridSearchCV
from adapt.instance_based import KMM

print("Second Upload Completed!!")

##########################################################################################
def clean_dataset(df):
    assert isinstance(df, pd.DataFrame) #"df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)

attrib = pd.read_csv('Scientific_data/Communities/attributes.csv', delim_whitespace = True)
communities_df = pd.read_csv("Scientific_data/Communities/communities.data", names = attrib['attributes'])

communities_df = communities_df.drop(columns=['state','county',
                          'community','communityname',
                          'fold'], axis=1)

################## All missing columns were dropped ##################
communities_df = communities_df.replace('?', np.nan)
feat_miss = communities_df.columns[communities_df.isnull().any()]

communities_df.drop(feat_miss, axis = 1)
print(communities_df.shape)

################## Correlation ==> Column: PctEmploy ##################
# print("The correlation matrix is: ")
# communities_df_corr = communities_df.corr()['ViolentCrimesPerPop'].abs().sort_values()
# print(communities_df_corr.to_string()) ### Helps to print the entire series

################## To find where to split the data ##################
print('Min: ', communities_df['PctEmploy'].min())
print('Max: ', communities_df['PctEmploy'].max())


################### Processing ######################################

ss = StandardScaler()

drop_col_communities = ['PctEmploy']

communities_train_df = communities_df.loc[(communities_df['PctEmploy'] <= 0.42)]
communities_train_df = communities_train_df.drop(drop_col_communities, axis = 1)
communities_train_df = communities_train_df.reset_index(drop=True)
communities_train_df = clean_dataset(communities_train_df)
communities_train_df = communities_train_df.reset_index(drop=True)
print("Training Set: ",communities_train_df.shape)

communities_source1_df = communities_df.loc[(communities_df['PctEmploy'] > 0.42) & (communities_df['PctEmploy'] <= 0.58)]
communities_source1_df = communities_source1_df.drop(drop_col_communities, axis = 1)
communities_source1_df = communities_source1_df.reset_index(drop=True)
communities_source1_df = clean_dataset(communities_source1_df)
communities_source1_df = communities_source1_df.reset_index(drop=True)
print("Source Set 1: ",communities_source1_df.shape)

communities_source2_df = communities_df.loc[(communities_df['PctEmploy'] > 0.58)]
communities_source2_df = communities_source2_df.drop(drop_col_communities, axis = 1)
communities_source2_df = communities_source2_df.reset_index(drop=True)
communities_source2_df = clean_dataset(communities_source2_df)
communities_source2_df = communities_source2_df.reset_index(drop=True)
print("Source Set 2: ",communities_source2_df.shape)

communities_source_df = pd.concat([communities_source1_df, communities_source2_df], ignore_index=True)
print("Final Source Set: ",communities_source_df.shape)


#################### Splitting into features and target ####################
target_column_communities = ['ViolentCrimesPerPop']

communities_train_df_y = communities_train_df[target_column_communities]
communities_train_df_X = communities_train_df.drop(target_column_communities, axis = 1)
communities_cols = communities_train_df_X.columns
communities_train_df_X[communities_cols] = ss.fit_transform(communities_train_df_X[communities_cols])


communities_source_df_y = communities_source_df[target_column_communities]
communities_source_df_X = communities_source_df.drop(target_column_communities, axis = 1)
communities_cols = communities_source_df_X.columns
communities_source_df_X[communities_cols] = ss.fit_transform(communities_source_df_X[communities_cols])


########################### Transfer Learning communities #####################################################
from sklearn.ensemble import AdaBoostRegressor

def get_estimator(**kwargs):
    return DecisionTreeRegressor(max_depth = 6)

kwargs_TwoTrAda = {'steps': 30,
                    'fold': 10,
                  'learning_rate': 0.1}



print("Adaboost.R2 Transfer Learning (L + M, H)")
print("-------------------------------------------")

r2scorelist_AdaTL_communities = []
rmselist_AdaTL_communities = []

r2scorelist_Ada_communities = []
rmselist_Ada_communities = []

r2scorelist_KMM_communities = []
rmselist_KMM_communities = []

r2scorelist_GBRTL_communities = []
rmselist_GBRTL_communities = []

r2scorelist_GBR_communities = []
rmselist_GBR_communities = []

r2scorelist_TwoTrAda_communities = []
rmselist_TwoTrAda_communities = []

r2scorelist_stradaboost_communities = []
rmselist_stradaboost_communities = []


kfold = KFold(n_splits = 10, random_state = 42, shuffle=False)

for train_ix, test_ix in kfold.split(communities_train_df_X):
    ############### get data ###############
    communities_test_df_X, communities_tgt_df_X  = communities_train_df_X.iloc[train_ix], communities_train_df_X.iloc[test_ix] #### Make it opposite, so target size is small.
    communities_test_df_y, communities_tgt_df_y  = communities_train_df_y.iloc[train_ix], communities_train_df_y.iloc[test_ix] #### Make it opposite, so target size is small.

    print(communities_tgt_df_X.shape, communities_test_df_X.shape)

    ############### Merging the datasets ##########################################
    communities_X_df = pd.concat([communities_tgt_df_X, communities_source_df_X], ignore_index=True)
    communities_y_df = pd.concat([communities_tgt_df_y, communities_source_df_y], ignore_index=True)

    communities_np_train_X = communities_X_df.to_numpy()
    communities_np_train_y = communities_y_df.to_numpy()

    communities_np_test_X = communities_test_df_X.to_numpy()
    communities_np_test_y = communities_test_df_y.to_numpy()

    communities_np_train_y_list = communities_np_train_y.ravel()
    communities_np_test_y_list = communities_np_test_y.ravel()

    src_size_communities = len(communities_source_df_y)
    tgt_size_communities = len(communities_tgt_df_y)

    src_idx = np.arange(start = 0, stop = (src_size_communities - 1), step = 1)
    tgt_idx = np.arange(start = src_size_communities, stop = ((src_size_communities + tgt_size_communities) - 1), step=1)


    ################### AdaBoost Tl ###################
    model_AdaTL_communities = AdaBoostRegressor(DecisionTreeRegressor(max_depth = 6), learning_rate = 0.1, n_estimators = 100)
    model_AdaTL_communities.fit(communities_np_train_X, communities_np_train_y_list)

    y_pred_AdaTL_communities = model_AdaTL_communities.predict(communities_np_test_X)

    mse_AdaTL_communities = sqrt(mean_squared_error(communities_np_test_y, y_pred_AdaTL_communities))
    rmselist_AdaTL_communities.append(mse_AdaTL_communities)

    r2_score_AdaTL_communities = pearsonr(communities_np_test_y_list, y_pred_AdaTL_communities)
    r2_score_AdaTL_communities = (r2_score_AdaTL_communities[0])**2
    r2scorelist_AdaTL_communities.append(r2_score_AdaTL_communities)


    ################### AdaBoost ###################
    model_Ada_communities = AdaBoostRegressor(DecisionTreeRegressor(max_depth = 6), learning_rate = 0.1, n_estimators = 100)
    model_Ada_communities.fit(communities_tgt_df_X, communities_tgt_df_y)

    y_pred_ada_communities = model_Ada_communities.predict(communities_np_test_X)

    mse_Ada_communities = sqrt(mean_squared_error(communities_np_test_y, y_pred_ada_communities))
    rmselist_Ada_communities.append(mse_Ada_communities)

    r2_score_Ada_communities = pearsonr(communities_np_test_y_list, y_pred_ada_communities)
    r2_score_Ada_communities = (r2_score_Ada_communities[0])**2
    r2scorelist_Ada_communities.append(r2_score_Ada_communities)


    ################### GBRTL ###################
    model_GBRTL_communities = GradientBoostingRegressor(learning_rate = 0.1, max_depth = 6, n_estimators = 100, subsample = 0.5)
    model_GBRTL_communities.fit(communities_np_train_X, communities_np_train_y_list)

    y_pred_GBRTL_communities = model_GBRTL_communities.predict(communities_test_df_X) ##Using dataframe instead of the numpy matrix

    mse_GBRTL_communities = sqrt(mean_squared_error(communities_np_test_y, y_pred_GBRTL_communities))
    rmselist_GBRTL_communities.append(mse_GBRTL_communities)

    r2_score_GBRTL_communities = pearsonr(communities_np_test_y_list, y_pred_GBRTL_communities)
    r2_score_GBRTL_communities = (r2_score_GBRTL_communities[0])**2
    r2scorelist_GBRTL_communities.append(r2_score_GBRTL_communities)


    ################### GBR ###################
    model_GBR_communities = GradientBoostingRegressor(learning_rate = 0.1, max_depth = 6, n_estimators = 100, subsample=0.5)
    model_GBR_communities.fit(communities_tgt_df_X, communities_tgt_df_y)

    y_pred_GBR_communities = model_GBR_communities.predict(communities_test_df_X) ##Using dataframe instead of the numpy matrix

    mse_GBR_communities = sqrt(mean_squared_error(communities_np_test_y, y_pred_GBR_communities))
    rmselist_GBR_communities.append(mse_GBR_communities)

    r2_score_GBR_communities = pearsonr(communities_np_test_y_list, y_pred_GBR_communities)
    r2_score_GBR_communities = (r2_score_GBR_communities[0])**2
    r2scorelist_GBR_communities.append(r2_score_GBR_communities)


    ################### STrAdaBoost ###################
    from two_TrAdaBoostR2 import TwoStageTrAdaBoostR2

    sample_size = [len(communities_tgt_df_X), len(communities_source_df_X)]
    n_estimators = 100
    steps = 30
    fold = 10
    random_state = np.random.RandomState(1)


    model_stradaboost_communities = TwoStageTrAdaBoostR2(DecisionTreeRegressor(max_depth = 6),
                        n_estimators = n_estimators, sample_size = sample_size,
                        steps = steps, fold = fold, random_state = random_state)


    model_stradaboost_communities.fit(communities_np_train_X, communities_np_train_y_list)
    y_pred_stradaboost_communities = model_stradaboost_communities.predict(communities_np_test_X)


    mse_stradaboost_communities = sqrt(mean_squared_error(communities_np_test_y, y_pred_stradaboost_communities))
    rmselist_stradaboost_communities.append(mse_stradaboost_communities)

    r2_score_stradaboost_communities = pearsonr(communities_np_test_y_list, y_pred_stradaboost_communities)
    r2_score_stradaboost_communities = (r2_score_stradaboost_communities[0])**2
    r2scorelist_stradaboost_communities.append(r2_score_stradaboost_communities)



with open('communities_rmse.txt', 'w') as communities_handle_rmse:
    communities_handle_rmse.write("AdaBoost TL:\n ")
    communities_handle_rmse.writelines("%s\n" % ele for ele in rmselist_AdaTL_communities)

    communities_handle_rmse.write("\n\nAdaBoost:\n ")
    communities_handle_rmse.writelines("%s\n" % ele for ele in rmselist_Ada_communities)

    communities_handle_rmse.write("\n\nGBRT:\n ")
    communities_handle_rmse.writelines("%s\n" % ele for ele in rmselist_GBRTL_communities)

    communities_handle_rmse.write("\n\nGBR:\n ")
    communities_handle_rmse.writelines("%s\n" % ele for ele in rmselist_GBR_communities)

    communities_handle_rmse.write("\n\nSTrAdaBoost:\n ")
    communities_handle_rmse.writelines("%s\n" % ele for ele in rmselist_stradaboost_communities)


with open('communities_r2.txt', 'w') as communities_handle_r2:
    communities_handle_r2.write("AdaBoost TL:\n ")
    communities_handle_r2.writelines("%s\n" % ele for ele in r2scorelist_AdaTL_communities)

    communities_handle_r2.write("\n\nAdaBoost:\n ")
    communities_handle_r2.writelines("%s\n" % ele for ele in r2scorelist_Ada_communities)

    communities_handle_r2.write("\n\nGBR:\n ")
    communities_handle_r2.writelines("%s\n" % ele for ele in r2scorelist_GBR_communities)

    communities_handle_r2.write("\n\nSTrAdaBoost:\n ")
    communities_handle_r2.writelines("%s\n" % ele for ele in r2scorelist_stradaboost_communities)


######################################################################################
print("-------------------------------------------")


Repositories uploaded!!
Second Upload Completed!!
(1994, 123)
Min:  0.0
Max:  1.0
Training Set:  (107, 122)
Source Set 1:  (136, 122)
Source Set 2:  (76, 122)
Final Source Set:  (212, 122)
Adaboost.R2 Transfer Learning (L + M, H)
-------------------------------------------
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(11, 121) (96, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(10, 121) (97, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(10, 121) (97, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
(10, 121) (97, 121)


  return f(**kwargs)
  return f(**kwargs)


Inside STrAdaBoost.R2
-------------------------------------------
