In [25]:
# Let us make the imports for the entire code

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_absolute_error
import time

# Enable to start counting processing time
# start = time.time()

In [26]:
def best_output(row, sim_time=100):
    """Decide which target (T2 or T3) gave the best download outcome for a run.

    Parameters
    ----------
    row : object with attributes ``downloadTimeT2``, ``downloadTimeT3``,
        ``rxBytesT2`` and ``rxBytesT3`` (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).
    sim_time : number, default 100
        Download-time value that marks a download that did NOT complete
        before the simulation ended. The default keeps the original
        hard-coded behaviour.

    Returns
    -------
    int
        ``2`` when T2 is the best target, ``1`` when T3 is the best target.
        Ties are resolved in favour of T2.
    """
    t2_completed = row.downloadTimeT2 != sim_time
    t3_completed = row.downloadTimeT3 != sim_time

    # Both targets completed the download: the smaller download time wins.
    if t2_completed and t3_completed:
        return 2 if row.downloadTimeT2 <= row.downloadTimeT3 else 1

    # Exactly one target completed the download: that target wins.
    if t2_completed or t3_completed:
        return 2 if t2_completed else 1

    # Neither target completed the download: the larger rxBytes wins.
    return 2 if row.rxBytesT2 >= row.rxBytesT3 else 1

In [27]:
# Here we want to select rxBytes data from the eNB indicated by best_output function
def rxbytes(row):
    """Return, as an int, the rxBytes of the target chosen by ``best_output``.

    ``row.best_output == 1`` selects ``rxBytesT3``; any other value selects
    ``rxBytesT2``.
    """
    chosen = row.rxBytesT3 if row.best_output == 1 else row.rxBytesT2
    return int(chosen)

In [28]:
# Load the four tab-separated result files and align them on common runs

# Per-target results
t2 = pd.read_csv('t2_OhBuildings_ComShadowing_Modificado', delimiter='\t')
t3 = pd.read_csv('t3_OhBuildings_ComShadowing_Modificado', delimiter='\t')

# Keep only the runs found in both target files; rows are renumbered from 0
t2 = t2[t2.nRun.isin(t3.nRun)].reset_index(drop=True)
t3 = t3[t3.nRun.isin(t2.nRun)].reset_index(drop=True)

a3rsrp = pd.read_csv('A3RSRP_OhBuildings_ComShadowing_Modificado', delimiter='\t')
a2a4rsrq = pd.read_csv('A2A4RSRQ_OhBuildings_ComShadowing_Modificado', delimiter='\t')


# Guarantee that we utilize only seeds present in all datasets
t2_runs, t3_runs = t2.nRun, t3.nRun
a3rsrp_runs, a2a4rsrq_runs = a3rsrp.nRun, a2a4rsrq.nRun
# Runs whose rsrp1 is equal in t2 and t3; the comparison is row-by-row on the
# freshly reset index, so it relies on both frames listing runs in the same order
valid_results = t2.loc[t2.rsrp1 == t3.rsrp1, 'nRun']
valid_runs = set(t2_runs).intersection(t3_runs, a3rsrp_runs, a2a4rsrq_runs, valid_results)

# Restrict every dataset to the valid runs and renumber the rows again
t2 = t2[t2.nRun.isin(valid_runs)].reset_index(drop=True)
t3 = t3[t3.nRun.isin(valid_runs)].reset_index(drop=True)
a3rsrp = a3rsrp[a3rsrp.nRun.isin(valid_runs)].reset_index(drop=True)
a2a4rsrq = a2a4rsrq[a2a4rsrq.nRun.isin(valid_runs)].reset_index(drop=True)

# Combining datasets: start from t2 without its target-specific columns, then
# add the download time and received bytes of each target side by side
# (t3 columns are matched to t2 rows through the shared 0..n-1 index)
data = t2.drop(columns=['targetCellId', 'downloadTime', 'rxBytes']).assign(
    downloadTimeT2=t2.downloadTime,
    downloadTimeT3=t3.downloadTime,
    rxBytesT2=t2.rxBytes,
    rxBytesT3=t3.rxBytes,
)

In [29]:
# Data Pre-processing

# Row-wise: which target performed best (see best_output)
data['best_output'] = data.apply(best_output, axis=1)
# Row-wise: the rxBytes of that best target, used as the regression target
data['rxbytes'] = data.apply(rxbytes, axis=1)


# Inputs: current and previous RSRP/RSRQ measurements (columns 1 to 3)
feature_columns = [
    'rsrp1', 'rsrq1', 'rsrp2', 'rsrq2', 'rsrp3', 'rsrq3',
    'previousrsrp1', 'previousrsrq1', 'previousrsrp2', 'previousrsrq2',
    'previousrsrp3', 'previousrsrq3',
]
previsores = data[feature_columns].values
# Label: received bytes as a percentage of 15728640 (= 15 * 1024 * 1024)
# NOTE(review): 15728640 is presumably the total download size in bytes - confirm
label = ((data[['rxbytes']] / 15728640) * 100).values


# Scaling inputs and label to the [0, 1] range
# NOTE(review): both scalers are fitted on the full dataset, i.e. before the
# train/test folds are created in the cells below
scaler_x = MinMaxScaler(feature_range=(0, 1))
previsores = scaler_x.fit_transform(previsores)
scaler_y = MinMaxScaler(feature_range=(0, 1))
label = scaler_y.fit_transform(label)

In [30]:
# Now we load our regressor, execute k-Fold, train and test our algorithm

# KNN
from sklearn import neighbors

# The splitter is StratifiedKFold fed an all-zero dummy target: with a single
# "class" no actual stratification takes place, only shuffled 5-fold splitting.
# The dummy target does not change between repetitions, so build it once.
dummy_target = np.zeros(shape=(previsores.shape[0], 1))

# Applies StratifiedKFold with k = 5 and repeats process 33 times for statistical robustness
# resultados33 holds one value per repetition: the MAE averaged over its 5 folds
resultados33 = []

for i in range(33):
    # The repetition index seeds the shuffling, so every repetition uses different folds
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, dummy_target):
        # Train the KNN regressor
        regressor = neighbors.KNeighborsRegressor(n_neighbors = 6)
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Applying the inverse scale so the error is expressed in the label's original units
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1,1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Calculating the mean absolute error (MAE)
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # Mean MAE of this repetition
    media = np.mean(resultados1)
    resultados33.append(media)
# Final results
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Enable to display the MAE mean and standard deviation over the 33 repetitions
resultados33.mean()
# resultados33.std()

6.000465762347874

In [31]:
# Now we load our regressor, execute k-Fold, train and test our algorithm

# MLP
from sklearn.neural_network import MLPRegressor

# The splitter is StratifiedKFold fed an all-zero dummy target: with a single
# "class" no actual stratification takes place, only shuffled 5-fold splitting.
# The dummy target does not change between repetitions, so build it once.
dummy_target = np.zeros(shape=(previsores.shape[0], 1))

# Applies StratifiedKFold with k = 5 and repeats process 33 times for statistical robustness
# resultados33 holds one value per repetition: the MAE averaged over its 5 folds
resultados33 = []

for i in range(33):
    # The repetition index seeds the shuffling, so every repetition uses different folds
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, dummy_target):
        # Train the MLP regressor
        # NOTE(review): per the scikit-learn docs, learning_rate and
        # learning_rate_init are only used by the 'sgd'/'adam' solvers, so
        # they have no effect with solver = 'lbfgs'
        regressor = MLPRegressor(
                                    activation = 'logistic',
                                    alpha = 0.001,
                                    hidden_layer_sizes = 4,
                                    learning_rate = 'invscaling',
                                    learning_rate_init = 0.25024695513504763,
                                    max_iter = 3600,
                                    solver = 'lbfgs',
                                    # Seed the weight initialisation so a re-run
                                    # reproduces the reported result
                                    random_state = i,
                                )
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Applying the inverse scale so the error is expressed in the label's original units
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1,1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Calculating the mean absolute error (MAE)
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # Mean MAE of this repetition
    media = np.mean(resultados1)
    resultados33.append(media)
# Final results
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Enable to display the MAE mean and standard deviation over the 33 repetitions
resultados33.mean()
# resultados33.std()

6.348613652460654

In [32]:
# Now we load our regressor, execute k-Fold, train and test our algorithm

# Random Forest
from sklearn.ensemble import RandomForestRegressor

# The splitter is StratifiedKFold fed an all-zero dummy target: with a single
# "class" no actual stratification takes place, only shuffled 5-fold splitting.
# The dummy target does not change between repetitions, so build it once.
dummy_target = np.zeros(shape=(previsores.shape[0], 1))

# Applies StratifiedKFold with k = 5 and repeats process 33 times for statistical robustness
# resultados33 holds one value per repetition: the MAE averaged over its 5 folds
resultados33 = []

for i in range(33):
    # The repetition index seeds the shuffling, so every repetition uses different folds
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, dummy_target):
        # Train the Random Forest regressor
        # The split criterion is left at its default, which is squared error in
        # every scikit-learn release; the explicit 'mse' spelling was removed in
        # scikit-learn 1.2 and would make fit() fail there
        regressor = RandomForestRegressor(
                                            max_depth = 6,
                                            max_features = 0.7,
                                            max_samples = 0.8,
                                            n_estimators = 94,
                                            # Seed bootstrap/feature sampling so a
                                            # re-run reproduces the reported result
                                            random_state = i,
                                         )
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Applying the inverse scale so the error is expressed in the label's original units
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1,1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Calculating the mean absolute error (MAE)
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # Mean MAE of this repetition
    media = np.mean(resultados1)
    resultados33.append(media)
# Final results
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Enable to display the MAE mean and standard deviation over the 33 repetitions
resultados33.mean()
# resultados33.std()

6.001691047374916

In [33]:
# Now we load our regressor, execute k-Fold, train and test our algorithm

#Gradient Boosting Machine (GBM)
import sklearn
from sklearn.ensemble import GradientBoostingRegressor

# The plain squared-error split criterion was spelled 'mse' before
# scikit-learn 1.0 and 'squared_error' from 1.0 on ('mse' was removed in 1.2).
# It is not the estimator's default ('friedman_mse'), so it must stay explicit.
gbm_criterion = 'squared_error' if int(sklearn.__version__.split('.')[0]) >= 1 else 'mse'

# The splitter is StratifiedKFold fed an all-zero dummy target: with a single
# "class" no actual stratification takes place, only shuffled 5-fold splitting.
# The dummy target does not change between repetitions, so build it once.
dummy_target = np.zeros(shape=(previsores.shape[0], 1))

# Applies StratifiedKFold with k = 5 and repeats process 33 times for statistical robustness
# resultados33 holds one value per repetition: the MAE averaged over its 5 folds
resultados33 = []

for i in range(33):
    # The repetition index seeds the shuffling, so every repetition uses different folds
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, dummy_target):
        # Train the GBM regressor
        regressor = GradientBoostingRegressor(
                                                criterion = gbm_criterion,
                                                learning_rate = 0.047744243757850054,
                                                max_depth = 4,
                                                max_features = 0.4,
                                                n_estimators = 120,
                                                subsample = 0.7,
                                                # Seed row/feature subsampling so a
                                                # re-run reproduces the reported result
                                                random_state = i,
                                                )
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Applying the inverse scale so the error is expressed in the label's original units
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1,1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Calculating the mean absolute error (MAE)
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # Mean MAE of this repetition
    media = np.mean(resultados1)
    resultados33.append(media)
# Final results
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Enable to display the MAE mean and standard deviation over the 33 repetitions
resultados33.mean()
# resultados33.std()

6.189748408142736

In [34]:
# Now we load our regressor, execute k-Fold, train and test our algorithm

#LightGBM
from lightgbm import LGBMRegressor


# The splitter is StratifiedKFold fed an all-zero dummy target: with a single
# "class" no actual stratification takes place, only shuffled 5-fold splitting.
# The dummy target does not change between repetitions, so build it once.
dummy_target = np.zeros(shape=(previsores.shape[0], 1))

# Applies StratifiedKFold with k = 5 and repeats process 33 times for statistical robustness
# resultados33 holds one value per repetition: the MAE averaged over its 5 folds
resultados33 = []

for i in range(33):
    # The repetition index seeds the shuffling, so every repetition uses different folds
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, dummy_target):
        # Train the LightGBM regressor
        # Use the class imported above: the bare module name `lightgbm` is never
        # bound in this notebook, so `lightgbm.LGBMRegressor` raises NameError
        # on a fresh kernel.
        # NOTE(review): eval_metric is documented as a fit() argument rather than
        # a constructor parameter, and bagging_fraction is documented as needing a
        # non-zero bagging_freq to take effect - confirm both are intended.
        regressor = LGBMRegressor(
                                    bagging_fraction = 0.4,
                                    eval_metric = 'mae',
                                    feature_fraction = 0.7,
                                    learning_rate = 0.033515974245406554,
                                    max_depth = 5,
                                    min_data_in_leaf = 36,
                                    n_estimators = 139,
                                    objective = 'regression_l1',
                                    )
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Applying the inverse scale so the error is expressed in the label's original units
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1,1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Calculating the mean absolute error (MAE)
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # Mean MAE of this repetition
    media = np.mean(resultados1)
    resultados33.append(media)
# Final results
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Enable to display the MAE mean and standard deviation over the 33 repetitions
resultados33.mean()
# resultados33.std()

5.088453123278241

In [35]:
# Now we load our regressor, execute k-Fold, train and test our algorithm

# XGBoost
import xgboost as xgb

# The splitter is StratifiedKFold fed an all-zero dummy target: with a single
# "class" no actual stratification takes place, only shuffled 5-fold splitting.
# The dummy target does not change between repetitions, so build it once.
dummy_target = np.zeros(shape=(previsores.shape[0], 1))

# Applies StratifiedKFold with k = 5 and repeats process 33 times for statistical robustness
# resultados33 holds one value per repetition: the MAE averaged over its 5 folds
resultados33 = []

for i in range(33):
    # The repetition index seeds the shuffling, so every repetition uses different folds
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, dummy_target):
        # Train the XGBoost regressor
        # 'reg:logistic' expects targets inside [0, 1]; label satisfies this
        # because it was min-max scaled to (0, 1) beforehand
        regressor = xgb.XGBRegressor(
                                        booster = 'gbtree',
                                        colsample_bylevel = 0.6,
                                        colsample_bynode = 0.4,
                                        colsample_bytree = 0.5,
                                        eval_metric = 'mae',
                                        learning_rate = 0.26414442974612706,
                                        max_depth = 8,
                                        n_estimators = 120,
                                        objective = 'reg:logistic',
                                    )
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Applying the inverse scale so the error is expressed in the label's original units
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1,1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Calculating the mean absolute error (MAE)
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # Mean MAE of this repetition
    media = np.mean(resultados1)
    resultados33.append(media)
# Final results
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Enable to display the MAE mean and standard deviation over the 33 repetitions
resultados33.mean()
# resultados33.std()

5.884855602155836