In [19]:
# Shared imports used throughout the notebook (each model cell below additionally imports its own regressor)

import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
import time
# Enable to start counting processing time
# start = time.time()

In [20]:

# Load the T2 and T3 tables (tab-separated) and the comma-separated file that
# provides the target column, then merge everything into a single frame `data`.

t2 = pd.read_csv('t2_OkumuraHata_Modificado', delimiter='\t')
t3 = pd.read_csv('t3_OkumuraHata_Modificado', delimiter='\t')
min_download = pd.read_csv('file1.csv', delimiter=',')

# Keep only the seeds (nRun values) that occur in both tables and renumber
# the surviving rows from 0 so the two frames share the same index range.
t2 = t2[t2.nRun.isin(t3.nRun)].reset_index(drop=True)
t3 = t3[t3.nRun.isin(t2.nRun)].reset_index(drop=True)

# Start from T2 without its cell id / per-table measurements, then attach the
# T2 and T3 measurements side by side plus the target column.
# NOTE(review): the columns are matched purely by row index. This presumes that
# t2 and t3 list the same nRun values in the same order, and that the rows of
# `min_download` line up with the filtered t2 rows -- confirm against the files.
data = (
    t2.drop(columns=['targetCellId', 'downloadTime', 'rxBytes'])
      .assign(downloadTimeT2=t2.downloadTime,
              downloadTimeT3=t3.downloadTime,
              downloadTime=min_download.downloadTimeT2,
              rxBytesT2=t2.rxBytes,
              rxBytesT3=t3.rxBytes)
)

In [21]:
# Pre-processing: build the feature matrix and the target, both min-max scaled

# Features (`previsores`): current and previous RSRP/RSRQ readings, indices 1-3.
previsores = data[[
    'rsrp1', 'rsrq1',
    'rsrp2', 'rsrq2',
    'rsrp3', 'rsrq3',
    'previousrsrp1', 'previousrsrq1',
    'previousrsrp2', 'previousrsrq2',
    'previousrsrp3', 'previousrsrq3',
]].values

# Target (`label`): selected with a list so it stays a 2-D column, which is the
# shape the scaler expects.
label = data[['downloadTime']].values

# Rescale features and target to [0, 1]. Separate scalers are kept so that
# predictions can later be mapped back with `scaler_y.inverse_transform`.
# NOTE(review): both scalers are fit on the complete dataset, i.e. before any
# train/test split is made -- the test folds therefore contribute to the
# min/max used for scaling.
scaler_x = MinMaxScaler(feature_range=(0, 1))
scaler_y = MinMaxScaler(feature_range=(0, 1))
previsores = scaler_x.fit_transform(previsores)
label = scaler_y.fit_transform(label)

In [22]:
# KNN: repeated 5-fold cross-validation of the download-time regressor.
# Uses `previsores`, `label` and `scaler_y` produced by the pre-processing cell.

from sklearn import neighbors

# The splitter receives an all-zero target, so every sample belongs to a single
# stratum and the stratification has no effect: this is a shuffled 5-fold split.
# The repetition index seeds the shuffle, giving 33 different but reproducible
# partitions.
estratos = np.zeros(previsores.shape[0])
resultados33 = []

for i in range(33):
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, estratos):
        # Train the KNN regressor on the training folds and predict the held-out fold
        regressor = neighbors.KNeighborsRegressor(n_neighbors = 4)
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Undo the target scaling so the error is expressed on the original
        # downloadTime scale (inverse_transform needs a 2-D column)
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1, 1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Mean absolute error (MAE) of this fold
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # One value per repetition: the mean MAE over its 5 folds
    resultados1 = np.asarray(resultados1)
    media = resultados1.mean()
    resultados33.append(media)
# Final results: 33 per-repetition mean MAEs
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Mean MAE over the 33 repetitions (enable the line below for the standard deviation)
resultados33.mean()
# resultados33.std()

0.12791765151515147

In [23]:
# MLP: repeated 5-fold cross-validation of the download-time regressor.
# Uses `previsores`, `label` and `scaler_y` produced by the pre-processing cell.

from sklearn.neural_network import MLPRegressor

# The splitter receives an all-zero target, so every sample belongs to a single
# stratum and the stratification has no effect: this is a shuffled 5-fold split.
# The repetition index seeds the shuffle, giving 33 different but reproducible
# partitions.
estratos = np.zeros(previsores.shape[0])
resultados33 = []

for i in range(33):
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, estratos):
        # Train the MLP regressor.
        # `random_state` seeds the weight initialisation; without it every run
        # of this cell produces a different score.
        # Per the scikit-learn documentation `learning_rate` is only used by the
        # 'sgd' solver and `learning_rate_init` only by 'sgd'/'adam', so with
        # solver='lbfgs' they do not influence training; they are kept unchanged.
        regressor = MLPRegressor(activation = 'tanh',
                                 hidden_layer_sizes = 22,
                                 learning_rate = 'invscaling',
                                 learning_rate_init = 0.03026389988096674,
                                 max_iter = 5700,
                                 solver = 'lbfgs',
                                 random_state = i)
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Undo the target scaling so the error is expressed on the original
        # downloadTime scale (inverse_transform needs a 2-D column)
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1, 1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Mean absolute error (MAE) of this fold
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # One value per repetition: the mean MAE over its 5 folds
    resultados1 = np.asarray(resultados1)
    media = resultados1.mean()
    resultados33.append(media)
# Final results: 33 per-repetition mean MAEs
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Mean MAE over the 33 repetitions (enable the line below for the standard deviation)
resultados33.mean()
# resultados33.std()

0.27016565398503

In [24]:
# Random Forest: repeated 5-fold cross-validation of the download-time regressor.
# Uses `previsores`, `label` and `scaler_y` produced by the pre-processing cell.

from sklearn.ensemble import RandomForestRegressor

# The splitter receives an all-zero target, so every sample belongs to a single
# stratum and the stratification has no effect: this is a shuffled 5-fold split.
# The repetition index seeds the shuffle, giving 33 different but reproducible
# partitions.
estratos = np.zeros(previsores.shape[0])
resultados33 = []

for i in range(33):
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, estratos):
        # Train the Random Forest regressor.
        # The split criterion is left at its default, which is squared error in
        # every scikit-learn release; the explicit spelling 'mse' was renamed to
        # 'squared_error' in 1.0 and rejected from 1.2 on.
        # `random_state` seeds the bootstrap and feature sampling so the cell
        # gives the same score on every run.
        regressor = RandomForestRegressor(max_depth = 9,
                                          max_features = 0.4,
                                          n_estimators = 106,
                                          random_state = i)
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Undo the target scaling so the error is expressed on the original
        # downloadTime scale (inverse_transform needs a 2-D column)
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1, 1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Mean absolute error (MAE) of this fold
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # One value per repetition: the mean MAE over its 5 folds
    resultados1 = np.asarray(resultados1)
    media = resultados1.mean()
    resultados33.append(media)
# Final results: 33 per-repetition mean MAEs
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Mean MAE over the 33 repetitions (enable the line below for the standard deviation)
resultados33.mean()
# resultados33.std()

0.12038365699826047

In [25]:
# Gradient Boosting Machine (GBM): repeated 5-fold cross-validation of the
# download-time regressor.
# Uses `previsores`, `label` and `scaler_y` produced by the pre-processing cell.

from sklearn.ensemble import GradientBoostingRegressor

# The splitter receives an all-zero target, so every sample belongs to a single
# stratum and the stratification has no effect: this is a shuffled 5-fold split.
# The repetition index seeds the shuffle, giving 33 different but reproducible
# partitions.
estratos = np.zeros(previsores.shape[0])
resultados33 = []

for i in range(33):
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, estratos):
        # Train the GBM regressor.
        # 'squared_error' is the current name of the criterion formerly spelled
        # 'mse' (renamed in scikit-learn 1.0, old name rejected from 1.2 on);
        # on scikit-learn < 1.0 the value must be 'mse' instead.
        # `random_state` seeds the row subsampling and feature sampling so the
        # cell gives the same score on every run.
        regressor = GradientBoostingRegressor(criterion = 'squared_error',
                                              learning_rate = 0.06856362171992,
                                              max_depth = 6,
                                              max_features = 0.5,
                                              n_estimators = 84,
                                              subsample = 0.8,
                                              random_state = i)
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Undo the target scaling so the error is expressed on the original
        # downloadTime scale (inverse_transform needs a 2-D column)
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1, 1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Mean absolute error (MAE) of this fold
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # One value per repetition: the mean MAE over its 5 folds
    resultados1 = np.asarray(resultados1)
    media = resultados1.mean()
    resultados33.append(media)
# Final results: 33 per-repetition mean MAEs
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Mean MAE over the 33 repetitions (enable the line below for the standard deviation)
resultados33.mean()
# resultados33.std()

0.12507082676496703

In [26]:
# LightGBM: repeated 5-fold cross-validation of the download-time regressor.
# Uses `previsores`, `label` and `scaler_y` produced by the pre-processing cell.

import lightgbm

# The splitter receives an all-zero target, so every sample belongs to a single
# stratum and the stratification has no effect: this is a shuffled 5-fold split.
# The repetition index seeds the shuffle, giving 33 different but reproducible
# partitions.
estratos = np.zeros(previsores.shape[0])
resultados33 = []

for i in range(33):
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, estratos):
        # Train the LightGBM regressor (L1 objective, matching the MAE metric below).
        # NOTE(review): the LightGBM parameter docs state that bagging is only
        # performed when `bagging_freq` is non-zero; it is not set here, so
        # `bagging_fraction` presumably has no effect -- confirm.
        # NOTE(review): `eval_metric` is documented as an argument of `fit()`;
        # given to the constructor it is forwarded as an extra keyword
        # parameter -- confirm it is not simply ignored.
        regressor = lightgbm.LGBMRegressor(objective = 'regression_l1',
                                           bagging_fraction = 0.8,
                                           eval_metric = 'mae',
                                           feature_fraction = 0.4,
                                           learning_rate = 0.08165872333050837,
                                           max_depth = 9,
                                           n_estimators = 148)
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Undo the target scaling so the error is expressed on the original
        # downloadTime scale (inverse_transform needs a 2-D column)
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1, 1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Mean absolute error (MAE) of this fold
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # One value per repetition: the mean MAE over its 5 folds
    resultados1 = np.asarray(resultados1)
    media = resultados1.mean()
    resultados33.append(media)
# Final results: 33 per-repetition mean MAEs
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Mean MAE over the 33 repetitions (enable the line below for the standard deviation)
resultados33.mean()
# resultados33.std()

0.11394650994318796

In [27]:
# XGBoost: repeated 5-fold cross-validation of the download-time regressor.
# Uses `previsores`, `label` and `scaler_y` produced by the pre-processing cell.

import xgboost as xgb

# The splitter receives an all-zero target, so every sample belongs to a single
# stratum and the stratification has no effect: this is a shuffled 5-fold split.
# The repetition index seeds the shuffle, giving 33 different but reproducible
# partitions.
estratos = np.zeros(previsores.shape[0])
resultados33 = []

for i in range(33):
    kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = i)
    resultados1 = []
    for n_train, n_test in kfold.split(previsores, estratos):
        # Train the XGBoost regressor (squared-error objective; column
        # subsampling is applied per tree, per level and per node)
        regressor = xgb.XGBRegressor(colsample_bylevel = 0.7,
                                     colsample_bynode = 0.8,
                                     colsample_bytree = 0.5,
                                     eval_metric = 'mae',
                                     learning_rate = 0.04638617378157029,
                                     max_depth = 6,
                                     n_estimators = 174,
                                     objective = 'reg:squarederror')
        # Fitting and prediction
        regressor.fit(previsores[n_train], label[n_train].ravel())
        previsoes = regressor.predict(previsores[n_test])
        # Undo the target scaling so the error is expressed on the original
        # downloadTime scale (inverse_transform needs a 2-D column)
        valores_previsao = scaler_y.inverse_transform(np.asarray(previsoes).reshape(-1, 1))
        y_teste = scaler_y.inverse_transform(label[n_test])
        # Mean absolute error (MAE) of this fold
        mae = mean_absolute_error(y_teste, valores_previsao)
        resultados1.append(mae)
    # One value per repetition: the mean MAE over its 5 folds
    resultados1 = np.asarray(resultados1)
    media = resultados1.mean()
    resultados33.append(media)
# Final results: 33 per-repetition mean MAEs
resultados33 = np.asarray(resultados33)

# Enable to obtain processing time
# end = time.time()
# tempo = end - start

# Mean MAE over the 33 repetitions (enable the line below for the standard deviation)
resultados33.mean()
# resultados33.std()

0.12343192373472259