In [1]:
#Sci-kit
from sklearn.ensemble import RandomForestRegressor

#Torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetRegressor

#xg-boost
import xgboost as xgb

# Random Forest Quantile
from sklearn_quantile import RandomForestQuantileRegressor

from visualization.visualize import *
from data.data_loader import loadDataParquet
from data.data_processing import processData, getDataProcessor, trainValTestSplit
from models.training import trainModels
from models.model import Model
from models.conformalprediction.quantile_regression import *
from models.conformalprediction.pinball import *
from models.neuralnetwork.architecture import *



In [2]:
# Directory holding the intermediate parquet data; the path is relative,
# so it resolves against the kernel's working directory.
dirParquet = "../data/intermediate/"
# Load the raw frame that every later cell derives its data from.
df = loadDataParquet(dirParquet)


In [3]:
### DATA PREPARATION ###
# Column passed to processData as the dependent (target) variable.
dependentCol = "UL_bitrate"

# Feature columns, split by how the processor is told to treat them.
selectedFloatCols = [
    "Longitude", "Latitude", "Speed",
    "RSRP", "RSRQ", "SNR",
]
selectedCatCols = ["CellID"]

processor = getDataProcessor(
    selectedFloatCols,
    selectedCatCols,
    applyScaler=True,
)
dataX, dataY = processData(
    df,
    selectedFloatCols,
    selectedCatCols,
    dependentCol,
    processor,
)
# Last expression of the cell: displays the size of the processed feature data.
dataX.size


  X[column] = X[column].astype("object").fillna(np.nan).map(col_mapping)
  X[column] = X[column].astype("object").fillna(np.nan).map(col_mapping)
  X[column] = X[column].astype("object").fillna(np.nan).map(col_mapping)


230090

In [4]:
### DIVIDE INTO TRAINING, VALIDATION AND TEST ###
# NOTE(review): `validatioRatio` is misspelled ("validation"); the name is kept
# as-is because other cells may reference it.
# The two ratios sum to 0.90 — presumably the remainder becomes the test set;
# confirm against trainValTestSplit.
trainRatio, validatioRatio = 0.75, 0.15

splitParts = trainValTestSplit(dataX, dataY, trainRatio, validatioRatio)
xTrain, xVal, xTest, yTrain, yVal, yTest = splitParts

In [5]:
### NEURAL NET QUANTILE REGRESSOR ###
# alpha is the miscoverage level: the lower/upper nets are trained on the
# alpha/2 and 1 - alpha/2 quantiles respectively.
alpha = 0.1
lowerQuantile = alpha / 2
upperQuantile = 1 - alpha / 2


def _buildPinballNet(quantile):
    """Return an unfitted skorch regressor trained with PinballLoss(quantile).

    All networks in this cell share the same module, optimizer and logging
    settings; only the quantile handed to the loss differs.
    """
    return NeuralNetRegressor(
        ThroughputPredictor,
        module__input_size=dataX.shape[1],  # Pass the input size to the module
        optimizer=optim.Adam,               # Optimizer
        criterion=PinballLoss(quantile),    # Loss function
        verbose=0,                          # Silence verbose output
        train_split=None,                   # Disable internal train/val split, we'll use external CV
    )


def _buildNetParamGrid():
    """Return a fresh single-candidate hyper-parameter grid.

    A new dict is created on every call so the grids stay independent
    objects, exactly as the separate literals did before.
    """
    return {
        'lr': [0.01],
        'max_epochs': [100],
        'optimizer__weight_decay': [0.01],
        'batch_size': [128],
    }


lowerNet = _buildPinballNet(lowerQuantile)
upperNet = _buildPinballNet(upperQuantile)
# NOTE(review): regularNet is built with the *upper* quantile loss, identical
# to upperNet, and neither it nor paramGridNetRegular is used by any cell
# visible here. This looks like a copy-paste leftover — confirm the intended
# loss or delete both names. Behaviour is kept unchanged for now.
regularNet = _buildPinballNet(upperQuantile)

paramGridNetLower = _buildNetParamGrid()
paramGridNetUpper = _buildNetParamGrid()
paramGridNetRegular = _buildNetParamGrid()

# Each bound is scored with the pinball loss of its own quantile.
lowerScorer = pinballLossScorer(lowerQuantile)
upperScorer = pinballLossScorer(upperQuantile)
lowerModel = Model(lowerNet, "Lower Bound Neural Network", paramGridNetLower, lowerScorer)
upperModel = Model(upperNet, "Upper Bound Neural Network", paramGridNetUpper, upperScorer)

# Pair the two bound models into one interval predictor, then wrap it for
# conformal calibration.
quantileNeuralNetRegressor = QuantileRegressorNeuralNet([lowerModel, upperModel], alpha, "Neural Network Quantile")
conformalQuantileNeuralNetRegressor = ConformalizedQuantileRegressor(quantileNeuralNetRegressor)



In [6]:
### RANDOM FOREST QUANTILE REGRESSOR ###
# alpha is the miscoverage level; the forest predicts the alpha/2 and
# 1 - alpha/2 quantiles in a single model.
alpha = 0.1

# random_state is fixed (same seed as the other tree models in this notebook)
# so that a fresh Restart & Run All reproduces the reported numbers.
rfq = RandomForestQuantileRegressor(q=[alpha / 2, 1 - alpha / 2], random_state=42)

# Single-candidate grid: every hyper-parameter is pinned to one value.
paramGridRfq = {
    'n_estimators': [100],
    'criterion': ['squared_error'],
    'max_depth': [10],
    'min_samples_split': [10],
    'min_samples_leaf': [10],
    'min_weight_fraction_leaf': [0.1],
    'max_features': ['log2']
}
# Scorer built from both quantile levels, since one model outputs both bounds.
doublePinballScorer = doublePinballLossScorer(alpha / 2, 1 - alpha / 2)
rqfModel = Model(rfq, "Random Forest Quantile", paramGridRfq, doublePinballScorer)

# Wrap the single two-quantile model as an interval predictor, then wrap that
# for conformal calibration.
quantileForestRegressor = QuantileRegressorRandomForest([rqfModel], alpha, "Random Forest Quantile")
conformalQuantileForestRegressor = ConformalizedQuantileRegressor(quantileForestRegressor)

In [7]:
### TRAINING ###
# The list order (forest first, then neural network) is also the order in
# which later cells print their results.
conformalQuantileRegressors = [
    conformalQuantileForestRegressor,
    conformalQuantileNeuralNetRegressor,
]

# NOTE(review): the trailing positional argument 2 is undocumented here —
# confirm its meaning against ConformalizedQuantileRegressor.fit.
for conformalModel in conformalQuantileRegressors:
    conformalModel.fit(xTrain, yTrain, xVal, yVal, 2)

In [8]:
### EVALUATION ###
# For every conformalized model, print on the test split:
#   1. coverage ratio of the wrapped (un-conformalized) quantile regressor,
#   2. coverage ratio of the conformalized model,
#   3. average interval width of the conformalized model.
for conformalModel in conformalQuantileRegressors:
    baseRegressor = conformalModel.getQuantileRegressor()
    baseName = baseRegressor.getName()
    baseCoverage = baseRegressor.getCoverageRatio(xTest, yTest)
    print(f"{baseName} coverage: {baseCoverage}")

    conformalName = conformalModel.getName()
    conformalCoverage = conformalModel.getCoverageRatio(xTest, yTest)
    print(f"{conformalName} coverage: {conformalCoverage}")

    averageWidth = conformalModel.getAverageIntervalWidth(xTest)
    print(f"Average {conformalName} width: {averageWidth}")

Random Forest Quantile coverage: 0.9300608166811468
Conformalized Random Forest Quantile coverage: 0.9300608166811468
Average Conformalized Random Forest Quantile width: 278.7750549316406
Neural Network Quantile coverage: 0.891398783666377
Conformalized Neural Network Quantile coverage: 0.9083405734144222
Average Conformalized Neural Network Quantile width: 247.37098693847656


In [9]:
### POINT ESTIMATION MODELS ###
# Three point predictors, each paired with its hyper-parameter grid and
# collected in `models` at the end of the cell.

# --- Random forest: min_samples_leaf is the only parameter with several candidates.
rf = RandomForestRegressor(random_state=42)
paramGridRf = dict(
    n_estimators=[300],
    max_depth=[20],
    min_samples_split=[5],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt'],
)

# --- Gradient boosting: single-candidate grid.
xGradBoost = xgb.XGBRegressor(random_state=42)
paramGridXgb = dict(
    n_estimators=[200],
    learning_rate=[0.05],
    max_depth=[5],
    subsample=[0.8],
    colsample_bytree=[0.8],
    gamma=[0.1],
    reg_alpha=[0.01],
    reg_lambda=[1.5],
)

# --- Neural network trained with mean squared error: single-candidate grid.
net = NeuralNetRegressor(
    ThroughputPredictor,
    module__input_size=dataX.shape[1],  # input size handed to the module
    optimizer=optim.Adam,
    criterion=nn.MSELoss,
    verbose=0,                          # no per-epoch logging
    train_split=None,                   # no internal train/val split, external CV used
)
paramGridNet = dict(
    lr=[0.01],
    max_epochs=[100],
    optimizer__weight_decay=[0.01],
    batch_size=[128],
)

models = [
    Model(rf, "Random Forest", paramGridRf),
    Model(xGradBoost, "XGBoost", paramGridXgb),
    Model(net, "Neural Network", paramGridNet),
]


In [10]:
# Train every point-estimation model, passing the train, validation and test
# splits. The return value is bound to `_` so the cell does not display it;
# only the text printed during the call is shown.
_ = trainModels(models, xTrain, yTrain, xVal, yVal, xTest, yTest)

Fitting 3 folds for each of 3 candidates, totalling 9 fits
Best Parameters for Random Forest: {'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300}
Training MSE: 3310.74
Training R^2: 0.66
Validation MSE: 6597.59
Validation R^2: 0.31
Test MSE: 6785.93
Test R^2: 0.32

Best Parameters for XGBoost: None
Training MSE: 7142.90
Training R^2: 0.27
Validation MSE: 7748.77
Validation R^2: 0.19
Test MSE: 8020.17
Test R^2: 0.20

Best Parameters for Neural Network: None
Training MSE: 7966.22
Training R^2: 0.19
Validation MSE: 8263.46
Validation R^2: 0.14
Test MSE: 8486.50
Test R^2: 0.15



In [11]:
### CHECK ALIGNMENT OF POINT ESTIMATES AND PREDICTION INTERVALS ###
# For every (point model, conformal model) pair, print the coverage ratio
# obtained when the point model's test predictions are passed to
# getCoverageRatio in place of the true targets.
for model in models:
    # The predictions depend only on the point model, so compute them once
    # per model instead of once per (model, conformal model) pair.
    yPred = model.predict(xTest)
    for conformalModel in conformalQuantileRegressors:
        predIntervalCoverRatio = conformalModel.getCoverageRatio(xTest, yPred)
        print(f"{model.getName()} & {conformalModel.getName()} - cover %: {predIntervalCoverRatio}")

Random Forest & Conformalized Random Forest Quantile - cover %: 0.9895742832319722
Random Forest & Conformalized Neural Network Quantile - cover %: 0.9856646394439618
XGBoost & Conformalized Random Forest Quantile - cover %: 1.0
XGBoost & Conformalized Neural Network Quantile - cover %: 1.0
Neural Network & Conformalized Random Forest Quantile - cover %: 0.998262380538662
Neural Network & Conformalized Neural Network Quantile - cover %: 0.9978279756733276
