In [1]:
# scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

#Torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetRegressor

# XGBoost
import xgboost as xgb

# Random Forest Quantile
from sklearn_quantile import RandomForestQuantileRegressor

from visualization.visualize import *
from data.data_loader import *
from models.training import trainModels
from models.model import Model
from models.quantileregression.conformalprediction import *
from models.quantileregression.pinball import *
from models.neuralnetwork.architecture import *

In [2]:
# Relative path of the directory handed to the parquet loader below.
dirParquet = "../data/intermediate/"
# loadDataParquet arrives through one of the wildcard imports in the first cell
# (presumably data.data_loader — confirm). Its result is the raw input to processData.
df = loadDataParquet(dirParquet)


In [3]:
### DATA PREPARATION ###
# Name of the dependent (target) column handed to processData.
dependentCol = "UL_bitrate"

# Feature columns, split into the two groups processData takes as separate arguments.
selectedFloatCols = [
    "Longitude",
    "Latitude",
    "Speed",
    "RSRP",
    "RSRQ",
    "SNR",
]
selectedCatCols = ["CellID"]

dataX, dataY = processData(
    df,
    selectedFloatCols,
    selectedCatCols,
    dependentCol,
)
# Last expression of the cell: shown as the cell output.
dataX.size


460180

In [4]:
### DIVIDE INTO TRAINING, VALIDATION AND TEST ###
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10
# Fixed seed for both shuffles: without it every run yields a different
# train/validation/test partition and the reported metrics cannot be reproduced.
splitSeed = 42

# The second split below assumes the three ratios partition the whole dataset.
assert abs(train_ratio + validation_ratio + test_ratio - 1.0) < 1e-9, \
    "train/validation/test ratios must sum to 1"

# Step 1: carve off the training set; the remainder is a holdout containing
# both the validation and the test samples.
xTrain, xHoldout, yTrain, yHoldout = train_test_split(
    dataX,
    dataY,
    test_size=1 - train_ratio,
    random_state=splitSeed,
)
# Step 2: divide the holdout so that validation/test keep their share of the
# full dataset (test_size is expressed as a fraction of the holdout).
xVal, xTest, yVal, yTest = train_test_split(
    xHoldout,
    yHoldout,
    test_size=test_ratio / (test_ratio + validation_ratio),
    random_state=splitSeed,
)


In [5]:
### NEURAL NET QUANTILE REGRESSOR ###
alpha = 0.1
# Quantile levels of the two interval bounds for a nominal 1 - alpha interval.
lowerQuantile = alpha/2
upperQuantile = 1-alpha/2


def _makeQuantileNet(quantile):
    """Return an unfitted skorch regressor whose criterion is PinballLoss(quantile).

    All other settings are shared by every quantile network in this cell, so
    they live here instead of being copy-pasted per network.
    """
    return NeuralNetRegressor(
        ThroughputPredictor,
        module__input_size=dataX.shape[1],  # Pass the input size to the module
        optimizer=optim.Adam,               # Optimizer
        criterion=PinballLoss(quantile),    # Loss function
        verbose=0,                          # Silence verbose output
        train_split=None                    # Disable internal train/val split, we'll use external CV
    )


def _makeNetParamGrid():
    """Return a fresh copy of the hyper-parameter grid used for every network.

    A new dict (with new lists) is built on each call so the grids stay
    independent objects, exactly as when they were written out separately.
    """
    return {
        'lr': [0.01],
        'max_epochs': [100],
        'optimizer__weight_decay': [0.01],
        'batch_size': [128]
    }


lowerNet = _makeQuantileNet(lowerQuantile)
upperNet = _makeQuantileNet(upperQuantile)
# NOTE(review): regularNet is configured exactly like upperNet (same
# 1 - alpha/2 pinball loss) and is not referenced by the other visible cells.
# This looks like a copy-paste leftover; confirm the intended loss or remove it.
regularNet = _makeQuantileNet(upperQuantile)

paramGridNetLower = _makeNetParamGrid()
paramGridNetUpper = _makeNetParamGrid()
paramGridNetRegular = _makeNetParamGrid()

# Each bound is model-selected with a scorer built for its own quantile level.
lowerScorer = pinballLossScorer(lowerQuantile)
upperScorer = pinballLossScorer(upperQuantile)
lowerModel = Model(lowerNet, "Lower Bound Neural Network", paramGridNetLower, lowerScorer)
upperModel = Model(upperNet, "Upper Bound Neural Network", paramGridNetUpper, upperScorer)

# Pair the two bound models into one interval regressor, then wrap it for conformalization.
quantileNeuralNetRegressor = QuantileRegressorNeuralNet([lowerModel, upperModel], alpha, "Neural Network Quantile")
conformalQuantileNeuralNetRegressor = ConformalizedQuantileRegressor(quantileNeuralNetRegressor)



In [6]:
### RANDOM FOREST QUANTILE REGRESSOR ###
alpha = 0.1
# A single forest is asked for both quantile levels (alpha/2 and 1 - alpha/2).
# random_state is pinned, as for the point-estimate models, so that refitting
# the forest gives the same trees and therefore the same intervals.
rfq = RandomForestQuantileRegressor(q = [alpha/2,1- alpha/2], random_state=42)
# Exactly one candidate per hyper-parameter: the grid search only
# cross-validates this single configuration.
paramGridRfq = {
    'n_estimators': [100],
    'criterion': ['squared_error'],
    'max_depth': [10],
    'min_samples_split': [10],
    'min_samples_leaf': [10],
    'min_weight_fraction_leaf': [0.1],
    'max_features': ['log2']
}
# Scorer built from both quantile levels, since one model emits both bounds.
doublePinballScorer = doublePinballLossScorer(alpha/2, 1-alpha/2)
rqfModel = Model(rfq, "Random Forest Quantile", paramGridRfq, doublePinballScorer)

# Wrap the forest as an interval regressor, then wrap that for conformalization.
quantileForestRegressor = QuantileRegressorRandomForest([rqfModel], alpha, "Random Forest Quantile")
conformalQuantileForestRegressor = ConformalizedQuantileRegressor(quantileForestRegressor)

In [7]:
### TRAINING ###
# Both conformalized interval models, in the order they are fitted and later reported.
conformalQuantileRegressors = [
    conformalQuantileForestRegressor,
    conformalQuantileNeuralNetRegressor,
]
# NOTE(review): the trailing positional 2 appears to be the number of CV folds
# (the recorded run logs "Fitting 2 folds") — confirm against
# ConformalizedQuantileRegressor.fit.
for conformalModel in conformalQuantileRegressors:
    conformalModel.fit(xTrain, yTrain, xVal, yVal, 2)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
Fitting 2 folds for each of 1 candidates, totalling 2 fits


  cuda_attrs = torch.load(f, **load_kwargs)
  cuda_attrs = torch.load(f, **load_kwargs)


Fitting 2 folds for each of 1 candidates, totalling 2 fits


  cuda_attrs = torch.load(f, **load_kwargs)
  cuda_attrs = torch.load(f, **load_kwargs)


In [8]:
### EVALUATION ###
# For every interval model report, on the test split: coverage of the wrapped
# quantile regressor, coverage of the conformalized wrapper, and the wrapper's
# average interval width.
# NOTE(review): in the recorded output the two coverage figures are identical
# for each model — worth confirming that conformalization actually changes the
# intervals used by getCoverageRatio.
for conformalModel in conformalQuantileRegressors:
    baseRegressor = conformalModel.getQuantileRegressor()
    baseName = baseRegressor.getName()
    baseCoverage = baseRegressor.getCoverageRatio(xTest, yTest)
    print(f"{baseName} coverage: {baseCoverage}")

    conformalName = conformalModel.getName()
    conformalCoverage = conformalModel.getCoverageRatio(xTest, yTest)
    print(f"{conformalName} coverage: {conformalCoverage}")

    averageWidth = conformalModel.getAverageIntervalWidth(xTest)
    print(f"Average {conformalModel.getName()} width: {averageWidth}")

Random Forest Quantile coverage: 0.9144222415291051
Conformalized Random Forest Quantile coverage: 0.9144222415291051
Average Conformalized Random Forest Quantile width: 277.5693359375
Neural Network Quantile coverage: 0.9122502172024327
Conformalized Neural Network Quantile coverage: 0.9122502172024327
Average Conformalized Neural Network Quantile width: 282.3518981933594


In [9]:
### POINT ESTIMATION MODELS ###
# Each estimator is paired with the hyper-parameter grid handed to Model below.

# --- Random forest: three candidates (min_samples_leaf is the only varied value) ---
rf = RandomForestRegressor(random_state=42)
paramGridRf = dict(
    n_estimators=[300],
    max_depth=[20],
    min_samples_split=[5],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt'],
)

# --- XGBoost: a single candidate configuration ---
xGradBoost = xgb.XGBRegressor(random_state=42)
paramGridXgb = dict(
    n_estimators=[200],
    learning_rate=[0.05],
    max_depth=[5],
    subsample=[0.8],
    colsample_bytree=[0.8],
    gamma=[0.1],
    reg_alpha=[0.01],
    reg_lambda=[1.5],
)

# --- Neural network trained with mean-squared-error loss ---
net = NeuralNetRegressor(
    ThroughputPredictor,
    module__input_size=dataX.shape[1],  # input width taken from the feature matrix
    optimizer=optim.Adam,
    criterion=nn.MSELoss,
    verbose=0,                          # no per-epoch logging
    train_split=None,                   # no internal train/val split, external CV used
)
paramGridNet = dict(
    lr=[0.01],
    max_epochs=[100],
    optimizer__weight_decay=[0.01],
    batch_size=[128],
)

# Order here is the order in which the models are trained and reported.
models = [
    Model(rf, "Random Forest", paramGridRf),
    Model(xGradBoost, "XGBoost", paramGridXgb),
    Model(net, "Neural Network", paramGridNet),
]


In [10]:
# Run trainModels over all point-estimate models with the three data splits.
# The return value is bound to `_` only to keep it out of the cell output.
_ = trainModels(
    models,
    xTrain, yTrain,
    xVal, yVal,
    xTest, yTest,
)

Fitting 3 folds for each of 3 candidates, totalling 9 fits
Best Parameters for Random Forest: {'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300}
Training MSE: 3416.66
Training R^2: 0.64
Validation MSE: 6203.83
Validation R^2: 0.37
Test MSE: 8259.38
Test R^2: 0.30

Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best Parameters for XGBoost: {'colsample_bytree': 0.8, 'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 200, 'reg_alpha': 0.01, 'reg_lambda': 1.5, 'subsample': 0.8}
Training MSE: 7013.01
Training R^2: 0.27
Validation MSE: 7682.87
Validation R^2: 0.22
Test MSE: 9517.98
Test R^2: 0.19

Fitting 3 folds for each of 1 candidates, totalling 3 fits


  cuda_attrs = torch.load(f, **load_kwargs)
  cuda_attrs = torch.load(f, **load_kwargs)
  cuda_attrs = torch.load(f, **load_kwargs)


Best Parameters for Neural Network: {'batch_size': 128, 'lr': 0.01, 'max_epochs': 100, 'optimizer__weight_decay': 0.01}
Training MSE: 7589.52
Training R^2: 0.21
Validation MSE: 7943.75
Validation R^2: 0.19
Test MSE: 10000.65
Test R^2: 0.15



In [12]:
### CHECK ALIGNMENT OF POINT ESTIMATES AND PREDICTION INTERVALS ###
# For every (point model, interval model) pair, report the fraction of the
# point model's test predictions that the interval model's getCoverageRatio
# counts as covered.
for model in models:
    # The point predictions do not depend on the interval model, so compute
    # them once per point model instead of once per pair.
    yPred = model.predict(xTest)
    for conformalModel in conformalQuantileRegressors:
        predIntervalCoverRatio = conformalModel.getCoverageRatio(xTest, yPred)
        print(f"{model.getName()} & {conformalModel.getName()} - cover %: {predIntervalCoverRatio}")

Random Forest & Conformalized Random Forest Quantile - cover %: 0.9891398783666377
Random Forest & Conformalized Neural Network Quantile - cover %: 0.9921807124239791
XGBoost & Conformalized Random Forest Quantile - cover %: 0.998262380538662
XGBoost & Conformalized Neural Network Quantile - cover %: 1.0
Neural Network & Conformalized Random Forest Quantile - cover %: 0.9930495221546481
Neural Network & Conformalized Neural Network Quantile - cover %: 0.9995655951346655
