In [None]:
import numpy as np
import matplotlib.pyplot as plt

from data.data_loader import loadDataCsv
from data.data_processing import processData, getDataProcessor, trainTestSplit
from models.tuned_models import getPointEstimationModels, getQuantileRegressionModels, getConformalModels

In [None]:
### LOAD DATA ###
# Directory passed to the model factory functions in later cells.
hyperparamsDir = "../config/hyperparameters/"

# Each split is read from its own directory with the same loader arguments.
# NOTE(review): the second argument looks like a list of tokens to treat as
# missing values -- confirm against loadDataCsv.
trainDataDir = "../data/intermediate/sthlm-sodertalje/train/"
dfTrain = loadDataCsv(trainDataDir, ["", "-"])

testDataDir = "../data/intermediate/sthlm-sodertalje/test/"
dfTest = loadDataCsv(testDataDir, ["", "-"])

In [None]:
### FEATURE CLEANING ###
dependentCol = "UL_bitrate"

# Mbps from Kbps: the target column of both frames is overwritten in place.
# NOTE(review): this cell is not idempotent -- running it a second time without
# reloading the data divides the column by 1024 again.
# NOTE(review): the divisor is 1024; confirm whether the raw unit is decimal
# kilobits (which would call for 1000) or kibibits.
for frame in (dfTrain, dfTest):
    frame[dependentCol] = frame[dependentCol] / 1024


In [None]:
### DATA PREPARATION ###
# Numeric input columns (order is kept exactly as passed to the processor).
selectedFloatCols = ["Longitude", "Latitude", "Speed", "SNR", "Level", "Qual"]

# Categorical input columns (order is kept exactly as passed to the processor).
selectedCatCols = [
    "CellID", "Node", "NetworkMode", "BAND", "BANDWIDTH", "LAC", "PSC",
]

processor = getDataProcessor(selectedFloatCols, selectedCatCols, applyScaler=True)

# Both splits go through processData with the same column selection, target
# column and processor object; only the frame and the fit flag differ.
processArgs = (selectedFloatCols, selectedCatCols, dependentCol, processor)

xTrain, yTrain = processData(dfTrain, *processArgs)

# fitProcessor=False: presumably reuses the processor as fitted on the training
# frame instead of refitting it on test data -- confirm against processData.
xTest, yTest = processData(dfTest, *processArgs, fitProcessor=False)

In [None]:
# Fit every point-estimation model on the training split and print its R2 on
# both the training and the test split.
nFeatures = xTrain.shape[1]  # column count of xTrain, handed to the model factory
models = getPointEstimationModels(hyperparamsDir, nFeatures)
for pointModel in models:
    print(f"Model: {pointModel.getName()}")
    pointModel.fit(xTrain, yTrain)

    trainR2 = pointModel.getR2(xTrain, yTrain)
    print(f"Train R2: {trainR2:.3f}")

    testR2 = pointModel.getR2(xTest, yTest)
    print(f"Test R2: {testR2:.3f}")
    print("")

In [None]:
### UNCERTAINTY INTERVALS ###
alpha = 0.1
reservedRatio = 0.15
nFeatures = xTrain.shape[1]  # column count of xTrain, handed to the model factories

# Quantile-regression models are fitted on the complete training split.
quantileModels = getQuantileRegressionModels(alpha, hyperparamsDir, nFeatures)
for qModel in quantileModels:
    qModel.fit(xTrain, yTrain)

# The conformal models get a reduced training part plus a reserved part.
# NOTE(review): xRes/yRes are presumably the calibration set, and the third
# argument of trainTestSplit presumably the training fraction -- confirm.
trainRatio = 1 - reservedRatio
xTrainPart, xRes, yTrainPart, yRes = trainTestSplit(xTrain, yTrain, trainRatio)
conformalPredictors = getConformalModels(alpha, hyperparamsDir, nFeatures)
for cpModel in conformalPredictors:
    cpModel.fit(xTrainPart, yTrainPart, xRes, yRes)

# Single collection used by the evaluation and plotting cells.
uncertaintyModels = quantileModels + conformalPredictors


In [None]:
### EVALUATION ###
# For each interval model, print the coverage ratio and the average interval
# width it reports on the test split.
for model in uncertaintyModels:
    print(f"Model: {model.getName()}")

    coverage = model.getCoverageRatio(xTest, yTest)
    print(f"Average coverage: {coverage}")

    avgWidth = model.getAverageIntervalWidth(xTest)
    print(f"Average width: {avgWidth}")
    print("")

In [None]:
def plotValuePredAndInterval(yTrue, yPredInterval, title):
    """Plot actual values together with a shaded prediction-interval band.

    A new 10x6 inch figure is created on every call. Actual values are drawn
    as blue circular markers at their sample index, and the region between the
    two interval curves is filled in semi-transparent gray.

    Parameters
    ----------
    yTrue : sequence
        Actual target values; its length defines the x-axis (0 .. len - 1).
    yPredInterval : indexable
        ``yPredInterval[0]`` and ``yPredInterval[1]`` are the two curves that
        bound the filled band (presumably lower and upper bounds -- confirm
        against the models' ``predict``). Each needs one entry per sample.
    title : str
        Title shown above the plot.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on, so callers can adjust, save or close the
        figure (via ``ax.figure``). Existing callers may ignore it.
    """
    sampleIdx = np.arange(len(yTrue))

    # Draw on an explicit Axes instead of pyplot's implicit "current figure",
    # so the result does not depend on which figure is active at call time.
    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(sampleIdx, yTrue, 'o', label="Actual Value", color='blue')
    # step='mid' makes the band change value halfway between sample indices.
    ax.fill_between(
        sampleIdx,
        yPredInterval[0],
        yPredInterval[1],
        step='mid',
        color='gray',
        alpha=0.4,
        label="Prediction Interval",
    )
    ax.set_xlabel('Sample Index')
    ax.set_ylabel('Uplink throughput (Mbps)')
    ax.set_title(title)
    ax.legend()
    return ax


In [None]:
### ILLUSTRATE INTERVALS ###
# Only the first n test samples are plotted, one figure per interval model.
n = 100
xTestPart, yTestPart = xTest[:n], yTest[:n]

for model in uncertaintyModels:
    yPredInterval = model.predict(xTestPart)
    figureTitle = "Actual vs Predicted values with intervals: " + model.getName()
    plotValuePredAndInterval(yTestPart, yPredInterval, figureTitle)

# Render all figures created in the loop above.
plt.show()