# Setup

In [1]:
# Get VM CPU and R version
shell_call <- function(command, ...) {
  result <- system(command, intern = TRUE, ...)
  cat(paste0(result, collapse = "\n"))
}
shell_call("grep -m1 'model name' /proc/cpuinfo | awk -F': ' '{printf \" CPU Model: %s \\n \",  $2}'")
shell_call("grep 'cpu cores' /proc/cpuinfo  | awk -F': ' '{a[cores]+=$2}END{printf \"CPU Cores: %s \\n \", a[cores] }'")
shell_call("grep MemTotal /proc/meminfo | awk '{printf \"RAM: %.1fGB \\n \", $2 / 1024 / 1024}'")
shell_call("R --version | head -n 1")

 CPU Model: Intel(R) Xeon(R) CPU @ 2.20GHz 
 CPU Cores: 72 
 RAM: 83.5GB 
 R version 4.4.1 (2024-06-14) -- "Race for Your Life"

In [2]:
# Get GPU Info
shell_call("nvidia-smi")

Tue Aug 27 08:07:15 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              45W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
# Install Torch with valid CUDA version

options(timeout = 600) # increasing timeout is recommended since we will be downloading a 2GB file.
# For Windows and Linux: "cpu", "cu117", "cu118" are the only currently supported
# For MacOS the supported are: "cpu-intel" or "cpu-m1"
kind <- "cu118"
version <- available.packages()["torch","Version"]
options(repos = c(
  torch = sprintf("https://torch-cdn.mlverse.org/packages/%s/%s/", kind, version),
  CRAN = "https://cloud.r-project.org" # or any other from which you want to install the other R dependencies.
))

install.packages("torch")

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘coro’, ‘safetensors’




In [4]:
# Test Torch installation

library(torch)
torch_rand(4)

torch_tensor
 0.5680
 0.5107
 0.7876
 0.2857
[ CPUFloatType{4} ]

In [5]:
# Install BKTR

install.packages('BKTR')

### From Github (Latest Version)
# install.packages("devtools") # if not installed
# devtools::install_github("julien-hec/BKTR", ref = "main")

# For section 4 side by side plots
# install.packages('ggpubr')

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘collections’, ‘png’, ‘plyr’, ‘jpeg’, ‘bitops’, ‘R6P’, ‘ggmap’




In [6]:
# The following block is because Jupyter for R does not print until the end of
# the code block execution. So, with this command we add real time printing.
# See: https://stackoverflow.com/questions/37689694/real-time-printing-to-console-with-r-in-jupyter
trace(what = "print", where = getNamespace("base"), exit = flush.console, print = FALSE)

Tracing function "print" in package "namespace:base"



# Load Libraries

In [7]:
# Code to run BKTR examples #
# Lanthier, Lei, Sun and Labbe 2023 #

library('BKTR')
library(data.table)
library(ggplot2)
# library('ggpubr')

# Run BKTR

## Appendix B: Covariates reshaping

In [8]:

################
################
#   Appendix   #
################
################

########################################
### Appendix B: Covariates reshaping ###
########################################

# Create full bixi data set
bixi_data <- BixiData$new()

# Ensure the data is reshaped to the right format
spatial_df <- bixi_data$spatial_features_df
temporal_df <- bixi_data$temporal_features_df
y_df <- bixi_data$departure_df
p_s <- ncol(spatial_df) - 1 # Not counting index column
p_t <- ncol(temporal_df) - 1
sprintf('Response M=%d and N=%d', nrow(y_df), ncol(y_df))
sprintf('Spatial features M=%d x p_s=%d', nrow(spatial_df), p_s)
sprintf('Temporal features N=%d x p_t=%d', nrow(temporal_df), p_t)
data_df <- reshape_covariate_dfs(spatial_df, temporal_df,
    y_df, 'nb_departure')
sprintf('Should obtain MN=%d x P=%d', nrow(spatial_df) *
   nrow(temporal_df), 1 + p_s + p_t)
sprintf('Reshaped MN=%d x P=%d', nrow(data_df), ncol(data_df) - 2)

## Appendix D:

In [16]:

#############################################################
# Appendix D: Influence of device and floating point format #
#############################################################
# WARNING: Very long to run                                 #
#############################################################

TSR$set_params(seed = 1)

res_colnames <- c(
  'Device', 'FP_Type', 'Iter', 'Y_RMSE',
  'Y_MAE', 'B_RMSE', 'B_MAE', 'Time'
)

nb_res_cols <- length(res_colnames)
res_vals <- c()
burn_in_iter <- 500
sampling_iter <- 500
for (fp_device in c('cuda', 'cpu')) {
  for (fp_type in c('float64', 'float32')) {
    for (i in 1:10) {
      print(sprintf('%s, %s, %02d', fp_device, fp_type, i))
      TSR$set_params(fp_type = fp_type, fp_device = fp_device)
      matern_lengthscale <- KernelParameter$new(value = 14)
      se_lengthscale <- KernelParameter$new(value = 5)
      spatial_kernel <- KernelMatern$new(lengthscale = matern_lengthscale)
      temporal_kernel <- KernelSE$new(lengthscale = se_lengthscale)

      simu_data <- simulate_spatiotemporal_data(
        nb_locations = 100,
        nb_time_points = 150,
        nb_spatial_dimensions = 2,
        spatial_scale = 10,
        time_scale = 10,
        spatial_covariates_means = c(0, 2, 4),
        temporal_covariates_means = c(1, 3),
        spatial_kernel = spatial_kernel,
        temporal_kernel = temporal_kernel,
        noise_variance_scale = 1
      )

      bktr_regressor <- BKTRRegressor$new(
        data_df = simu_data$data_df,
        spatial_kernel = KernelMatern$new(),
        spatial_positions_df = simu_data$spatial_positions_df,
        temporal_kernel = KernelSE$new(),
        temporal_positions_df = simu_data$temporal_positions_df,
        burn_in_iter = burn_in_iter,
        sampling_iter = sampling_iter,
        has_geo_coords = FALSE
      )

      # Hide output of sampling because its volume creates notebook errors
      .unused_out <- capture.output(bktr_regressor$mcmc_sampling())

      # Calc Beta Errors
      beta_err <- unlist(abs(
        lapply(bktr_regressor$beta_estimates[, -c(1, 2)], as.numeric)
        - simu_data$beta_df[, -c(1, 2)]
      ))
      beta_rmse <- sqrt(mean(beta_err^2))
      beta_mae <- mean(abs(beta_err))
      # Formatting Values
      res_vals <- c(
        res_vals,
        fp_device,
        fp_type,
        sprintf('%04d', i),
        sprintf('%.4f', bktr_regressor$result_logger$error_metrics$RMSE),
        sprintf('%.4f', bktr_regressor$result_logger$error_metrics$MAE),
        sprintf('%.4f', beta_rmse),
        sprintf('%.4f', beta_mae),
        sprintf('%.3f', as.numeric(
          bktr_regressor$result_logger$total_elapsed_time, units = "secs"
        ))
      )
      df <- as.data.table(matrix(res_vals, ncol = nb_res_cols, byrow = TRUE))
    }
  }
}
colnames(df) <- res_colnames
print(df)

# Aggregate results (Table 8)
mean_fmt <- function(x) sprintf('%.4f', mean(x))
sd_fmt <- function(x) sprintf('%.4f', sd(x))
df <- df[, lapply(.SD, as.numeric), by = list(Device, FP_Type)]
df <- df[, .(
    Y_RMSE_avg = mean_fmt(Y_RMSE),
    Y_RMSE_sd = sd_fmt(Y_RMSE),
    Y_MAE_avg = mean_fmt(Y_MAE),
    Y_MAE_sd = sd_fmt(Y_MAE),
    B_RMSE_avg = mean_fmt(B_RMSE),
    B_RMSE_sd = sd_fmt(B_RMSE),
    B_MAE_avg = mean_fmt(B_MAE),
    B_MAE_sd = sd_fmt(B_MAE),
    Time_avg = mean_fmt(Time),
    Time_sd = sd_fmt(Time)
), by = list(Device, FP_Type)]
print(df)


[1] "cuda, float64, 01"
[1] "cuda, float64, 02"
[1] "cuda, float64, 03"
[1] "cuda, float64, 04"
[1] "cuda, float64, 05"
[1] "cuda, float64, 06"
[1] "cuda, float64, 07"
[1] "cuda, float64, 08"
[1] "cuda, float64, 09"
[1] "cuda, float64, 10"
[1] "cuda, float32, 01"
[1] "cuda, float32, 02"
[1] "cuda, float32, 03"
[1] "cuda, float32, 04"
[1] "cuda, float32, 05"
[1] "cuda, float32, 06"
[1] "cuda, float32, 07"
[1] "cuda, float32, 08"
[1] "cuda, float32, 09"
[1] "cuda, float32, 10"
[1] "cpu, float64, 01"
[1] "cpu, float64, 02"
[1] "cpu, float64, 03"
[1] "cpu, float64, 04"
[1] "cpu, float64, 05"
[1] "cpu, float64, 06"
[1] "cpu, float64, 07"
[1] "cpu, float64, 08"
[1] "cpu, float64, 09"
[1] "cpu, float64, 10"
[1] "cpu, float32, 01"
[1] "cpu, float32, 02"
[1] "cpu, float32, 03"
[1] "cpu, float32, 04"
[1] "cpu, float32, 05"
[1] "cpu, float32, 06"
[1] "cpu, float32, 07"
[1] "cpu, float32, 08"
[1] "cpu, float32, 09"
[1] "cpu, float32, 10"
    Device FP_Type   Iter Y_RMSE  Y_MAE B_RMSE  B_MAE    Tim

In [17]:
fmt_2dec <- function(x) {
  sprintf('%.2f', as.numeric(x))
}
res_df <- df[, lapply(.SD, fmt_2dec), by = list(Device, FP_Type)]

# Format in B_mae(avg±sd)/B_rmse(avg±sd) Y_mae(avg±sd)/Y_rmse(avg±sd)
res_df <- res_df[, .(
  B_res = paste(B_MAE_avg, '±', B_MAE_sd, '/', B_RMSE_avg, '±', B_RMSE_sd),
  Y_res = paste(Y_MAE_avg, '±', Y_MAE_sd, '/', Y_RMSE_avg, '±', Y_RMSE_sd),
  Time_res = paste(Time_avg, '±', Time_sd)
), by = list(Device, FP_Type)]
setkey(res_df, Device)
res_df

Device,FP_Type,B_res,Y_res,Time_res
<chr>,<chr>,<chr>,<chr>,<chr>
cpu,float64,0.08 ± 0.01 / 0.12 ± 0.02,0.79 ± 0.00 / 0.99 ± 0.00,520.12 ± 6.05
cpu,float32,0.09 ± 0.01 / 0.14 ± 0.03,0.79 ± 0.00 / 0.99 ± 0.01,424.63 ± 13.81
cuda,float64,0.08 ± 0.01 / 0.12 ± 0.02,0.79 ± 0.00 / 0.99 ± 0.00,187.09 ± 4.15
cuda,float32,0.09 ± 0.01 / 0.14 ± 0.03,0.79 ± 0.01 / 0.98 ± 0.01,192.17 ± 7.22
