## System Setup

In [1]:
# Run `command` through the system shell, capture its standard output and
# echo it to the console as a single newline-separated string.
#
# Arguments:
#   command  Character string holding the shell command to execute.
#   ...      Extra arguments forwarded unchanged to `system()`.
#
# Returns NULL invisibly (the value of `cat()`); the function is called for
# its printing side effect.
shell_call <- function(command, ...) {
  output_lines <- system(command, intern = TRUE, ...)
  joined_output <- paste(output_lines, collapse = "\n")
  cat(joined_output)
}
# Work around a torch/CUDA version issue by downloading and installing a
# prebuilt torch 0.11.0 package (the cu117 build). This step is not needed
# when running on CPU only.
torch_url <- "https://storage.googleapis.com/torch-lantern-builds/packages/cu117/0.11.0/src/contrib/torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz"
torch_tarball <- "torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz"
# wget saves the archive in the working directory under its original name.
shell_call(paste("wget", torch_url))
# repos = NULL tells install.packages() to install from the local file.
install.packages(torch_tarball, repos = NULL)
# For some reason, the coro package (a torch dependency) is not installed
# along with torch when using binaries, so install it explicitly.
install.packages("coro")


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [2]:
# Print a short description of the host, one shell command per item:
#   1. the first 'model name' entry of /proc/cpuinfo,
#   2. the sum of all 'cpu cores' entries of /proc/cpuinfo,
#   3. MemTotal from /proc/meminfo divided by 1024^2 (labelled GB),
#   4. the first line of `R --version`.
system_info_commands <- c(
  "grep -m1 'model name' /proc/cpuinfo | awk -F': ' '{printf \" CPU Model: %s \\n \",  $2}'",
  "grep 'cpu cores' /proc/cpuinfo  | awk -F': ' '{a[cores]+=$2}END{printf \"CPU Cores: %s \\n \", a[cores] }'",
  "grep MemTotal /proc/meminfo | awk '{printf \"RAM: %.1fGB \\n \", $2 / 1024 / 1024}'",
  "R --version | head -n 1"
)
for (info_command in system_info_commands) {
  shell_call(info_command)
}

 CPU Model: Intel(R) Xeon(R) CPU @ 2.00GHz 
 CPU Cores: 32 
 RAM: 51.0GB 
 R version 4.3.1 (2023-06-16) -- "Beagle Scouts"

In [3]:
# Print the nvidia-smi report (GPU model, driver and CUDA versions, memory
# usage) for the machine running the notebook.
shell_call("nvidia-smi")

Mon Oct 23 12:40:01 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    24W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
# Install the BKTR package and its dependencies from the configured
# repository.
install.packages("BKTR")

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘sp’, ‘Rcpp’, ‘collections’, ‘RgoogleMaps’, ‘png’, ‘plyr’, ‘jpeg’, ‘bitops’, ‘R6P’, ‘ggmap’




## Appendix B: Covariates reshaping

In [5]:
library(BKTR)
# Instantiate the BIXI dataset object and pull out its three tables:
# spatial features, temporal features and the response (departures).
bixi_data <- BixiData$new()
spatial_df <- bixi_data$spatial_features_df
temporal_df <- bixi_data$temporal_features_df
y_df <- bixi_data$departure_df

# Number of covariates in each feature table, not counting the index column.
p_s <- ncol(spatial_df) - 1
p_t <- ncol(temporal_df) - 1

# Report the dimensions of the inputs.
sprintf("Response M=%d and N=%d", nrow(y_df), ncol(y_df))
sprintf("Spatial features M=%d x p_s=%d", nrow(spatial_df), p_s)
sprintf("Temporal features N=%d x p_t=%d", nrow(temporal_df), p_t)

# Combine the three tables into one data frame, naming the response
# column 'nb_departure'.
data_df <- reshape_covariate_dfs(
  spatial_df, temporal_df, y_df, "nb_departure"
)

# Compare the expected shape (one row per location/time pair; response plus
# all covariates as columns) with the shape actually obtained. Two columns
# of data_df are excluded from the count.
expected_nb_rows <- nrow(spatial_df) * nrow(temporal_df)
expected_nb_cols <- 1 + p_s + p_t
sprintf("Should obtain MN=%d x P=%d", expected_nb_rows, expected_nb_cols)
sprintf("Reshaped MN=%d x P=%d", nrow(data_df), ncol(data_df) - 2)

## Appendix D: Influence of device and floating point format

In [19]:
library(data.table)

# Benchmark BKTR on simulated data for every combination of device
# ('cuda', 'cpu') and floating point format ('float64', 'float32').
# Each combination is run `nb_runs` times; for every run the response errors,
# the beta (coefficient) errors and the elapsed sampling time are recorded as
# formatted strings, one row per run, in the data.table `df`.
TSR$set_params(seed = 1)
res_colnames <- c(
  'Device', 'FP_Type', 'Iter', 'Y_RMSE',
  'Y_MAE', 'B_RMSE', 'B_MAE', 'Time'
)
nb_res_cols <- length(res_colnames)
burn_in_iter <- 500
sampling_iter <- 500
nb_runs <- 10
fp_devices <- c('cuda', 'cpu')
fp_types <- c('float64', 'float32')

# Preallocate one slot per run instead of growing a vector with c() and
# rebuilding the whole table on every iteration.
res_rows <- vector('list', length(fp_devices) * length(fp_types) * nb_runs)
row_idx <- 0
for (fp_device in fp_devices) {
  for (fp_type in fp_types) {
    for (i in seq_len(nb_runs)) {
      TSR$set_params(fp_type = fp_type, fp_device = fp_device)

      # Kernels with fixed lengthscales used to generate the synthetic data.
      matern_lengthscale <- KernelParameter$new(value = 14)
      se_lengthscale <- KernelParameter$new(value = 5)
      spatial_kernel <- KernelMatern$new(lengthscale = matern_lengthscale)
      temporal_kernel <- KernelSE$new(lengthscale = se_lengthscale)

      simu_data <- simulate_spatiotemporal_data(
        nb_locations = 100,
        nb_time_points = 150,
        nb_spatial_dimensions = 2,
        spatial_scale = 10,
        time_scale = 10,
        spatial_covariates_means = c(0, 2, 4),
        temporal_covariates_means = c(1, 3),
        spatial_kernel = spatial_kernel,
        temporal_kernel = temporal_kernel,
        noise_variance_scale = 1
      )

      # The regressor gets fresh kernels built with default arguments rather
      # than the kernels used for the simulation.
      bktr_regressor <- BKTRRegressor$new(
        data_df = simu_data$data_df,
        spatial_kernel = KernelMatern$new(),
        spatial_positions_df = simu_data$spatial_positions_df,
        temporal_kernel = KernelSE$new(),
        temporal_positions_df = simu_data$temporal_positions_df,
        burn_in_iter = burn_in_iter,
        sampling_iter = sampling_iter,
        has_geo_coords = FALSE
      )
      # Hide output of sampling because its volume creates notebook errors
      .unused_out <- capture.output(bktr_regressor$mcmc_sampling())

      # Absolute error between estimated and simulated betas; the first two
      # columns of both tables are dropped before comparing.
      beta_err <- unlist(abs(
        lapply(bktr_regressor$beta_estimates[, -c(1, 2)], as.numeric)
        - simu_data$beta_df[, -c(1, 2)]
      ))
      beta_rmse <- sqrt(mean(beta_err^2))
      # beta_err is already absolute, so no second abs() is needed.
      beta_mae <- mean(beta_err)

      # Store the run as one row of formatted strings.
      row_idx <- row_idx + 1
      res_rows[[row_idx]] <- c(
        fp_device,
        fp_type,
        sprintf('%04d', i),
        sprintf('%.4f', bktr_regressor$result_logger$error_metrics$RMSE),
        sprintf('%.4f', bktr_regressor$result_logger$error_metrics$MAE),
        sprintf('%.4f', beta_rmse),
        sprintf('%.4f', beta_mae),
        sprintf('%.3f', as.numeric(
          bktr_regressor$result_logger$total_elapsed_time,
          units = "secs"
        ))
      )
    }
  }
}

# Build the result table once, after all runs have completed. Column names
# are set on the matrix so the data.table is created already named.
res_mat <- do.call(rbind, res_rows)
stopifnot(ncol(res_mat) == nb_res_cols)
colnames(res_mat) <- res_colnames
df <- as.data.table(res_mat)
# Flattened row-major copy, kept under the name the original cell defined.
res_vals <- as.vector(t(res_mat))
print(df)

# Aggregate results (Table 8): for each (Device, FP_Type) pair, report the
# mean and standard deviation of every metric, formatted to four decimals.
mean_fmt <- function(x) {
  sprintf("%.4f", mean(x))
}
sd_fmt <- function(x) {
  sprintf("%.4f", sd(x))
}
# Convert every non-grouping column to numeric before aggregating.
df <- df[, lapply(.SD, as.numeric), by = .(Device, FP_Type)]
df <- df[
  ,
  list(
    Y_RMSE_avg = mean_fmt(Y_RMSE),
    Y_RMSE_sd = sd_fmt(Y_RMSE),
    Y_MAE_avg = mean_fmt(Y_MAE),
    Y_MAE_sd = sd_fmt(Y_MAE),
    B_RMSE_avg = mean_fmt(B_RMSE),
    B_RMSE_sd = sd_fmt(B_RMSE),
    B_MAE_avg = mean_fmt(B_MAE),
    B_MAE_sd = sd_fmt(B_MAE),
    Time_avg = mean_fmt(Time),
    Time_sd = sd_fmt(Time)
  ),
  by = .(Device, FP_Type)
]
print(df)

    Device FP_Type Iter Y_RMSE  Y_MAE B_RMSE  B_MAE    Time
 1:   cuda float64 0001 0.9916 0.7900 0.0962 0.0660 251.739
 2:   cuda float64 0002 0.9890 0.7894 0.1187 0.0744 249.900
 3:   cuda float64 0003 1.0024 0.8008 0.1287 0.0730 237.683
 4:   cuda float64 0004 0.9915 0.7924 0.1010 0.0653 240.335
 5:   cuda float64 0005 0.9811 0.7850 0.1554 0.0906 248.005
 6:   cuda float64 0006 0.9922 0.7912 0.1784 0.0945 247.257
 7:   cuda float64 0007 0.9907 0.7895 0.1075 0.0672 244.170
 8:   cuda float64 0008 0.9947 0.7956 0.1055 0.0734 252.154
 9:   cuda float64 0009 1.0003 0.7958 0.1489 0.0784 250.306
10:   cuda float64 0010 1.0021 0.7990 0.1101 0.0688 244.489
11:   cuda float32 0001 0.9834 0.7826 0.1477 0.0863 194.141
12:   cuda float32 0002 0.9964 0.7954 0.1137 0.0729 191.600
13:   cuda float32 0003 0.9977 0.7941 0.1050 0.0720 191.137
14:   cuda float32 0004 0.9856 0.7893 0.1013 0.0693 192.489
15:   cuda float32 0005 0.9733 0.7784 0.1504 0.0952 197.858
16:   cuda float32 0006 0.9852 0.7862 0.