## System Setup

In [1]:
# Find CPU info
# Print the CPU model name: -m1 makes grep stop at the first matching line,
# and awk splits on ": " to keep only the value part.
!grep -m1 'model name' /proc/cpuinfo | awk -F": " '{print $2}'
# Add up the 'cpu cores' value of every matching line and print the total.
# `cores` is an unset awk variable, so a[cores] is just one accumulator slot.
# NOTE(review): /proc/cpuinfo may repeat 'cpu cores' once per logical
# processor, in which case this total is not the physical core count - confirm.
!grep 'cpu cores' /proc/cpuinfo | awk -F": " '{a[cores]+=$2}END{print "CPU cores: " a[cores]}'

Intel(R) Xeon(R) CPU @ 2.00GHz
CPU cores: 8


In [2]:
# Find RAM info
!grep MemTotal /proc/meminfo | awk '{printf "%.1fGB RAM", $2 / 1024 / 1024}'

25.5GB RAM

In [3]:
# Find GPU info
!nvidia-smi

Fri Jul 21 08:20:15 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    25W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
!python --version

Python 3.10.6


In [5]:
!R --version

R version 4.3.1 (2023-06-16) -- "Beagle Scouts"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under the terms of the
GNU General Public License versions 2 or 3.
For more information about these matters see
https://www.gnu.org/licenses/.



In [6]:
# Install torch manually from the prebuilt cu117 binaries, since Colab uses cu11.8, which does not build
!wget https://storage.googleapis.com/torch-lantern-builds/packages/cu117/0.11.0/src/contrib/torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz
!Rscript -e 'install.packages("torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz", repos = NULL)'

--2023-07-21 08:20:15--  https://storage.googleapis.com/torch-lantern-builds/packages/cu117/0.11.0/src/contrib/torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.79.128, 108.177.119.128, 108.177.126.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.79.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/gzip]
Saving to: ‘torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz’

torch_0.11.0_R_x86_     [    <=>             ]   1.83G  27.4MB/s    in 53s     

2023-07-21 08:21:08 (35.5 MB/s) - ‘torch_0.11.0_R_x86_64-pc-linux-gnu.tar.gz’ saved [1966658359]

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
* installing *binary* package ‘torch’ ...
* DONE (torch)


In [7]:
# Install BKTR
!Rscript -e "library(devtools); devtools::install_github('julien-hec/BKTR', ref = 'main')"

Loading required package: usethis
[?25hDownloading GitHub repo julien-hec/BKTR@main
cpp11       (0.4.4  -> 0.4.5  ) [CRAN]
openssl     (2.0.6  -> 2.1.0  ) [CRAN]
curl        (5.0.0  -> 5.0.1  ) [CRAN]
Rcpp        (NA     -> 1.0.11 ) [CRAN]
sp          (NA     -> 2.0-0  ) [CRAN]
png         (NA     -> 0.1-8  ) [CRAN]
isoband     (0.2.6  -> 0.2.7  ) [CRAN]
bit         (4.0.4  -> 4.0.5  ) [CRAN]
bitops      (NA     -> 1.0-7  ) [CRAN]
digest      (0.6.32 -> 0.6.33 ) [CRAN]
jpeg        (NA     -> 0.1-10 ) [CRAN]
plyr        (NA     -> 1.8.8  ) [CRAN]
RgoogleMaps (NA     -> 1.4.5.3) [CRAN]
collections (NA     -> 0.3.7  ) [CRAN]
coro        (NA     -> 1.0.3  ) [CRAN]
ggmap       (NA     -> 3.0.2  ) [CRAN]
R6P         (NA     -> 0.3.0  ) [CRAN]
Installing 17 packages: cpp11, openssl, curl, Rcpp, sp, png, isoband, bit, bitops, digest, jpeg, plyr, RgoogleMaps, collections, coro, ggmap, R6P
Installing packages into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cra

### Interpol Analysis (R)

In [None]:
sh = """
# Interpolation benchmark on simulated spatiotemporal data.
#
# For each dataset size ('Smaller', 'Larger'), each kernel lengthscale (3, 6)
# and each of 10 repetitions, the script:
#   1. simulates a dataset with simulate_spatiotemporal_data(),
#   2. withholds `nb_aside_locs` locations and `nb_aside_times` time points,
#   3. fits a BKTRRegressor on the remaining observations,
#   4. predicts the withheld rows and scores them (MAE / RMSE on beta and y).
# The running result table is printed after every repetition and a
# mean / sd summary per (Dataset_Type, Lengthscale) is printed at the end.
library(BKTR)
library(data.table)

# ---- Experiment configuration ----
nb_aside_locs <- 4
nb_aside_times <- 6
nb_repetitions <- 10
ds_types <- c('Smaller', 'Larger')
len_scales <- c(3, 6)
key_cols <- c('location', 'time')

TSR$set_params(seed = 2, fp_type = 'float32', fp_device = 'cuda')
res_colnames <- c(
  'Dataset_Type', 'Lengthscale', 'Iter',
  'B_MAE', 'B_RMSE', 'Y_MAE', 'Y_RMSE', 'Time'
)

# Reorder the rows of `other_dt` so that they line up one-to-one with the
# (location, time) pairs of `ref_dt`. Stops if the two tables do not hold
# exactly the same set of pairs, so a misalignment can never silently bias
# the error metrics.
align_to <- function(ref_dt, other_dt) {
  row_id <- function(dt) paste(dt[['location']], dt[['time']], sep = '|')
  row_pos <- match(row_id(ref_dt), row_id(other_dt))
  stopifnot(
    nrow(ref_dt) == nrow(other_dt),
    !anyNA(row_pos),
    anyDuplicated(row_pos) == 0
  )
  other_dt[row_pos]
}

# One slot per run; filled in order so the table is never grown by copying.
res_rows <- vector(
  'list', length(ds_types) * length(len_scales) * nb_repetitions
)
run_idx <- 0L

for (ds_type in ds_types) {
  # Dataset dimensions only depend on the dataset type: set them once here.
  is_small_ds <- ds_type == 'Smaller'
  if (is_small_ds) {
    nb_locs <- 20
    nb_times <- 30
    spa_cov_means <- c(0, 2)
    tem_cov_means <- c(1)
  } else {
    nb_locs <- 100
    nb_times <- 150
    spa_cov_means <- c(0, 2, 4)
    tem_cov_means <- c(1, 3)
  }
  # Number of locations / time points left for training
  obs_nb_locs <- nb_locs - nb_aside_locs
  obs_nb_times <- nb_times - nb_aside_times

  for (len_scale in len_scales) {
    for (i in seq_len(nb_repetitions)) {
      # Kernels used to generate the data (fixed, known lengthscale)
      matern_lengthscale <- KernelParameter$new(value = len_scale)
      se_lengthscale <- KernelParameter$new(value = len_scale)
      spatial_kernel <- KernelMatern$new(
        lengthscale = matern_lengthscale, smoothness_factor = 5
      )
      temporal_kernel <- KernelSE$new(lengthscale = se_lengthscale)

      simu_data <- simulate_spatiotemporal_data(
        nb_locations = nb_locs,
        nb_time_points = nb_times,
        nb_spatial_dimensions = 2,
        spatial_scale = 10,
        time_scale = 10,
        spatial_covariates_means = spa_cov_means,
        temporal_covariates_means = tem_cov_means,
        spatial_kernel = spatial_kernel,
        temporal_kernel = temporal_kernel,
        noise_variance_scale = 1
      )

      data_df <- simu_data$data_df
      spatial_pos_df <- simu_data$spatial_positions_df
      temporal_pos_df <- simu_data$temporal_positions_df

      all_locs <- spatial_pos_df$location
      all_times <- temporal_pos_df$time

      # Randomly pick the observed locations; the rest are the M_new locs
      locs_indx_sample <- TSR$rand_choice(
        TSR$tensor(seq_along(all_locs)), obs_nb_locs
      )
      obs_locs <- all_locs[as.numeric(locs_indx_sample$cpu())]
      new_locs <- setdiff(all_locs, obs_locs)

      # Randomly pick the observed times; the rest are the N_new times
      times_indx_sample <- TSR$rand_choice(
        TSR$tensor(seq_along(all_times)), obs_nb_times
      )
      obs_times <- all_times[as.numeric(times_indx_sample$cpu())]
      new_times <- setdiff(all_times, obs_times)

      # Training data: observed location AND observed time
      obs_data_df <- data_df[location %in% obs_locs & time %in% obs_times]
      obs_spatial_pos_df <- spatial_pos_df[location %in% obs_locs]
      obs_temporal_pos_df <- temporal_pos_df[time %in% obs_times]

      # Held-out data: withheld location OR withheld time
      new_data_df <- data_df[location %in% new_locs | time %in% new_times]
      new_spatial_positions_df <- spatial_pos_df[location %in% new_locs]
      new_temporal_positions_df <- temporal_pos_df[time %in% new_times]

      # Run mcmc sampling (kernels start from their default parameters,
      # not from the ones used to simulate the data)
      bktr_regressor <- BKTRRegressor$new(
        data_df = obs_data_df,
        rank_decomp = 10,
        burn_in_iter = 500,
        sampling_iter = 500,
        spatial_kernel = KernelMatern$new(smoothness_factor = 5),
        spatial_positions_df = obs_spatial_pos_df,
        temporal_kernel = KernelSE$new(),
        temporal_positions_df = obs_temporal_pos_df,
        has_geo_coords = FALSE
      )
      bktr_regressor$mcmc_sampling()

      # Run interpolation
      preds <- bktr_regressor$predict(
        new_data_df,
        new_spatial_positions_df,
        new_temporal_positions_df
      )

      # Align predictions with the simulated ground truth row by row on
      # (location, time) instead of relying on the tables' implicit order.
      true_y_df <- data_df[
        location %in% new_locs | time %in% new_times,
        c('location', 'time', 'y')
      ]
      pred_y_df <- preds$new_y_df[
        location %in% new_locs | time %in% new_times
      ]
      pred_y_df <- align_to(true_y_df, pred_y_df)

      true_beta_df <- simu_data$beta_df[
        location %in% new_locs | time %in% new_times
      ]
      pred_beta_df <- preds$new_beta_df[
        location %in% new_locs | time %in% new_times
      ]
      pred_beta_df <- align_to(true_beta_df, pred_beta_df)

      # Calc Errors
      preds_y_err <- true_y_df[['y']] - pred_y_df[['y_est']]
      # Beta columns are compared by position, as before.
      # NOTE(review): assumes both beta tables list covariates in the same
      # column order - confirm against the BKTR output format.
      true_beta_cols <- setdiff(names(true_beta_df), key_cols)
      pred_beta_cols <- setdiff(names(pred_beta_df), key_cols)
      preds_beta_err <- as.numeric(
        as.matrix(true_beta_df[, true_beta_cols, with = FALSE]) -
          as.matrix(pred_beta_df[, pred_beta_cols, with = FALSE])
      )

      y_rmse <- sqrt(mean(preds_y_err^2))
      y_mae <- mean(abs(preds_y_err))
      beta_rmse <- sqrt(mean(preds_beta_err^2))
      beta_mae <- mean(abs(preds_beta_err))

      # Formatting Values (kept as strings so the printed table and the
      # 4-decimal rounding used by the final summary stay unchanged)
      elapsed_secs <- as.numeric(
        bktr_regressor$result_logger$total_elapsed_time, units = 'secs'
      )
      run_idx <- run_idx + 1L
      res_rows[[run_idx]] <- setNames(
        list(
          ds_type,
          as.character(len_scale),
          sprintf('%04d', i),
          sprintf('%.4f', beta_mae),
          sprintf('%.4f', beta_rmse),
          sprintf('%.4f', y_mae),
          sprintf('%.4f', y_rmse),
          sprintf('%.3f', elapsed_secs)
        ),
        res_colnames
      )
      df <- rbindlist(res_rows[seq_len(run_idx)])
      print(df)
    }
  }
}

# Format data.table
mean_fmt <- function(x) sprintf('%.4f', mean(x))
sd_fmt <- function(x) sprintf('%.4f', sd(x))

# Metric columns were stored as strings: convert them back to numbers
df <- df[, lapply(.SD, as.numeric), by = list(Dataset_Type, Lengthscale)]
df <- df[, .(
  B_MAE_avg = mean_fmt(B_MAE),
  B_MAE_sd = sd_fmt(B_MAE),
  B_RMSE_avg = mean_fmt(B_RMSE),
  B_RMSE_sd = sd_fmt(B_RMSE),
  Y_MAE_avg = mean_fmt(Y_MAE),
  Y_MAE_sd = sd_fmt(Y_MAE),
  Y_RMSE_avg = mean_fmt(Y_RMSE),
  Y_RMSE_sd = sd_fmt(Y_RMSE),
  Time_avg = mean_fmt(Time),
  Time_sd = sd_fmt(Time)
), by = list(Dataset_Type, Lengthscale)]
setkey(df, Dataset_Type, Lengthscale)
print(df)
"""
# Write the R source held in `sh` to 5_5_simu_interpol.R, then run that file
# with Rscript through the notebook's shell escape.
with open('5_5_simu_interpol.R', 'w') as file:
  file.write(sh)
!Rscript 5_5_simu_interpol.R

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1] "Iter 218   | Elapsed Time:     0.19s | MAE:  0.7993 | RMSE:  0.9981"
[1] "Iter 219   | Elapsed Time:     0.21s | MAE:  0.7989 | RMSE:  0.9993"
[1] "Iter 220   | Elapsed Time:     0.17s | MAE:  0.7999 | RMSE:  1.0004"
[1] "Iter 221   | Elapsed Time:     0.17s | MAE:  0.7992 | RMSE:  0.9991"
[1] "Iter 222   | Elapsed Time:     0.22s | MAE:  0.7993 | RMSE:  0.9982"
[1] "Iter 223   | Elapsed Time:     0.19s | MAE:  0.7991 | RMSE:  0.9994"
[1] "Iter 224   | Elapsed Time:     0.20s | MAE:  0.7993 | RMSE:  0.9994"
[1] "Iter 225   | Elapsed Time:     0.23s | MAE:  0.8016 | RMSE:  1.0014"
[1] "Iter 226   | Elapsed Time:     0.18s | MAE:  0.7986 | RMSE:  0.9984"
[1] "Iter 227   | Elapsed Time:     0.19s | MAE:  0.8002 | RMSE:  1.0016"
[1] "Iter 228   | Elapsed Time:     0.27s | MAE:  0.7998 | RMSE:  0.9991"
[1] "Iter 229   | Elapsed Time:     0.17s | MAE:  0.7994 | RMSE:  0.9989"
[1] "Iter 230   | Elapsed Time:     0.19s | MAE