In [None]:
# Make NumPy and pandas (Python packages) reachable from R through reticulate.
# RETICULATE_PYTHON is set before reticulate is attached so that the
# interpreter at /usr/bin/python3 is the one reticulate is asked to use.
# NOTE(review): the path is machine-specific - adjust it for your environment.
Sys.setenv(RETICULATE_PYTHON = "/usr/bin/python3")
library(reticulate)
# Report the Python configuration reticulate resolved, as a sanity check.
py_config()


In [None]:
# reticulate supplies import(), used below to obtain Python module handles.
library(reticulate)

# Global handles to the Python modules, created with import()'s default
# conversion settings.
np <- import("numpy")
pd <- import("pandas")

In [None]:
library(torch)
library(reticulate)
library(data.table)
library(tictoc)

Training data 

In [None]:
#' Convert per-region NPY arrays into combined torch tensors
#'
#' For every region, loads `X_<region>.npy` and `y_true_<region>.npy` from
#' `data_path` through numpy (via reticulate), swaps the 2nd and 3rd axes of
#' X, shifts the labels by +1, concatenates all regions along the first
#' dimension and saves the result as `X_combined.pt` / `y_combined.pt` in
#' `output_path`.
#'
#' @param data_path Directory containing the NPY files.
#' @param regions Character vector of region names (at least one).
#' @param output_path Directory the `.pt` files are written to. Defaults to
#'   `<data_path>/torch_converted`; created if it does not exist.
#' @return A list with `X` and `y` (the combined tensors), `metadata`
#'   (regions, per-region sample counts, combined shapes, label offset) and
#'   `output_path`.
convert_data_to_tensor <- function(data_path, regions, output_path = NULL) {
  if (length(regions) == 0) {
    stop("`regions` must contain at least one region name", call. = FALSE)
  }

  # Validate every input file up front so a missing region is reported before
  # any (potentially large) array is loaded or any directory is created.
  X_files <- file.path(data_path, paste0("X_", regions, ".npy"))
  y_files <- file.path(data_path, paste0("y_true_", regions, ".npy"))
  not_found <- !file.exists(X_files) | !file.exists(y_files)
  if (any(not_found)) {
    stop(
      paste(
        "Files not found for region:",
        paste(regions[not_found], collapse = ", ")
      ),
      call. = FALSE
    )
  }

  if (is.null(output_path)) {
    output_path <- file.path(data_path, "torch_converted")
  }
  if (!dir.exists(output_path)) {
    dir.create(output_path, recursive = TRUE)
  }

  cat("Converting NPY files to optimized torch tensors\n")

  # convert = FALSE keeps numpy objects on the Python side until py_to_r()
  # is called explicitly below.
  np <- reticulate::import("numpy", convert = FALSE)

  all_X <- list()
  all_y <- list()

  for (i in seq_along(regions)) {
    region <- regions[[i]]
    cat("Processing region:", region, "\n")

    X_np <- reticulate::py_to_r(np$load(X_files[[i]]))
    y_np <- reticulate::py_to_r(np$load(y_files[[i]]))

    cat("  Original X shape:", paste(dim(X_np), collapse = " x "), "\n")
    cat("  Original y shape:", paste(dim(y_np), collapse = " x "), "\n")

    # aperm() with a length-3 permutation requires X to be a 3-D array; the
    # 2nd and 3rd axes are swapped.
    X_tensor <- torch::torch_tensor(
      aperm(X_np, c(1, 3, 2)),
      dtype = torch::torch_float32()
    )
    # Labels are shifted by +1 (converted to 1-indexed).
    y_tensor <- torch::torch_tensor(
      as.integer(y_np + 1),
      dtype = torch::torch_long()
    )

    if (X_tensor$size(1) != y_tensor$size(1)) {
      stop("Length mismatch for X / y in region ", region, call. = FALSE)
    }

    cat("  Converted X shape:", paste(X_tensor$shape, collapse = " x "), "\n")
    cat("  Samples:", X_tensor$size(1), "\n")

    all_X[[region]] <- X_tensor
    all_y[[region]] <- y_tensor
  }

  # Concatenate all regions along the first dimension
  cat("Concatenating all regions\n")
  X_combined <- torch::torch_cat(all_X, dim = 1)
  y_combined <- torch::torch_cat(all_y, dim = 1)

  cat("Final combined shapes:\n")
  cat("  X:", paste(X_combined$shape, collapse = " x "), "\n")
  cat("  y:", paste(y_combined$shape, collapse = " x "), "\n")

  cat("Saving converted data\n")
  torch::torch_save(X_combined, file.path(output_path, "X_combined.pt"))
  torch::torch_save(y_combined, file.path(output_path, "y_combined.pt"))

  # Summary of what was converted; previously the return value referenced an
  # undefined `metadata` object and the function failed at this point.
  metadata <- list(
    regions = regions,
    samples_per_region = vapply(
      all_X,
      function(x) as.integer(x$size(1)),
      integer(1)
    ),
    x_shape = X_combined$shape,
    y_shape = y_combined$shape,
    label_offset = 1L
  )

  cat("Data conversion completed!\n")
  cat("Files saved in:", output_path, "\n")

  list(
    X = X_combined,
    y = y_combined,
    metadata = metadata,
    output_path = output_path
  )
}

In [None]:
# Convert the training regions. For each region the function reads
# X_<region>.npy and y_true_<region>.npy from `data_path` and writes
# X_combined.pt / y_combined.pt into `output_path`.
# NOTE: both path strings below are placeholders - replace them with real
# directories before running this cell.
converted_data <- convert_data_to_tensor(
  data_path = "path to numpy files",
  regions = c("frh01", "frh02", "frh03"), 
  output_path = "path to save the torch data for training"
)

FRH04 Data Conversion for Evaluation

In [None]:
#' Convert per-region NPY arrays (with field IDs) into combined torch tensors
#'
#' For every region, loads `X_<region>.npy`, `y_true_<region>.npy` and, when
#' present, `field_ids_<region>.npy` from `data_path` through numpy (via
#' reticulate). X has its 2nd and 3rd axes swapped, labels are shifted by +1,
#' and field IDs are stored as int64. When no field-ID file exists for a
#' region, sequential IDs starting at 1 are generated and a warning is raised.
#' All regions are concatenated along the first dimension and saved as
#' `X_combined_4.pt`, `y_combined_4.pt` and `field_ids_combined_4.pt` in
#' `output_path`.
#'
#' @param data_path Directory containing the NPY files.
#' @param regions Character vector of region names (at least one).
#' @param output_path Directory the `.pt` files are written to. Defaults to
#'   `<data_path>/torch_converted`; created if it does not exist.
#' @return Invisibly, a list with `X`, `y`, `field_ids` (the combined
#'   tensors), `metadata` (regions, per-region sample counts, combined
#'   shapes, label offset) and `output_path`.
convert_data_frh04 <- function(data_path, regions, output_path = NULL) {
  if (length(regions) == 0) {
    stop("`regions` must contain at least one region name", call. = FALSE)
  }

  # Validate the required inputs up front so a missing region is reported
  # before any array is loaded or any directory is created. The field-ID
  # file is optional and handled per region below.
  X_files <- file.path(data_path, paste0("X_", regions, ".npy"))
  y_files <- file.path(data_path, paste0("y_true_", regions, ".npy"))
  fid_files <- file.path(data_path, paste0("field_ids_", regions, ".npy"))
  not_found <- !file.exists(X_files) | !file.exists(y_files)
  if (any(not_found)) {
    stop(
      paste(
        "Files not found for region:",
        paste(regions[not_found], collapse = ", ")
      ),
      call. = FALSE
    )
  }

  if (is.null(output_path)) {
    output_path <- file.path(data_path, "torch_converted")
  }
  if (!dir.exists(output_path)) {
    dir.create(output_path, recursive = TRUE)
  }

  cat("Converting NPY files to optimized torch tensors\n")
  # convert = FALSE keeps numpy objects on the Python side until py_to_r()
  # is called explicitly below.
  np <- reticulate::import("numpy", convert = FALSE)

  all_X <- list()
  all_y <- list()
  all_fid <- list()

  for (i in seq_along(regions)) {
    region <- regions[[i]]
    cat("Processing region:", region, "\n")

    X_np <- reticulate::py_to_r(np$load(X_files[[i]]))
    y_np <- reticulate::py_to_r(np$load(y_files[[i]]))
    fid_np <- if (file.exists(fid_files[[i]])) {
      reticulate::py_to_r(np$load(fid_files[[i]]))
    } else {
      NULL
    }

    cat("  Original X shape:", paste(dim(X_np), collapse = " x "), "\n")
    cat("  Original y shape:", paste(length(y_np), collapse = " x "), "\n")
    if (!is.null(fid_np)) cat("  field_ids length:", length(fid_np), "\n")

    # aperm() with a length-3 permutation requires X to be a 3-D array; the
    # 2nd and 3rd axes are swapped.
    X_tensor <- torch::torch_tensor(
      aperm(X_np, c(1, 3, 2)),
      dtype = torch::torch_float32()
    )
    # Labels are shifted by +1 (converted to 1-indexed).
    y_tensor <- torch::torch_tensor(
      as.integer(y_np + 1),
      dtype = torch::torch_long()
    )

    if (!is.null(fid_np)) {
      # as.integer() yields NA for values outside the 32-bit range; fail
      # loudly instead of storing corrupted IDs.
      fid_int <- as.integer(fid_np)
      if (anyNA(fid_int)) {
        stop(
          "field_ids for region ", region,
          " contain values that cannot be represented as 32-bit integers",
          call. = FALSE
        )
      }
      fid_tensor <- torch::torch_tensor(fid_int, dtype = torch::torch_int64())
    } else {
      warning(
        "No field_ids_* file found for ", region,
        " \u2014 creating sequential IDs."
      )
      fid_tensor <- torch::torch_arange(
        start = 1,
        end = X_tensor$size(1),
        dtype = torch::torch_int64()
      )
    }

    # X, y and field IDs must describe the same number of samples.
    if (X_tensor$size(1) != y_tensor$size(1) ||
          X_tensor$size(1) != fid_tensor$size(1)) {
      stop("Length mismatch for X / y / field_ids in region ", region)
    }

    all_X[[region]] <- X_tensor
    all_y[[region]] <- y_tensor
    all_fid[[region]] <- fid_tensor
  }

  cat("Concatenating all regions along sample dimension\n")
  X_combined <- torch::torch_cat(all_X, dim = 1)
  y_combined <- torch::torch_cat(all_y, dim = 1)
  fid_combined <- torch::torch_cat(all_fid, dim = 1)

  cat("Final combined shapes:\n")
  cat("  X:", paste(X_combined$shape, collapse = " x "), "\n")
  cat("  y:", paste(y_combined$shape, collapse = " x "), "\n")
  cat("  fid:", paste(fid_combined$shape, collapse = " x "), "\n")

  cat("Saving converted data...\n")
  torch::torch_save(X_combined, file.path(output_path, "X_combined_4.pt"))
  torch::torch_save(y_combined, file.path(output_path, "y_combined_4.pt"))
  torch::torch_save(
    fid_combined,
    file.path(output_path, "field_ids_combined_4.pt")
  )

  # Summary of what was converted; previously the return value referenced an
  # undefined `metadata` object and the function failed at this point.
  metadata <- list(
    regions = regions,
    samples_per_region = vapply(
      all_X,
      function(x) as.integer(x$size(1)),
      integer(1)
    ),
    x_shape = X_combined$shape,
    y_shape = y_combined$shape,
    field_ids_shape = fid_combined$shape,
    label_offset = 1L
  )

  cat("Data conversion completed!\nFiles saved in:", output_path, "\n")
  invisible(list(
    X = X_combined,
    y = y_combined,
    field_ids = fid_combined,
    metadata = metadata,
    output_path = output_path
  ))
}

In [None]:

# Convert the FRH04 evaluation region (X, labels and field IDs).
# NOTE: both path strings below are placeholders - replace them with real
# directories before running this cell.
converted_data_frh04 <- convert_data_frh04(
  data_path = "numpy of frh04 path",
  regions = c("frh04"),
  output_path = "path to save data for frh04 evaluation"
)