# Input data for MeteoScape, TC Dolphin

## Environment

In [1]:
# Packages required by this notebook, attached in the order listed
libs <- c("tidyverse", "metR", "data.table", "sf", "anndata")
invisible(lapply(
  libs,
  function(pkg) suppressPackageStartupMessages(library(pkg, character.only = TRUE))
))

# Run the whole session in UTC so date-time parsing/printing is unambiguous
Sys.setenv(TZ = "UTC")

# Fix the RNG state so that {jitter} yields the same perturbation on every run
set.seed(42)

# Maximum jitter applied to positions: +/- 1 meter
jitter_amount <- 1

## Miscellaneous variables

In [None]:
# Tropical cyclone ID
ID <- "2020-12.DOLPHIN"

# Geodetic coordinate system for World
lonlat <- "EPSG:4326"
# Projected coordinate system for Japan
eqdc <- "EPSG:6690"

# Location of NetCDF data for TC
# Data are available from https://doi.org/10.5281/zenodo.11064128
basedir <- file.path("../Data", ID)

# One sub-folder per ensemble member, directly below basedir.
# recursive = FALSE lists only the immediate children, so basedir itself and
# any deeper folders are never counted as ensemble members.
member_dirs <- list.dirs(basedir, recursive = FALSE)

# Parent folder, one by ensemble member
BN <- basename(member_dirs)
# EM = total number of ensemble members
EM <- length(BN)
# IT = total number of initialization times (one file per IT in each member
# folder; counted from the first member)
IT <- length(list.files(file.path(basedir, BN[1])))
# FT = total number of forecasting time by IT
FT <- 14

# List of all NetCDF files (sorted by path, i.e. member folder first, then
# file name inside the folder)
lst <- list.files(basedir, recursive = TRUE, full.names = TRUE)
# The regrouping below is only valid when every member folder holds exactly
# IT files; fail loudly instead of silently mixing files otherwise.
stopifnot(
  "Expected one NetCDF file per (initialization time, ensemble member)" =
    length(lst) == IT * EM
)
# Regroup by initialization time: lst[[i]][m] is the file for IT i, member m
lst <- split(lst, rep(seq_len(IT), EM))

## Creating the data.table from NetCDF files

In [None]:
# Build the "Tracks" data.table: one row per TC center position, for every
# initialization time (IT) and ensemble member (EM), with displacement and
# velocity towards the next position of the same track.

# Read each NetCDF file into its own data.table; they are stacked once at the
# end (growing a table with rbind() inside the loop copies it at every step).
pieces <- vector("list", IT * EM)
k <- 0L
for (i in seq_len(IT)) {  # Loop through initialization times IT
  for (m in seq_len(EM)) {  # Loop through ensemble members EM
    f0 <- lst[[i]][m]  # File for this IT and EM
    # Initialization time is encoded by the digits of the file name
    InitTime <- as.POSIXct(strptime(
      sub("([0-9]+).*$", "\\1", basename(f0)), "%Y%m%d%H%M%S", tz = "UTC"
    ))
    tmp <- ReadNetCDF(f0)  # Read NetCDF file into temporary data.table
    setnames(tmp, "time", "ForecastingTime")
    # Member label = zero-based position of the member folder in the listing.
    # NOTE(review): folders are listed alphabetically; if they are named with
    # unpadded numbers, this index may not equal the folder name - confirm.
    tmp[, Member := factor(m - 1)]
    tmp[, InitializationTime := InitTime]
    k <- k + 1L
    pieces[[k]] <- tmp
    rm(tmp)
  }
}
# Concatenate by column name (same matching rule as rbind() on data.tables)
Tracks <- rbindlist(pieces, use.names = TRUE)
rm(pieces, k)
invisible(gc())

# Forecast horizon, in hours elapsed since the initialization time
# (0 = analysis). Units are requested explicitly: a plain POSIXct subtraction
# picks its own unit (secs/mins/hours/days) depending on the data.
Tracks[, Horizon := as.numeric(
  difftime(ForecastingTime, InitializationTime, units = "hours")
)]
# Perturb the TC center positions by at most +/- jitter_amount
# (X is jittered before Y, which matters for reproducibility with the seed)
Tracks[, `:=`(
  X = jitter(X, amount = jitter_amount),
  Y = jitter(Y, amount = jitter_amount)
)]
# Grouping variable based on the combination of IT and EM (one id per track)
Tracks[, gr := .GRP, by = .(InitializationTime, Member)]
# Normalization by global mean and sd in both X and Y directions.
# scale() returns a one-column matrix carrying attributes; store plain vectors.
Tracks[, `:=`(X = as.numeric(scale(X)), Y = as.numeric(scale(Y)))]
# Order by IT, then EM, then horizon BEFORE taking differences, so that
# "next row in the group" always means "next forecast time of the same track"
setorder(Tracks, InitializationTime, Member, Horizon)
# Time delta to the next position of the same track, in hours
Tracks[, dT := (as.numeric(shift(ForecastingTime, type = "lead")) -
                  as.numeric(ForecastingTime)) / (60 * 60), by = gr]
# Spatial delta to the next position of the same track
Tracks[, dX := round(shift(X, type = "lead") - X, 4), by = gr]
Tracks[, dY := round(shift(Y, type = "lead") - Y, 4), by = gr]
# Midpoints between 2 consecutive positions
Tracks[, `:=`(
  Xm = (shift(X, type = "lead") + X) / 2,
  Ym = (shift(Y, type = "lead") + Y) / 2
), by = gr]
# Velocities (standardized units per hour); elementwise, no grouping needed
Tracks[, velocX := round(dX / dT, 4)]
Tracks[, velocY := round(dY / dT, 4)]
# Removing rows with no next position (last point of each track): they have
# no delta, midpoint or velocity. Row order is preserved by the subset.
Tracks <- Tracks[!is.na(dT)]

In [None]:
# Visualization of the first 5 rows of the "Tracks" data.table
# (head() never pads with NA rows when fewer than 5 rows are available)
head(Tracks, 5)

## Clusters

In [None]:
# Definition of the clusters

# Build the convex-hull polygon of a set of vertices.
#   xs, ys : numeric vectors of vertex coordinates (same length), expressed in
#            the same standardized units as the X/Y columns of "Tracks"
#   crs    : coordinate reference system attached to the geometry
# Returns a one-row sf object (columns ID and geometry) holding the polygon.
make_cluster_polygon <- function(xs, ys, crs = eqdc) {
  tibble(x = xs, y = ys, ID = 1) %>%
    st_as_sf(coords = c("x", "y"), crs = crs) %>%
    dplyr::group_by(ID) %>%
    dplyr::summarise() %>%  # merge the vertices into a single geometry
    st_cast("POLYGON") %>%
    st_convex_hull()
}

# Cluster "Northward"
Northward <- make_cluster_polygon(
  xs = c(1.38018508218361, 1.64516648167687, 1.31045313494854,
         0.668919220385898),
  ys = c(2.30715915131801, 1.97244580458967, 1.42853661615613,
         1.72141079454342)
)

# Cluster "Eastward"
Eastward <- make_cluster_polygon(
  xs = c(1.12915007213736, 2.03566538619326, 2.23091483845146,
         1.40807786107763),
  ys = c(0.787002701593491, 1.19144799555689, 0.661485196570366,
         0.243093513159949)
)

# Cluster "Start"
Start <- make_cluster_polygon(
  xs = c(-1.35324066714766, -1.57836675332549, -1.42175730207135,
         -1.06938603674953, -1.10853839956307),
  ys = c(-1.73032284600434, -1.68138239248742, -1.40731585279267,
         -1.35837539927575, -1.60307766686035)
)

# TC centers as point geometries, one per row of "Tracks" and in the same
# order. The crs only tags the points with the same CRS as the polygons so sf
# can compare them; no reprojection is performed.
# NOTE: this object is named Y like the "Y" column of Tracks; inside
# Tracks[...] the name Y refers to the column, not to this object.
Y <- st_as_sf(Tracks[, .(X, Y)], coords = c("X", "Y"), crs = eqdc)

# Initialize Cluster column (TC centers not belonging to any cluster are blank)
Tracks[, Cluster := " "]

# Label the TC centers lying inside each polygon. The polygons are applied in
# this order, so a later cluster overwrites an earlier one where they overlap.
cluster_polygons <- list(
  Start = Start,
  Northward = Northward,
  Eastward = Eastward
)
for (cluster_name in names(cluster_polygons)) {
  # Row indices of the TC centers contained in the polygon
  idx <- st_contains(cluster_polygons[[cluster_name]], Y)[[1]]
  # Write cluster name (set() takes the value as-is, with no column lookup)
  set(Tracks, i = idx, j = "Cluster", value = cluster_name)
}
rm(idx, cluster_name, cluster_polygons)

In [None]:
# Visualization of the first 5 rows of the "Tracks" data.table
# (head() never pads with NA rows when fewer than 5 rows are available)
head(Tracks, 5)

## Creating the annotated data matrix (anndata)

In [None]:
# Annotated data matrix: observations are the track segments of "Tracks",
# variables are the two midpoint coordinates (Xm, Ym)
midpoints <- Tracks[, .(Xm, Ym)]
velocities <- as.matrix(Tracks[, .(velocX, velocY)])

adata <- AnnData(
  as.data.frame(midpoints),
  # Per-observation annotation: cluster label of each row
  obs = data.frame(
    clusters = factor(Tracks[["Cluster"]]),
    row.names = rownames(Tracks)
  ),
  # Multi-dimensional annotations: midpoints and velocities as matrices
  obsm = list(
    X_std = as.matrix(midpoints),
    velocity_std = velocities
  ),
  # Velocities stored again as a layer
  layers = list(velocity = velocities)
)
rm(midpoints, velocities)

# Range of midpoints
cat(range(adata$obsm$X_std), "\n")

In [None]:
# Visualization of the anndata object (printed by the notebook)
adata

In [None]:
# Write anndata on disk, as "adata.h5ad" in the current working directory
# (argument spelled out in full rather than relying on partial matching)
write_h5ad(adata, filename = file.path(".", "adata.h5ad"))