# Input data for MeteoScape, TC Nepartak

## Environment

In [1]:
# Packages used by this notebook; they are attached in the order listed.
libs <- c("tidyverse", "metR", "data.table", "sf", "anndata")
invisible(suppressPackageStartupMessages({
  for (pkg in libs) {
    library(pkg, character.only = TRUE)
  }
  rm(pkg)
}))

# Run the whole session in UTC so that date-time parsing and printing do not
# depend on the local time zone of the machine.
Sys.setenv(TZ = "UTC")

# Fix the random number generator state: jitter() draws random offsets, so a
# fixed seed is required to reproduce the same jittered positions.
set.seed(42)

# Maximum absolute offset passed to jitter() (+/- 1, i.e. 1 meter maximum
# according to the original author's note).
jitter_amount <- 1

## Miscellaneous variables

In [2]:
# Tropical cyclone ID
ID <- "2021-08.NEPARTAK"

# Geodetic coordinate system for World
lonlat <- "EPSG:4326"
# Projected coordinate system for Japan
eqdc <- "EPSG:6690"

# Location of NetCDF data for TC
# Data are available from https://doi.org/10.5281/zenodo.11064128
basedir <- file.path("../Data", ID)
if (!dir.exists(basedir)) {
  stop("Data directory not found: ", basedir, call. = FALSE)
}

# Sub-directories of basedir (list.dirs() also returns basedir itself, which
# is dropped here). One folder is expected per ensemble member.
member_dirs <- setdiff(list.dirs(basedir), basedir)

# Parent folder, one by ensemble member
BN <- basename(member_dirs)
# EM = total number of ensemble members
EM <- length(BN)
if (EM == 0L) {
  stop("No ensemble-member folder found in: ", basedir, call. = FALSE)
}
# IT = total number of initialization times (number of files of the first
# member; every member is expected to hold the same number of files)
IT <- length(list.files(file.path(basedir, BN[1])))
# FT = total number of forecasting time by IT
FT <- 14

# List of all NetCDF files, sorted by path (i.e. member folder first, then
# file name inside the folder).
nc_files <- list.files(basedir, recursive = TRUE, full.names = TRUE)
# The grouping below silently mis-assigns files when the members do not all
# hold exactly IT files, so fail early with an explicit message instead.
if (length(nc_files) != IT * EM) {
  stop(
    "Expected ", IT, " file(s) for each of the ", EM, " member(s) but found ",
    length(nc_files), " file(s) in total under ", basedir,
    call. = FALSE
  )
}
# Regroup the files by initialization time: element i of `lst` holds the i-th
# file of every member, in member-folder order.
# NOTE(review): member folders are ordered alphabetically by list.files();
# confirm this matches the intended member numbering (e.g. zero-padded names).
lst <- split(nc_files, rep(seq_len(IT), EM))
rm(member_dirs, nc_files)

## Creating the data.table from NetCDF files

In [3]:
# Read one NetCDF track file and tag it with its ensemble member and
# initialization time.
#   path         : path of the NetCDF file; the leading digits of its base name
#                  are parsed as a "%Y%m%d%H%M%S" initialization time.
#   member_index : 1-based position of the ensemble member; stored in column
#                  "Member" as factor(member_index - 1).
# Returns the data.table produced by ReadNetCDF(), with column "time" renamed
# to "ForecastingTime" and columns "Member" and "InitializationTime" added.
read_member_track <- function(path, member_index) {
  init_time <- as.POSIXct(
    strptime(sub("([0-9]+).*$", "\\1", basename(path)), "%Y%m%d%H%M%S")
  )
  track <- ReadNetCDF(path)
  setnames(track, "time", "ForecastingTime")
  track[, Member := factor(member_index - 1)]
  track[, InitializationTime := init_time]
  track
}

# Read every file into a pre-allocated list and bind once at the end; this
# avoids re-copying the growing table on each iteration. Row order is the same
# as a sequential rbind(): initialization times (outer), then members (inner).
pieces <- vector("list", IT * EM)
for (i in seq_len(IT)) {  # Loop through initialization times IT
  for (m in seq_len(EM)) {  # Loop through ensemble members EM
    pieces[[(i - 1L) * EM + m]] <- read_member_track(lst[[i]][m], m)
  }
}
Tracks <- rbindlist(pieces, use.names = TRUE)
rm(pieces)

# Forecast horizon in hours since initialization (0 = analysis). The unit is
# requested explicitly so that the result does not depend on the unit that
# difftime would otherwise pick automatically.
Tracks[, Horizon := as.numeric(
  difftime(ForecastingTime, InitializationTime, units = "hours")
)]
# Needed modification of the TC center position (+/- jitter_amount maximum).
# X is jittered before Y so that the random stream is consumed in a fixed order.
Tracks[, `:=`(
  X = jitter(X, amount = jitter_amount),
  Y = jitter(Y, amount = jitter_amount)
)]
# Grouping variable based on the combination of IT and EM
Tracks[, gr := .GRP, by = .(InitializationTime, Member)]
# Normalization by global mean and sd in both X and Y directions.
# scale() returns a one-column matrix carrying "scaled:*" attributes;
# as.vector() keeps the columns plain numeric vectors.
Tracks[, `:=`(X = as.vector(scale(X)), Y = as.vector(scale(Y)))]
# Time delta (hours) to the next position of the same track.
# NOTE(review): the lead/lag computations below rely on the rows of each group
# already being in chronological order as read from the file - confirm.
Tracks[, dT := (as.numeric(shift(ForecastingTime, type = "lead")) -
                  as.numeric(ForecastingTime)) / 3600, by = .(gr)]
# Spatial delta (in normalized units) to the next position of the same track
Tracks[, dX := round(shift(X, type = "lead") - X, 4), by = .(gr)]
Tracks[, dY := round(shift(Y, type = "lead") - Y, 4), by = .(gr)]
# Midpoints between 2 consecutive positions
Tracks[, `:=`(
  Xm = (shift(X, type = "lead") + X) / 2,
  Ym = (shift(Y, type = "lead") + Y) / 2
), by = .(gr)]
# Velocities (normalized units per hour)
Tracks[, velocX := round(dX / dT, 4)]
Tracks[, velocY := round(dY / dT, 4)]
# Removing rows with no position/velocity (last position of each track, which
# has no successor)
Tracks <- Tracks[!is.na(dT)]
# Set order by IT, then EM, then HZ
setorder(Tracks, InitializationTime, Member, Horizon)

In [4]:
# Display the first 5 rows of the "Tracks" data.table
Tracks[1:5]

ForecastingTime,lon,lat,X,Y,X1,Y1,Member,InitializationTime,Horizon,gr,dT,dX,dY,Xm,Ym,velocX,velocY
<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dttm>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2021-07-25 18:00:00,149.28,35.17,3.192334,-1.35457,0.0,0.0,0,2021-07-25 18:00:00,0,1,3,-0.3961,-0.0091,2.994291,-1.359119,-0.132,-0.003
2021-07-25 21:00:00,148.06,35.29,2.796248,-1.363668,-0.4281234,-0.02797162,0,2021-07-25 18:00:00,3,1,3,-0.3049,-0.0539,2.643802,-1.39061,-0.1016,-0.018
2021-07-26 00:00:00,147.1,35.27,2.491356,-1.417552,-0.757673,-0.19367054,0,2021-07-25 18:00:00,6,1,3,-0.1591,-0.1793,2.411782,-1.507182,-0.053,-0.0598
2021-07-26 03:00:00,146.55,34.92,2.332208,-1.596811,-0.9296974,-0.74485058,0,2021-07-25 18:00:00,9,1,3,-0.1075,-0.059,2.278463,-1.626312,-0.0358,-0.0197
2021-07-26 06:00:00,146.2,34.82,2.224718,-1.655813,-1.045881,-0.92628914,0,2021-07-25 18:00:00,12,1,3,-0.1449,0.0937,2.152244,-1.608978,-0.0483,0.0312


## Clusters

In [5]:
# Definition of the clusters

# Build the convex-hull polygon enclosing a set of vertices.
#   x, y : numeric vectors of vertex coordinates (same length, at least 3
#          points), expressed in the same normalized units as Tracks$X / Tracks$Y.
#   crs  : coordinate reference system attached to the geometry.
# Returns a one-row sf object (column ID = 1) holding the polygon.
make_cluster_polygon <- function(x, y, crs = eqdc) {
  stopifnot(length(x) == length(y), length(x) >= 3)
  tibble(x = x, y = y, ID = 1) %>%
    st_as_sf(coords = c("x", "y"), crs = crs) %>%
    dplyr::group_by(ID) %>%
    dplyr::summarise() %>%
    st_cast("POLYGON") %>%
    st_convex_hull()
}

# Cluster "Northward"
Northward <- make_cluster_polygon(
  x = c(-0.781715339849895, -0.442336080399338, -0.0899037725083766,
        0.57580169795233, 0.38000597134624, -0.298752547554873,
        -0.768662291409489),
  y = c(1.56780824252617, 0.967368014267491, 0.706307045459371,
        0.797678384542214, 1.26758812839683, 1.69833872693023,
        1.67223263004942)
)

# Cluster "Westward"
Westward <- make_cluster_polygon(
  x = c(-2.5308238308643, -2.20449761985415, -1.34299642278735,
        -1.38215556810857, -2.36113420113902),
  y = c(1.21537593463521, 0.0275485265582587, 0.223344253164349,
        1.29369422527764, 1.54170214564536)
)

# Cluster "Start"
Start <- make_cluster_polygon(
  x = c(1.07181753868776, 0.353899874465428, 0.353899874465428,
        1.22845411997263, 1.42424984657872, 1.30677241061507,
        1.00655229648573),
  y = c(-2.37421238647645, -2.21757580519157, -1.69545386757533,
        -1.48660509252884, -1.73461301289655, -2.29589409583401,
        -2.36115933803604)
)

# TC centers as point geometries. No reprojection takes place: the normalized
# X/Y values are only tagged with the same CRS as the polygons so that the two
# can be compared by st_contains().
Y <- st_as_sf(Tracks[, .(X, Y)], coords = c("X", "Y"), crs = eqdc)

# Initialize Cluster column (TC centers not belonging to any cluster is blank)
Tracks[, Cluster := " "]

# Label the TC centers lying inside each polygon. The polygons are processed
# in this order, so a center contained in several polygons keeps the name of
# the last one.
cluster_polygons <- list(
  Start = Start,
  Northward = Northward,
  Westward = Westward
)
for (cluster_name in names(cluster_polygons)) {
  # Row index of TC centers inside the current polygon
  idx <- st_contains(cluster_polygons[[cluster_name]], Y)[[1]]
  # Write cluster name
  Tracks[idx, Cluster := cluster_name]
}
rm(idx, cluster_name, cluster_polygons)

In [6]:
# Display the first 5 rows of the "Tracks" data.table (now including the
# "Cluster" column)
Tracks[1:5]

ForecastingTime,lon,lat,X,Y,X1,Y1,Member,InitializationTime,Horizon,gr,dT,dX,dY,Xm,Ym,velocX,velocY,Cluster
<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dttm>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
2021-07-25 18:00:00,149.28,35.17,3.192334,-1.35457,0.0,0.0,0,2021-07-25 18:00:00,0,1,3,-0.3961,-0.0091,2.994291,-1.359119,-0.132,-0.003,
2021-07-25 21:00:00,148.06,35.29,2.796248,-1.363668,-0.4281234,-0.02797162,0,2021-07-25 18:00:00,3,1,3,-0.3049,-0.0539,2.643802,-1.39061,-0.1016,-0.018,
2021-07-26 00:00:00,147.1,35.27,2.491356,-1.417552,-0.757673,-0.19367054,0,2021-07-25 18:00:00,6,1,3,-0.1591,-0.1793,2.411782,-1.507182,-0.053,-0.0598,
2021-07-26 03:00:00,146.55,34.92,2.332208,-1.596811,-0.9296974,-0.74485058,0,2021-07-25 18:00:00,9,1,3,-0.1075,-0.059,2.278463,-1.626312,-0.0358,-0.0197,
2021-07-26 06:00:00,146.2,34.82,2.224718,-1.655813,-1.045881,-0.92628914,0,2021-07-25 18:00:00,12,1,3,-0.1449,0.0937,2.152244,-1.608978,-0.0483,0.0312,


## Creating the annotated data matrix (anndata)

In [7]:
# Pieces of the annotated data matrix, built from "Tracks":
#   - midpoints between consecutive positions (also used as the main matrix)
#   - velocity components at those midpoints
#   - one cluster label per observation
midpoint_matrix <- as.matrix(Tracks[, .(Xm, Ym)])
velocity_matrix <- as.matrix(Tracks[, .(velocX, velocY)])
cluster_labels <- data.frame(
  clusters = factor(Tracks$Cluster),
  row.names = rownames(Tracks)
)

# Annotated data matrix (anndata)
adata <- AnnData(
  as.data.frame(Tracks[, .(Xm, Ym)]),
  obs = cluster_labels,
  obsm = list(X_std = midpoint_matrix, velocity_std = velocity_matrix),
  layers = list(velocity = velocity_matrix)
)
rm(midpoint_matrix, velocity_matrix, cluster_labels)

# Print the overall range (minimum and maximum over both coordinates) of the
# midpoints stored in the object
cat(range(adata$obsm$X_std), "\n")

-2.176201 3.029079 


In [8]:
# Display a summary of the anndata object
adata

AnnData object with n_obs × n_vars = 4368 × 2
    obs: 'clusters'
    obsm: 'X_std', 'velocity_std'
    layers: 'velocity'

In [9]:
# Save the anndata object as "adata.h5ad" in the current working directory
write_h5ad(adata, "./adata.h5ad")