<a href="https://colab.research.google.com/github/rodgpt/MAR_FUTURA/blob/Optimization-(trying)/NDSI/NDSI_Sites_Comparisson_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# This code compares the NDSI and bio-anthro energy for different cases. It allows to set a date range

This notebook is structured to run on Google Colab using a Python runtime with R via rpy2.
Three changes to run locally vs run in colab:
1. Remove the from google.colab import drive line
2. Remove the drive.mount("/content/gdrive") line
3. Change the path to the data folder in the third cell

In [1]:
#This cell mounts the google drive and install packages to be able to run the rest in R, because the whole runtime is in phyton

#Just for google colab
from google.colab import drive
drive.mount('/content/drive')

!pip install rpy2
%load_ext rpy2.ipython

Mounted at /content/drive


In [2]:
%%R

install.packages("tuneR")
install.packages("seewave")
install.packages("dplyr")
install.packages("purrr")
install.packages("ggplot2")
install.packages("scales")
install.packages("lubridate")
install.packages("tidyr")
install.packages("soundecology")
install.packages("beepr")
install.packages("future")
install.packages("future.apply")

library(tuneR)
library(seewave)
library(dplyr)
library(purrr)
library(ggplot2)
library(scales)
library(lubridate)
library(tidyr)
library(soundecology)
library(beepr)
library(future)
library(future.apply)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
also installing the dependency ‘signal’

trying URL 'https://cran.rstudio.com/src/contrib/signal_1.8-1.tar.gz'
trying URL 'https://cran.rstudio.com/src/contrib/tuneR_1.4.7.tar.gz'

The downloaded source packages are in
	‘/tmp/RtmpAYoGmi/downloaded_packages’
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/seewave_2.2.4.tar.gz'
Content type 'application/x-gzip' length 2933825 bytes (2.8 MB)
downloaded 2.8 MB


The downloaded source packages are in
	‘/tmp/RtmpAYoGmi/downloaded_packages’
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/dplyr_1.1.4.tar.gz'
Content type 'application/x-gzip' length 1207521 bytes (1.2 MB)
downloaded 1.2 MB


The downloaded source packages are in
	‘/tmp/RtmpAYoGmi/downloaded_packages’
Installing package into ‘/usr/loca

In [3]:
%%R

#For running locally (Rod)
#dirs_sites <- list(
#  "San Antonio 28" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/San Antonio/28",
#  "Ventanas 39" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/Ventanas/20-10-25/39/Untitled",
#  "Zapallar 32" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/Zapallar/20-10-25/32",
#  "Las Cruces 41" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/LasCruces/20-10-25/41"
#)
#output_dir <- "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/NDSIResults"

#For running in Colab
dirs_sites <- list(
  "San Antonio 38" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/San Antonio/12-11-25/38",
  "Ventanas 36" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/Ventanas/07-11-25/36",
  "Zapallar 34" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/Zapallar/07-11-25/34",
  "Las Cruces 26" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/LasCruces/12-11-25/26",
  "Matanzas 32" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/Matanzas/13-11-25/32"
)
output_dir <- "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/NDSIResults"

segment_sec <- 60
anthro_band <- c(1000, 2000)
bio_band    <- c(2000, 3000)
tz <- "UTC"
analysis_duration <- NA
files_per_folder <- NA
files_per_hour <- NA
output_csv <- "ndsi_casestudies_resultsD2.csv"

start_date <- as.POSIXct("2025-10-21 00:00:00", tz = tz)
end_date   <- as.POSIXct("2025-11-05 23:59:59", tz = tz)

workers <- max(1, parallel::detectCores() - 1)
future::plan(future::multisession, workers = workers)

extract_datetime <- function(filename) {
  dt_str <- sub("^(?:ST_\\d+_)?(\\d{8}_\\d{6})\\.WAV$", "\\1", basename(filename), ignore.case = TRUE)
  as.POSIXct(dt_str, format = "%Y%m%d_%H%M%S", tz = tz)
}

# Cache FFT bin indices across segments when segment length + sample rate stay constant
.ndsi_cache <- new.env(parent = emptyenv())

calculate_ndsi <- function(wave_obj) {
  samples <- wave_obj@left
  sample_rate <- wave_obj@samp.rate
  n <- length(samples)

  cache_key <- paste(n, sample_rate, anthro_band[1], anthro_band[2], bio_band[1], bio_band[2], sep = "|")
  cached <- .ndsi_cache[[cache_key]]

  if (is.null(cached)) {
    freqs_hz <- (0:(n/2 - 1)) * (sample_rate / as.double(n))
    anthro_indices <- which(freqs_hz >= anthro_band[1] & freqs_hz <= anthro_band[2])
    bio_indices <- which(freqs_hz >= bio_band[1] & freqs_hz <= bio_band[2])
    cached <- list(anthro_indices = anthro_indices, bio_indices = bio_indices)
    .ndsi_cache[[cache_key]] <- cached
  }

  fft_result <- fft(samples)
  power_spectrum <- Mod(fft_result[1:(n/2)])^2 / n^2

  anthro_energy <- sum(power_spectrum[cached$anthro_indices])
  bio_energy <- sum(power_spectrum[cached$bio_indices])

  denom <- bio_energy + anthro_energy
  ndsi_val <- if (!is.finite(denom) || denom == 0) {
    NA_real_
  } else {
    (bio_energy - anthro_energy) / denom
  }

  list(ndsi = ndsi_val, anthro_energy = anthro_energy, bio_energy = bio_energy)
}

.evenly_spaced_indices <- function(n, k) {
  if (k <= 0) return(integer(0))
  if (n <= k) return(seq_len(n))
  idx <- round((seq_len(k) * (n + 1)) / (k + 1))
  idx <- pmax(1L, pmin(n, idx))
  sort(unique(idx))
}

.fmt_secs <- function(secs) {
  if (!is.finite(secs) || is.na(secs) || secs < 0) return("NA")
  secs <- as.integer(round(secs))
  h <- secs %/% 3600
  m <- (secs %% 3600) %/% 60
  s <- secs %% 60
  if (h > 0) return(sprintf("%dh%02dm%02ds", h, m, s))
  if (m > 0) return(sprintf("%dm%02ds", m, s))
  sprintf("%ds", s)
}

process_site <- function(directory, label) {
  files <- list.files(directory, pattern = "\\.wav$", full.names = TRUE, recursive = TRUE, ignore.case = TRUE)
  message("Found ", length(files), " files in ", label, " (searching recursively, case-insensitive)")

  if (length(files) == 0) {
    return(tibble(
      Site = character(),
      Time = as.POSIXct(character()),
      NDSI = numeric(),
      Anthro_Energy = numeric(),
      Bio_Energy = numeric()
    ))
  }

  files <- sort(files)

  if (!is.na(files_per_hour) && (is.na(files_per_hour) || files_per_hour <= 0)) {
    stop("`files_per_hour` must be NA or a positive number.")
  }

  file_dt <- purrr::map_df(files, ~tibble(
    filepath = .x,
    start_dt = extract_datetime(.x)
  ))

  if (nrow(file_dt) > 0) {
    if (!is.na(start_date)) {
      file_dt <- dplyr::filter(file_dt, start_dt >= (start_date - segment_sec))
    }
    if (!is.na(end_date)) {
      file_dt <- dplyr::filter(file_dt, start_dt <= end_date)
    }
  }

  file_dt <- dplyr::arrange(file_dt, start_dt)

  if (!is.na(files_per_hour)) {
    k <- as.integer(files_per_hour)
    file_dt <- file_dt %>%
      mutate(.hour = lubridate::floor_date(start_dt, unit = "hour")) %>%
      group_by(.hour) %>%
      arrange(start_dt, .by_group = TRUE) %>%
      group_modify(function(.x, .g) {
        idx <- .evenly_spaced_indices(nrow(.x), k)
        .x[idx, , drop = FALSE]
      }) %>%
      ungroup() %>%
      select(-.hour)
  }

  files <- file_dt$filepath

  if (!is.na(files_per_folder)) {
    files <- head(files, files_per_folder)
  }

  if (length(files) == 0) {
    return(tibble(
      Site = character(),
      Time = as.POSIXct(character()),
      NDSI = numeric(),
      Anthro_Energy = numeric(),
      Bio_Energy = numeric()
    ))
  }

  pb <- utils::txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit({
    try(close(pb), silent = TRUE)
  }, add = TRUE)

  t0 <- Sys.time()
  last_print <- Sys.time()

  results <- vector("list", length(files))
  out_k <- 0L

  for (i in seq_along(files)) {
    fp <- files[[i]]
    utils::setTxtProgressBar(pb, i)

    now <- Sys.time()
    if (i == 1L || i == length(files) || as.numeric(difftime(now, last_print, units = "secs")) >= 2) {
      elapsed <- as.numeric(difftime(now, t0, units = "secs"))
      avg_sec <- elapsed / i
      remaining <- avg_sec * (length(files) - i)
      pct <- 100 * i / length(files)
      eta <- now + remaining
      cat(sprintf(
        "\r%s | %5.1f%% | %d/%d | avg %s/file | ETA %s | ~%s remaining",
        label,
        pct,
        i,
        length(files),
        .fmt_secs(avg_sec),
        format(eta, "%H:%M:%S"),
        .fmt_secs(remaining)
      ))
      flush.console()
      last_print <- now
    }

    start_dt <- extract_datetime(fp)

    wav <- tryCatch(readWave(fp), error = function(e) {
      warning("Skipping unreadable file: ", fp)
      return(NULL)
    })
    if (is.null(wav)) next

    dur_sec <- length(wav@left) / wav@samp.rate
    seg_len <- min(segment_sec, dur_sec)
    if (!is.finite(seg_len) || seg_len <= 0) next

    max_start <- max(0, dur_sec - seg_len)
    starts <- seq(0, max_start, by = seg_len)

    for (st in starts) {
      segment_time <- start_dt + st

      if (!is.na(start_date) && segment_time < start_date) {
        next
      }
      if (!is.na(end_date) && segment_time > end_date) {
        break
      }

      seg <- tryCatch(
        extractWave(wav, from = st, to = st + seg_len, xunit = "time"),
        error = function(e) return(NULL)
      )
      if (is.null(seg)) next

      ndsi_res <- calculate_ndsi(seg)

      out_k <- out_k + 1L
      results[[out_k]] <- tibble(
        Site = label,
        Time = segment_time,
        NDSI = ndsi_res$ndsi,
        Anthro_Energy = ndsi_res$anthro_energy,
        Bio_Energy = ndsi_res$bio_energy
      )
    }
  }

  cat("\n")

  if (out_k == 0L) {
    return(tibble(
      Site = character(),
      Time = as.POSIXct(character()),
      NDSI = numeric(),
      Anthro_Energy = numeric(),
      Bio_Energy = numeric()
    ))
  }

  bind_rows(results[seq_len(out_k)])
}

anthro_tag <- paste0("anthro_", anthro_band[1], "-", anthro_band[2])
bio_tag <- paste0("bio_", bio_band[1], "-", bio_band[2])

date_tag <- if (!is.na(start_date) && !is.na(end_date)) {
  paste0(format(start_date, "%Y%m%d"), "-", format(end_date, "%Y%m%d"))
} else {
  "alltime"
}

dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

all_results <- bind_rows(
  lapply(names(dirs_sites), function(label) {
    tryCatch(
      process_site(dirs_sites[[label]], label),
      error = function(e) {
        warning("Site failed: ", label, " - ", conditionMessage(e))
        return(tibble(
          Site = character(),
          Time = as.POSIXct(character()),
          NDSI = numeric(),
          Anthro_Energy = numeric(),
          Bio_Energy = numeric()
        ))
      }
    )
  })
)

if (!exists("all_results") || nrow(all_results) == 0) {
  stop("No WAV files found in the provided directories and date range. Please verify the paths in `dirs_sites` and the date filters.")
}

output_csv_all <- file.path(output_dir, paste0(
  "ndsi_casestudies_", date_tag, "_", anthro_tag, "_", bio_tag, ".csv"
))
write.csv(all_results, output_csv_all, row.names = FALSE)
message("Saved to: ", output_csv_all)

summary_stats <- all_results %>%
  group_by(Site) %>%
  summarize(
    Segments  = n(),
    Mean_NDSI = mean(NDSI, na.rm = TRUE),
    SD_NDSI   = sd(NDSI, na.rm = TRUE),
    Mean_Anthro_Energy = mean(Anthro_Energy, na.rm = TRUE),
    SD_Anthro_Energy = sd(Anthro_Energy, na.rm = TRUE),
    Mean_Bio_Energy = mean(Bio_Energy, na.rm = TRUE),
    SD_Bio_Energy = sd(Bio_Energy, na.rm = TRUE)
  )
print(summary_stats)

plot_data <- all_results

plot_data$Site <- factor(plot_data$Site, levels = names(dirs_sites))

## NDSI time-series plot
p_ndsi <- ggplot(plot_data, aes(x = Time, y = NDSI, color = Site)) +
  geom_line(size = 0.8) +
  facet_wrap(~Site, ncol = 1, scales = "free_x") +
  scale_x_datetime(
    date_labels = "%d-%b %H:%M",
    date_breaks = "2 hour",
    expand = expansion(mult = c(0.01, 0.01))
  ) +
  labs(
    title = "NDSI Over Time for Case Study Sites",
    x = "Date-Time",
    y = "NDSI"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      vjust = 1,
      size = 6,
      margin = margin(t = 5)
    ),
    strip.text = element_text(face = "bold")
  )

print(p_ndsi)

## Anthropogenic energy time-series plot
p_anthro <- ggplot(plot_data, aes(x = Time, y = Anthro_Energy, color = Site)) +
  geom_line(size = 0.8) +
  facet_wrap(~Site, ncol = 1, scales = "free_x") +
  scale_x_datetime(
    date_labels = "%d-%b %H:%M",
    date_breaks = "2 hour",
    expand = expansion(mult = c(0.01, 0.01))
  ) +
  labs(
    title = "Anthropogenic Energy Over Time for Case Study Sites",
    x = "Date-Time",
    y = "Anthropogenic Energy (arbitrary units)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      vjust = 1,
      size = 6,
      margin = margin(t = 5)
    ),
    strip.text = element_text(face = "bold")
  )

print(p_anthro)

## Biophonic energy time-series plot
p_bio <- ggplot(plot_data, aes(x = Time, y = Bio_Energy, color = Site)) +
  geom_line(size = 0.8) +
  facet_wrap(~Site, ncol = 1, scales = "free_x") +
  scale_x_datetime(
    date_labels = "%d-%b %H:%M",
    date_breaks = "2 hour",
    expand = expansion(mult = c(0.01, 0.01))
  ) +
  labs(
    title = "Biophonic Energy Over Time for Case Study Sites",
    x = "Date-Time",
    y = "Biophonic Energy (arbitrary units)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      vjust = 1,
      size = 6,
      margin = margin(t = 5)
    ),
    strip.text = element_text(face = "bold")
  )

print(p_bio)

## NDSI quadrant table (percentage of time per site in each quadrant)
ndsi_quadrant_table <- plot_data %>%
  mutate(
    NDSI_Quadrant = case_when(
      NDSI >= 0.5  & NDSI <= 1   ~ "[0.5, 1]",
      NDSI >  0    & NDSI <  0.5 ~ "(0, 0.5)",
      NDSI >= -0.5 & NDSI <= 0   ~ "[-0.5, 0]",
      NDSI >= -1   & NDSI < -0.5 ~ "[-1, -0.5)",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(NDSI_Quadrant)) %>%
  group_by(Site, NDSI_Quadrant) %>%
  summarise(
    Segments = n(),
    .groups = "drop_last"
  ) %>%
  mutate(
    Total_Segments = sum(Segments),
    Percent_Time = 100 * Segments / Total_Segments
  ) %>%
  ungroup()

print(ndsi_quadrant_table)

beepr::beep(3)


Found 18496 files in San Antonio 38 (searching recursively, case-insensitive)
Found 15185 files in Ventanas 36 (searching recursively, case-insensitive)
Found 15164 files in Zapallar 34 (searching recursively, case-insensitive)
Found 15612 files in Las Cruces 26 (searching recursively, case-insensitive)
Found 0 files in Matanzas 32 (searching recursively, case-insensitive)
Error in (function (expr, envir = parent.frame(), enclos = if (is.list(envir) ||  : 
  No WAV files found in the provided directories and date range. Please verify the paths in `dirs_sites` and the date filters.


RInterpreterError: Failed to parse and evaluate line '\n#For running locally (Rod)\n#dirs_sites <- list(\n#  "San Antonio 28" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/San Antonio/28",\n#  "Ventanas 39" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/Ventanas/20-10-25/39/Untitled",\n#  "Zapallar 32" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/Zapallar/20-10-25/32",\n#  "Las Cruces 41" = "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/LasCruces/20-10-25/41"\n#)\n#output_dir <- "/Users/rod/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Unidades compartidas/MAR FUTURA/Hydrophones/NDSIResults"\n\n#For running in Colab\ndirs_sites <- list(\n  "San Antonio 38" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/San Antonio/12-11-25/38",\n  "Ventanas 36" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/Ventanas/07-11-25/36",\n  "Zapallar 34" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/Zapallar/07-11-25/34",\n  "Las Cruces 26" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/LasCruces/12-11-25/26",\n  "Matanzas 32" = "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/LasCruces/13-11-25/32"\n)\noutput_dir <- "/content/drive/Shareddrives/MAR FUTURA/Hydrophones/NDSIResults"\n\nsegment_sec <- 60\nanthro_band <- c(1000, 2000)\nbio_band    <- c(2000, 3000) \ntz <- "UTC"\nanalysis_duration <- NA\nfiles_per_folder <- NA\nfiles_per_hour <- NA\noutput_csv <- "ndsi_casestudies_resultsD2.csv"\n\nstart_date <- as.POSIXct("2025-10-21 00:00:00", tz = tz)\nend_date   <- as.POSIXct("2025-11-05 23:59:59", tz = tz)\n\nworkers <- max(1, parallel::detectCores() - 1)\nfuture::plan(future::multisession, workers = workers)\n\nextract_datetime <- function(filename) {\n  dt_str <- sub("^(?:ST_\\\\d+_)?(\\\\d{8}_\\\\d{6})\\\\.WAV$", "\\\\1", basename(filename), ignore.case = TRUE)\n  as.POSIXct(dt_str, format = "%Y%m%d_%H%M%S", tz = tz)\n}\n\n# Cache FFT bin indices across segments when segment length + sample rate stay constant\n.ndsi_cache <- new.env(parent = emptyenv())\n\ncalculate_ndsi <- function(wave_obj) {\n  samples <- wave_obj@left\n  sample_rate <- wave_obj@samp.rate\n  n <- length(samples)\n\n  cache_key <- paste(n, sample_rate, anthro_band[1], anthro_band[2], bio_band[1], bio_band[2], sep = "|")\n  cached <- .ndsi_cache[[cache_key]]\n\n  if (is.null(cached)) {\n    freqs_hz <- (0:(n/2 - 1)) * (sample_rate / as.double(n))\n    anthro_indices <- which(freqs_hz >= anthro_band[1] & freqs_hz <= anthro_band[2])\n    bio_indices <- which(freqs_hz >= bio_band[1] & freqs_hz <= bio_band[2])\n    cached <- list(anthro_indices = anthro_indices, bio_indices = bio_indices)\n    .ndsi_cache[[cache_key]] <- cached\n  }\n\n  fft_result <- fft(samples)\n  power_spectrum <- Mod(fft_result[1:(n/2)])^2 / n^2\n\n  anthro_energy <- sum(power_spectrum[cached$anthro_indices])\n  bio_energy <- sum(power_spectrum[cached$bio_indices])\n\n  denom <- bio_energy + anthro_energy\n  ndsi_val <- if (!is.finite(denom) || denom == 0) {\n    NA_real_\n  } else {\n    (bio_energy - anthro_energy) / denom\n  }\n\n  list(ndsi = ndsi_val, anthro_energy = anthro_energy, bio_energy = bio_energy)\n}\n\n.evenly_spaced_indices <- function(n, k) {\n  if (k <= 0) return(integer(0))\n  if (n <= k) return(seq_len(n))\n  idx <- round((seq_len(k) * (n + 1)) / (k + 1))\n  idx <- pmax(1L, pmin(n, idx))\n  sort(unique(idx))\n}\n\n.fmt_secs <- function(secs) {\n  if (!is.finite(secs) || is.na(secs) || secs < 0) return("NA")\n  secs <- as.integer(round(secs))\n  h <- secs %/% 3600\n  m <- (secs %% 3600) %/% 60\n  s <- secs %% 60\n  if (h > 0) return(sprintf("%dh%02dm%02ds", h, m, s))\n  if (m > 0) return(sprintf("%dm%02ds", m, s))\n  sprintf("%ds", s)\n}\n\nprocess_site <- function(directory, label) {\n  files <- list.files(directory, pattern = "\\\\.wav$", full.names = TRUE, recursive = TRUE, ignore.case = TRUE)\n  message("Found ", length(files), " files in ", label, " (searching recursively, case-insensitive)")\n\n  if (length(files) == 0) {\n    return(tibble(\n      Site = character(),\n      Time = as.POSIXct(character()),\n      NDSI = numeric(),\n      Anthro_Energy = numeric(),\n      Bio_Energy = numeric()\n    ))\n  }\n\n  files <- sort(files)\n\n  if (!is.na(files_per_hour) && (is.na(files_per_hour) || files_per_hour <= 0)) {\n    stop("`files_per_hour` must be NA or a positive number.")\n  }\n\n  file_dt <- purrr::map_df(files, ~tibble(\n    filepath = .x,\n    start_dt = extract_datetime(.x)\n  ))\n\n  if (nrow(file_dt) > 0) {\n    if (!is.na(start_date)) {\n      file_dt <- dplyr::filter(file_dt, start_dt >= (start_date - segment_sec))\n    }\n    if (!is.na(end_date)) {\n      file_dt <- dplyr::filter(file_dt, start_dt <= end_date)\n    }\n  }\n\n  file_dt <- dplyr::arrange(file_dt, start_dt)\n\n  if (!is.na(files_per_hour)) {\n    k <- as.integer(files_per_hour)\n    file_dt <- file_dt %>%\n      mutate(.hour = lubridate::floor_date(start_dt, unit = "hour")) %>%\n      group_by(.hour) %>%\n      arrange(start_dt, .by_group = TRUE) %>%\n      group_modify(function(.x, .g) {\n        idx <- .evenly_spaced_indices(nrow(.x), k)\n        .x[idx, , drop = FALSE]\n      }) %>%\n      ungroup() %>%\n      select(-.hour)\n  }\n\n  files <- file_dt$filepath\n\n  if (!is.na(files_per_folder)) {\n    files <- head(files, files_per_folder)\n  }\n\n  if (length(files) == 0) {\n    return(tibble(\n      Site = character(),\n      Time = as.POSIXct(character()),\n      NDSI = numeric(),\n      Anthro_Energy = numeric(),\n      Bio_Energy = numeric()\n    ))\n  }\n\n  pb <- utils::txtProgressBar(min = 0, max = length(files), style = 3)\n  on.exit({\n    try(close(pb), silent = TRUE)\n  }, add = TRUE)\n\n  t0 <- Sys.time()\n  last_print <- Sys.time()\n\n  results <- vector("list", length(files))\n  out_k <- 0L\n\n  for (i in seq_along(files)) {\n    fp <- files[[i]]\n    utils::setTxtProgressBar(pb, i)\n\n    now <- Sys.time()\n    if (i == 1L || i == length(files) || as.numeric(difftime(now, last_print, units = "secs")) >= 2) {\n      elapsed <- as.numeric(difftime(now, t0, units = "secs"))\n      avg_sec <- elapsed / i\n      remaining <- avg_sec * (length(files) - i)\n      pct <- 100 * i / length(files)\n      eta <- now + remaining\n      cat(sprintf(\n        "\\r%s | %5.1f%% | %d/%d | avg %s/file | ETA %s | ~%s remaining",\n        label,\n        pct,\n        i,\n        length(files),\n        .fmt_secs(avg_sec),\n        format(eta, "%H:%M:%S"),\n        .fmt_secs(remaining)\n      ))\n      flush.console()\n      last_print <- now\n    }\n\n    start_dt <- extract_datetime(fp)\n\n    wav <- tryCatch(readWave(fp), error = function(e) {\n      warning("Skipping unreadable file: ", fp)\n      return(NULL)\n    })\n    if (is.null(wav)) next\n\n    dur_sec <- length(wav@left) / wav@samp.rate\n    seg_len <- min(segment_sec, dur_sec)\n    if (!is.finite(seg_len) || seg_len <= 0) next\n\n    max_start <- max(0, dur_sec - seg_len)\n    starts <- seq(0, max_start, by = seg_len)\n\n    for (st in starts) {\n      segment_time <- start_dt + st\n\n      if (!is.na(start_date) && segment_time < start_date) {\n        next\n      }\n      if (!is.na(end_date) && segment_time > end_date) {\n        break\n      }\n\n      seg <- tryCatch(\n        extractWave(wav, from = st, to = st + seg_len, xunit = "time"),\n        error = function(e) return(NULL)\n      )\n      if (is.null(seg)) next\n\n      ndsi_res <- calculate_ndsi(seg)\n\n      out_k <- out_k + 1L\n      results[[out_k]] <- tibble(\n        Site = label,\n        Time = segment_time,\n        NDSI = ndsi_res$ndsi,\n        Anthro_Energy = ndsi_res$anthro_energy,\n        Bio_Energy = ndsi_res$bio_energy\n      )\n    }\n  }\n\n  cat("\\n")\n\n  if (out_k == 0L) {\n    return(tibble(\n      Site = character(),\n      Time = as.POSIXct(character()),\n      NDSI = numeric(),\n      Anthro_Energy = numeric(),\n      Bio_Energy = numeric()\n    ))\n  }\n\n  bind_rows(results[seq_len(out_k)])\n}\n\nanthro_tag <- paste0("anthro_", anthro_band[1], "-", anthro_band[2])\nbio_tag <- paste0("bio_", bio_band[1], "-", bio_band[2])\n\ndate_tag <- if (!is.na(start_date) && !is.na(end_date)) {\n  paste0(format(start_date, "%Y%m%d"), "-", format(end_date, "%Y%m%d"))\n} else {\n  "alltime"\n}\n\ndir.create(output_dir, recursive = TRUE, showWarnings = FALSE)\n\nall_results <- bind_rows(\n  lapply(names(dirs_sites), function(label) {\n    tryCatch(\n      process_site(dirs_sites[[label]], label),\n      error = function(e) {\n        warning("Site failed: ", label, " - ", conditionMessage(e))\n        return(tibble(\n          Site = character(),\n          Time = as.POSIXct(character()),\n          NDSI = numeric(),\n          Anthro_Energy = numeric(),\n          Bio_Energy = numeric()\n        ))\n      }\n    )\n  })\n)\n\nif (!exists("all_results") || nrow(all_results) == 0) {\n  stop("No WAV files found in the provided directories and date range. Please verify the paths in `dirs_sites` and the date filters.")\n}\n\noutput_csv_all <- file.path(output_dir, paste0(\n  "ndsi_casestudies_", date_tag, "_", anthro_tag, "_", bio_tag, ".csv"\n))\nwrite.csv(all_results, output_csv_all, row.names = FALSE)\nmessage("Saved to: ", output_csv_all)\n\nsummary_stats <- all_results %>%\n  group_by(Site) %>%\n  summarize(\n    Segments  = n(),\n    Mean_NDSI = mean(NDSI, na.rm = TRUE),\n    SD_NDSI   = sd(NDSI, na.rm = TRUE),\n    Mean_Anthro_Energy = mean(Anthro_Energy, na.rm = TRUE),\n    SD_Anthro_Energy = sd(Anthro_Energy, na.rm = TRUE),\n    Mean_Bio_Energy = mean(Bio_Energy, na.rm = TRUE),\n    SD_Bio_Energy = sd(Bio_Energy, na.rm = TRUE)\n  )\nprint(summary_stats)\n\nplot_data <- all_results\n\nplot_data$Site <- factor(plot_data$Site, levels = names(dirs_sites))\n\n## NDSI time-series plot\np_ndsi <- ggplot(plot_data, aes(x = Time, y = NDSI, color = Site)) +\n  geom_line(size = 0.8) +\n  facet_wrap(~Site, ncol = 1, scales = "free_x") +\n  scale_x_datetime(\n    date_labels = "%d-%b %H:%M",\n    date_breaks = "2 hour",\n    expand = expansion(mult = c(0.01, 0.01))\n  ) +\n  labs(\n    title = "NDSI Over Time for Case Study Sites",\n    x = "Date-Time",\n    y = "NDSI"\n  ) +\n  theme_minimal() +\n  theme(\n    legend.position = "none",\n    plot.title = element_text(face = "bold", size = 14),\n    axis.text.x = element_text(\n      angle = 45,\n      hjust = 1,\n      vjust = 1,\n      size = 6,\n      margin = margin(t = 5)\n    ),\n    strip.text = element_text(face = "bold")\n  )\n\nprint(p_ndsi)\n\n## Anthropogenic energy time-series plot\np_anthro <- ggplot(plot_data, aes(x = Time, y = Anthro_Energy, color = Site)) +\n  geom_line(size = 0.8) +\n  facet_wrap(~Site, ncol = 1, scales = "free_x") +\n  scale_x_datetime(\n    date_labels = "%d-%b %H:%M",\n    date_breaks = "2 hour",\n    expand = expansion(mult = c(0.01, 0.01))\n  ) +\n  labs(\n    title = "Anthropogenic Energy Over Time for Case Study Sites",\n    x = "Date-Time",\n    y = "Anthropogenic Energy (arbitrary units)"\n  ) +\n  theme_minimal() +\n  theme(\n    legend.position = "none",\n    plot.title = element_text(face = "bold", size = 14),\n    axis.text.x = element_text(\n      angle = 45,\n      hjust = 1,\n      vjust = 1,\n      size = 6,\n      margin = margin(t = 5)\n    ),\n    strip.text = element_text(face = "bold")\n  )\n\nprint(p_anthro)\n\n## Biophonic energy time-series plot\np_bio <- ggplot(plot_data, aes(x = Time, y = Bio_Energy, color = Site)) +\n  geom_line(size = 0.8) +\n  facet_wrap(~Site, ncol = 1, scales = "free_x") +\n  scale_x_datetime(\n    date_labels = "%d-%b %H:%M",\n    date_breaks = "2 hour",\n    expand = expansion(mult = c(0.01, 0.01))\n  ) +\n  labs(\n    title = "Biophonic Energy Over Time for Case Study Sites",\n    x = "Date-Time",\n    y = "Biophonic Energy (arbitrary units)"\n  ) +\n  theme_minimal() +\n  theme(\n    legend.position = "none",\n    plot.title = element_text(face = "bold", size = 14),\n    axis.text.x = element_text(\n      angle = 45,\n      hjust = 1,\n      vjust = 1,\n      size = 6,\n      margin = margin(t = 5)\n    ),\n    strip.text = element_text(face = "bold")\n  )\n\nprint(p_bio)\n\n## NDSI quadrant table (percentage of time per site in each quadrant)\nndsi_quadrant_table <- plot_data %>%\n  mutate(\n    NDSI_Quadrant = case_when(\n      NDSI >= 0.5  & NDSI <= 1   ~ "[0.5, 1]",\n      NDSI >  0    & NDSI <  0.5 ~ "(0, 0.5)",\n      NDSI >= -0.5 & NDSI <= 0   ~ "[-0.5, 0]",\n      NDSI >= -1   & NDSI < -0.5 ~ "[-1, -0.5)",\n      TRUE ~ NA_character_\n    )\n  ) %>%\n  filter(!is.na(NDSI_Quadrant)) %>%\n  group_by(Site, NDSI_Quadrant) %>%\n  summarise(\n    Segments = n(),\n    .groups = "drop_last"\n  ) %>%\n  mutate(\n    Total_Segments = sum(Segments),\n    Percent_Time = 100 * Segments / Total_Segments\n  ) %>%\n  ungroup()\n\nprint(ndsi_quadrant_table)\n\nbeepr::beep(3)\n'.
R error message: 'Error in (function (expr, envir = parent.frame(), enclos = if (is.list(envir) ||  : \n  No WAV files found in the provided directories and date range. Please verify the paths in `dirs_sites` and the date filters.'
R stdout:
Found 18496 files in San Antonio 38 (searching recursively, case-insensitive)
Found 15185 files in Ventanas 36 (searching recursively, case-insensitive)
Found 15164 files in Zapallar 34 (searching recursively, case-insensitive)
Found 15612 files in Las Cruces 26 (searching recursively, case-insensitive)
Found 0 files in Matanzas 32 (searching recursively, case-insensitive)
Error in (function (expr, envir = parent.frame(), enclos = if (is.list(envir) ||  : 
  No WAV files found in the provided directories and date range. Please verify the paths in `dirs_sites` and the date filters.