In [13]:
# Import required Julia packages for oceanographic data analysis
using NCDatasets      # For reading and writing NetCDF files
using PhysOcean       # Physical oceanography utilities
using DataStructures  # For ordered dictionaries and other data structures
using DIVAnd          # Data-Interpolating Variational Analysis in n-dimensions
using PyPlot          # Plotting library (matplotlib wrapper)
using Dates           # Date and time handling
using Statistics      # Statistical functions (mean, etc.)
using Random          # Random number generation
using Printf          # String formatting with printf-style syntax


In [14]:
datafile = "data.nc"

"data.nc"

In [15]:
# Examine the NetCDF file structure to see what variables are available
using NCDatasets
ds = NCDataset(datafile, "r")
println("Available variables in the NetCDF file:")
for (varname, var) in ds
    println("  Variable: $varname")
    if haskey(var.attrib, "long_name")
        println("    long_name: $(var.attrib["long_name"])")
    end
    if haskey(var.attrib, "standard_name")
        println("    standard_name: $(var.attrib["standard_name"])")
    end
    if haskey(var.attrib, "units")
        println("    units: $(var.attrib["units"])")
    end
    println()
end
close(ds)

Available variables in the NetCDF file:
  Variable: cruise_id
    long_name: Cruise
    units: 

  Variable: station_id
    long_name: Station
    units: 

  Variable: station_type
    long_name: Type
    units: 

  Variable: longitude
    long_name: Longitude
    standard_name: longitude
    units: degrees_east

  Variable: latitude
    long_name: Latitude
    standard_name: latitude
    units: degrees_north

  Variable: LOCAL_CDI_ID
    long_name: LOCAL_CDI_ID
    units: 

  Variable: EDMO_code
    long_name: EDMO_code
    units: 

  Variable: Bot_Depth
    long_name: Bot. Depth
    units: m

  Variable: Instrument_Info
    long_name: Instrument Info
    units: 

  Variable: Codes_in_Originator_File
    long_name: Codes in Originator File
    units: 

  Variable: P35_Contributor_Codes
    long_name: P35 Contributor Codes
    units: 

  Variable: References
    long_name: References
    units: 

  Variable: Comments
    long_name: Comments
    units: 

  Variable: Data_set_name
    lo

closed Dataset

In [16]:
# Define spatial grid parameters for the Mediterranean Sea analysis
dx, dy = 0.125, 0.125  # Grid resolution in degrees (longitude, latitude)

# Extended Mediterranean Sea boundaries:
# Longitude: from -6°W (Gibraltar) to 36°E (Eastern Mediterranean/Cyprus)
# Latitude: from 30°N (North Africa) to 46°N (Northern Italy/France)
lonr = -6:dx:36        # Longitude range covering entire Mediterranean
latr = 30:dy:46        # Latitude range covering entire Mediterranean

timerange = [Date(2003,06,06),Date(2012,01,01)];  # Time period for analysis

println("Mediterranean Sea grid:")
println("  Longitude: $(minimum(lonr))° to $(maximum(lonr))°E ($(length(lonr)) points)")
println("  Latitude: $(minimum(latr))° to $(maximum(latr))°N ($(length(latr)) points)")
println("  Total grid points: $(length(lonr)) × $(length(latr)) = $(length(lonr) * length(latr))")

Mediterranean Sea grid:
  Longitude: -6.0° to 36.0°E (337 points)
  Latitude: 30.0° to 46.0°N (129 points)
  Total grid points: 337 × 129 = 43473


In [17]:
# Define depth levels for 3D analysis (in meters)
# Full depth range commented out, using simplified 3-level version
depthr = [0.,5., 10., 15., 20., 25., 30., 40., 50., 66, 
    75, 85, 100, 112, 125, 135, 150, 175, 200, 225, 250, 
    275, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 
    800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 
    1300, 1350, 1400, 1450, 1500, 1600, 1750, 1850, 2000];
depthr = [0.,10.,20.];  # Simplified to 3 depth levels: surface, 10m, 20m

# Define analysis parameters
varname = "Water body chlorophyll-a"    # Variable being analyzed (using correct long_name)
yearlist = [2003:2012]; # Years to include in analysis
monthlist = [[1,2,3],[4,5,6],[7,8,9],[10,11,12]]; # Seasonal groupings (quarters)

# Create time selector for seasonal analysis
TS = DIVAnd.TimeSelectorYearListMonthList(yearlist,monthlist);
@show TS;

TS = TimeSelectorYearListMonthList{Vector{UnitRange{Int64}}, Vector{Vector{Int64}}}(UnitRange{Int64}[2003:2012], [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])


In [18]:
# Then load from full dataset (overwrites the small dataset variables)
# Use the correct long_name attribute: "Water body chlorophyll-a" (with hyphen)
@time obsval,obslon,obslat,obsdepth,obstime,obsid = NCODV.load(Float64, datafile, 
    "Water body chlorophyll-a");

# ========================================================================
# PLOTTING OBSERVATIONAL DATA DISTRIBUTION
# ========================================================================

# Create a figure showing the geographic distribution of observation points
figure("Mediterranean-Data-Distribution", figsize=(12,8))
ax = subplot(1,1,1)
plot(obslon, obslat, "ko", markersize=0.5)  # Plot observation locations as small black dots
aspectratio = 1/cos(mean(latr) * pi/180)   # Calculate proper aspect ratio for latitude
ax.tick_params("both",labelsize=8)
gca().set_aspect(aspectratio)
xlim(-6, 36)  # Mediterranean longitude range
ylim(30, 46)  # Mediterranean latitude range
xlabel("Longitude (°E)", fontsize=10)
ylabel("Latitude (°N)", fontsize=10)
title("Chlorophyll-a Observation Distribution - Mediterranean Sea", fontsize=12)
grid(true, alpha=0.3)

# Print data statistics
println("Mediterranean Chlorophyll-a Dataset:")
println("  Total observations: $(length(obsval))")
println("  Longitude range: $(round(minimum(obslon), digits=2))° to $(round(maximum(obslon), digits=2))°")
println("  Latitude range: $(round(minimum(obslat), digits=2))° to $(round(maximum(obslat), digits=2))°")
println("  Depth range: $(round(minimum(obsdepth), digits=1))m to $(round(maximum(obsdepth), digits=1))m")
println("  Chlorophyll range: $(round(minimum(obsval), digits=3)) to $(round(maximum(obsval), digits=3)) mg/m³")

# Check quality and consistency of observations
checkobs((obslon,obslat,obsdepth,obstime),obsval,obsid)

6330 out of 30189 - 20.967902216038954 %
12710 out of 30189 - 42.101427672330985 %

19230 out of 30189 - 63.69869820133161 %
19230 out of 30189 - 63.69869820133161 %
25870 out of 30189 - 85.69346450693962 %
  9.501269 seconds (1.18 M allocations: 55.137 MiB)
Mediterranean Chlorophyll-a Dataset:
  Total observations: 30839
  Longitude range: 3.22° to 19.2°
  Latitude range: 39.11° to 45.77°
  Depth range: 0.0m to 100.0m
  Chlorophyll range: 0.0 to 147.0 mg/m³
              minimum and maximum of obs. dimension 1: (3.2175331115722656, 19.19866943359375)
              minimum and maximum of obs. dimension 2: (39.10667037963867, 45.77027893066406)
              minimum and maximum of obs. dimension 3: (0.0, 100.0)
              minimum and maximum of obs. dimension 4: (DateTime("2003-01-07T12:07:21"), DateTime("2012-12-28T08:04:25"))
                          minimum and maximum of data: (9.999999747378752e-5, 147.0)
25870 out of 30189 - 85.69346450693962 %
  9.501269 seconds (1.18 M alloc

┌ Info: Checking ranges for dimensions and observations
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\obsstat.jl:77


In [19]:
# Download bathymetry data (seafloor depth) for the Mediterranean Sea region
bathname = "gebco_30sec_8.nc"
#if !isfile(bathname)
#    download("https://dox.ulg.ac.be/index.php/s/U0pqyXhcQrXjEUX/download",bathname)
#else
#    @info("Bathymetry file already downloaded")
#end

# Load bathymetry data and interpolate to our Mediterranean grid
@time bx,by,b = load_bath(bathname,true,lonr,latr);

# Plot the bathymetry data
figure("Mediterranean-Bathymetry")
ax = subplot(1,1,1)
pcolor(bx, by, permutedims(b, [2,1]));  # Create colored map of bathymetry
colorbar(orientation="vertical", shrink=0.8).ax.tick_params(labelsize=8)
contour(bx, by, permutedims(b, [2,1]), [0, 0.1], colors="k", linewidths=.5)  # Add coastline contour
gca().set_aspect(aspectratio)
ax.tick_params("both",labelsize=6)
title("Mediterranean Sea Bathymetry")

# ========================================================================
# MASK CREATION AND EDITING FOR ANALYSIS DOMAIN
# ========================================================================

# Create a 3D mask for the analysis domain
# This mask determines which grid points are valid for analysis (water vs land)
mask = falses(size(b,1),size(b,2),length(depthr))
for k = 1:length(depthr)
    for j = 1:size(b,2)
        for i = 1:size(b,1)
            mask[i,j,k] = b[i,j] >= depthr[k]  # True where water depth >= analysis depth
        end
    end
end
@show size(mask)

# Plot the initial mask (surface level)
figure("Mediterranean-Mask")
ax = subplot(1,1,1)
gca().set_aspect(aspectratio)
ax.tick_params("both",labelsize=6)
pcolor(bx,by, transpose(mask[:,:,1])); 
title("Mediterranean Sea Analysis Mask")

# Create coordinate grids for mask editing
grid_bx = [i for i in bx, j in by];
grid_by = [j for i in bx, j in by];

# Edit the mask to remove specific problematic regions for Mediterranean analysis
mask_edit = copy(mask);

# Remove Atlantic Ocean areas beyond Gibraltar (west of -5.5°W)
sel_mask_atlantic = (grid_bx .<= -5.5);

# Remove areas that are too shallow (less than 10m depth) to avoid coastal issues
# This helps with numerical stability in very shallow regions
sel_mask_shallow = [b[i,j] < 10.0 && b[i,j] >= 0.0 for i in 1:size(b,1), j in 1:size(b,2)];

# Apply masks: remove Atlantic and very shallow areas
for k = 1:length(depthr)
    mask_edit[:,:,k] = mask_edit[:,:,k] .* .!sel_mask_atlantic .* .!sel_mask_shallow;
end

@show size(mask_edit)
println("Mask editing completed for Mediterranean Sea")

# Plot the edited mask
figure("Mediterranean-Mask-Edited")
ax = subplot(1,1,1)
ax.tick_params("both",labelsize=6)
pcolor(bx, by, transpose(mask_edit[:,:,1])); 
gca().set_aspect(aspectratio)
title("Mediterranean Sea Edited Analysis Mask")

  0.008373 seconds (650 allocations: 2.130 MiB)
size(mask) = (337, 129, 3)
size(mask) = (337, 129, 3)
size(mask_edit) = (337, 129, 3)
Mask editing completed for Mediterranean Sea
size(mask_edit) = (337, 129, 3)
Mask editing completed for Mediterranean Sea


PyObject Text(0.5, 1.0, 'Mediterranean Sea Edited Analysis Mask')

In [20]:
# ========================================================================
# DATA FILTERING AND QUALITY CONTROL
# ========================================================================

# Filter observational data to keep only realistic salinity values
# sel = (obsval .<= 40) .& (obsval .>= 25);  # Typical Adriatic Sea salinity range

# Apply the filter to all observation arrays
# obsval = obsval[sel]
# obslon = obslon[sel]
# obslat = obslat[sel]
# obsdepth = obsdepth[sel]
# obstime = obstime[sel]
# obsid = obsid[sel];

# No filtering applied - keeping all data points
println("Total observations: ", length(obsval))
println("Data range: ", minimum(obsval), " to ", maximum(obsval))

Total observations: 30839
Data range: 9.999999747378752e-5 to 147.0


In [21]:
# ========================================================================
# DIVAND ANALYSIS PARAMETERS SETUP
# ========================================================================

# Optional: Calculate observation weights based on data density
# Uncommented code would create spatially varying error estimates
#@time rdiag=1.0./DIVAnd.weight_RtimesOne((obslon,obslat),(0.03,0.03));
#@show maximum(rdiag),mean(rdiag)

# Define grid dimensions for parameter arrays
sz = (length(lonr),length(latr),length(depthr));

# Set correlation lengths (influence radius) for each dimension
# For Mediterranean-scale analysis, use larger correlation lengths
lenx = fill(150_000.,sz)   # 150 km correlation length in longitude direction (increased for Med scale)
leny = fill(150_000.,sz)   # 150 km correlation length in latitude direction (increased for Med scale)
lenz = fill(25.,sz);       # 25 m correlation length in depth direction (keep same)
len = (lenx, leny, lenz);  # Combine into tuple for DIVAnd

# Set noise-to-signal ratio (regularization parameter)
epsilon2 = 0.1;            # Controls smoothness vs data fidelity tradeoff
#epsilon2 = epsilon2 * rdiag;  # Optional: spatially varying epsilon

println("Correlation lengths set for Mediterranean analysis:")
println("  Horizontal: 150 km")
println("  Vertical: 25 m")
println("  Grid size: $(sz)")

Correlation lengths set for Mediterranean analysis:
  Horizontal: 150 km
  Vertical: 25 m
  Grid size: (337, 129, 3)


In [22]:
# ========================================================================
# OUTPUT FILE SETUP AND METADATA CONFIGURATION
# ========================================================================

# Set up output directory and filename
outputdir = "./"
if !isdir(outputdir)
    mkpath(outputdir)
end
filename = joinpath(outputdir, "Water_body_$(replace(varname," "=>"_"))_Mediterranean.4Danl.nc")

# Define comprehensive metadata for NetCDF file following SeaDataNet standards
metadata = OrderedDict(
    # Name of the project (SeaDataCloud, SeaDataNet, EMODNET-chemistry, ...)
    "project" => "SeaDataCloud",

    # URN code for the institution EDMO registry,
    # e.g. SDN:EDMO::1579
    "institution_urn" => "SDN:EDMO::1579",

    # Production group
    #"production" => "Diva group",

    # Name and emails from authors
    "Author_e-mail" => ["Your Name1 <name1@example.com>", "Other Name <name2@example.com>"],

    # Source of the observation
    "source" => "observational data from SeaDataNet and World Ocean Atlas",

    # Additional comment
    "comment" => "Mediterranean Sea chlorophyll-a analysis using DIVAnd - Duplicate removal applied to the merged dataset",

    # SeaDataNet Vocabulary P35 URN for chlorophyll-a
    # http://seadatanet.maris2.nl/v_bodc_vocab_v2/search.asp?lib=p35
    "parameter_keyword_urn" => "SDN:P35::CPHLHPP1",

    # List of SeaDataNet Parameter Discovery Vocabulary P02 URNs for chlorophyll
    # http://seadatanet.maris2.nl/v_bodc_vocab_v2/search.asp?lib=p02
    "search_keywords_urn" => ["SDN:P02::CPHL"],

    # List of SeaDataNet Vocabulary C19 area URNs
    # SeaVoX salt and fresh water body gazetteer (C19) - Mediterranean Sea
    # http://seadatanet.maris2.nl/v_bodc_vocab_v2/search.asp?lib=C19
    "area_keywords_urn" => ["SDN:C19::3_3"],

    "product_version" => "1.0",
    
    "product_code" => "MED-CHLA-DIVAnd-V1.0",
    
    # bathymetry source acknowledgement
    "bathymetry_source" => "The GEBCO Digital Atlas published by the British Oceanographic Data Centre on behalf of IOC and IHO, 2003",

    # NetCDF CF standard name for chlorophyll-a
    # http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html
    "netcdf_standard_name" => "mass_concentration_of_chlorophyll_a_in_sea_water",

    "netcdf_long_name" => "Mass concentration of chlorophyll-a in sea water",

    "netcdf_units" => "mg m-3",

    # Abstract for the product
    "abstract" => "Mediterranean Sea chlorophyll-a concentration fields derived from in-situ observations using DIVAnd interpolation method. Covers the period 2003-2012 with seasonal resolution and 3D spatial coverage.",

    # This option provides a place to acknowledge various types of support for the
    # project that produced the data
    "acknowledgement" => "This work was supported by the Blue Cloud project and SeaDataNet infrastructure.",

    "documentation" => "https://doi.org/doi_of_doc",

    # Digital Object Identifier of the data product
    "doi" => "...");

# Convert metadata to NetCDF-compatible attributes
ncglobalattrib, ncvarattrib = SDNMetadata(metadata, filename, varname, lonr, latr)

# Remove any existing analysis file to start fresh
if isfile(filename)
    rm(filename) # delete the previous analysis
    @info "Removing file $filename"
end

println("Output configured for Mediterranean Sea chlorophyll-a analysis:")
println("  Filename: $filename")
println("  Spatial domain: $(minimum(lonr))° to $(maximum(lonr))°E, $(minimum(latr))° to $(maximum(latr))°N")

HTTP.Exceptions.StatusError: HTTP.Exceptions.StatusError(404, "GET", "/collection/P35/current/CPHLHPP1/", HTTP.Messages.Response:
"""
HTTP/1.1 404 Not Found
Date: Tue, 08 Jul 2025 14:52:01 GMT
Server: Apache
X-Content-Type-Options: nosniff
X-XSS-Protection: 1; mode=block
Access-Control-Allow-Origin: *
Content-Length: 196
Content-Type: text/html; charset=iso-8859-1

<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>404 Not Found</title>
</head><body>
<h1>Not Found</h1>
<p>The requested URL was not found on this server.</p>
</body></html>
""")

In [23]:
# ========================================================================
# PLOTTING FUNCTION DEFINITION
# ========================================================================

# Set up figure output directory
figdir = "./"

# Define a function to plot interpolation results for each time step
function plotres(timeindex,sel,fit,erri)
    tmp = copy(fit)                            # Copy the fitted data to avoid modifying original
    nx,ny,nz = size(tmp)                       # Get dimensions of the fitted data array
    
    for i in 1:nz                             # Loop through each depth level
        figure("Mediterranean-Chlorophyll-Analysis", figsize=(12,8))  # Larger figure for Mediterranean
        ax = subplot(1,1,1)                   # Create subplot
        ax.tick_params("both",labelsize=8)    # Set tick parameters
        ylim(30.0, 46.0);                     # Set latitude limits for Mediterranean
        xlim(-6.0, 36.0);                     # Set longitude limits for Mediterranean
        title("Mediterranean Chlorophyll-a - Depth: $(depthr[i])m, Time: $(timeindex)", fontsize=10)
        
        # Create colored plot of the interpolated chlorophyll field
        pcolor(lonr.-dx/2.,latr.-dy/2, permutedims(tmp[:,:,i], [2,1]);
               vmin = 0, vmax = 4)           # Set color scale limits for chlorophyll-a (mg/m³)
        colorbar(extend="both", orientation="vertical", shrink=0.8, label="Chlorophyll-a (mg/m³)").ax.tick_params(labelsize=8)

        # Add land mask as gray contour 
        contourf(bx,by,permutedims(b,[2,1]), levels = [-1e5,0],colors = [[.5,.5,.5]])
        
        # Add major geographic features
        xlabel("Longitude (°E)", fontsize=8)
        ylabel("Latitude (°N)", fontsize=8)
        
        aspectratio = 1/cos(mean(latr) * pi/180)  # Calculate proper aspect ratio
        gca().set_aspect(aspectratio)
        
        # Save the figure with formatted filename
        figname = "Mediterranean_" * replace(varname, " " => "_") * @sprintf("_%02d",i) * @sprintf("_%03d.png",timeindex)
        PyPlot.savefig(joinpath(figdir, figname), dpi=300, bbox_inches="tight");
        PyPlot.close_figs()                   # Close figure to free memory
    end
end

println("Plotting function configured for Mediterranean Sea visualization")


Plotting function configured for Mediterranean Sea visualization


In [24]:
# ========================================================================
# MAIN DIVAND ANALYSIS EXECUTION
# ========================================================================

# Execute the main DIVAnd 3D analysis
@time dbinfo = diva3d((lonr,latr,depthr,TS),        # Grid coordinates and time selector
    (obslon,obslat,obsdepth,obstime), obsval,        # Observation coordinates and values
    len, epsilon2,                                    # Correlation lengths and regularization
    filename,varname,                                 # Output file and variable name
    bathname=bathname,                               # Bathymetry file for land/sea mask
    #plotres = plotres,                               # Plotting function to call during analysis
    mask = mask_edit,                                # Edited mask for analysis domain
    fitcorrlen = false,                              # Don't fit correlation lengths automatically
    niter_e = 2,                                     # Number of iterations for error estimation
    ncvarattrib = ncvarattrib,                       # NetCDF variable attributes
    ncglobalattrib = ncglobalattrib,                 # NetCDF global attributes
    surfextend = true                                # Extend surface values to deeper levels if needed
    );

# Save observation metadata to the output file
DIVAnd.saveobs(filename,(obslon,obslat,obsdepth,obstime),obsid);

┌ Info: Creating netCDF file ./Water_body_Water_body_chlorophyll-a_Mediterranean.4Danl.nc
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\diva.jl:383
┌ Info: Time step 1 / 4
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\diva.jl:436
┌ Info: scaled correlation length (min,max) in dimension 1: (150000.0, 150000.0)
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\diva.jl:621
┌ Info: scaled correlation length (min,max) in dimension 2: (150000.0, 150000.0)
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\diva.jl:621
┌ Info: scaled correlation length (min,max) in dimension 3: (25.0, 25.0)
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\diva.jl:621
┌ Info: number of windows: 2
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\DIVAndgo.jl:110
┌ Info: number of windows: 2
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\DIVAndgo.jl:110
┌ Info: number of windows: 2
└ @ DIVAnd C:\Users\nholodkov\.

 41.185127 seconds (5.32 M allocations: 54.678 GiB, 27.28% gc time, 6.17% compilation time)
