In [29]:
# Import required Julia packages for oceanographic data analysis
using NCDatasets      # For reading and writing NetCDF files
using PhysOcean       # Physical oceanography utilities
using DataStructures  # For ordered dictionaries and other data structures
using DIVAnd          # Data-Interpolating Variational Analysis in n-dimensions
using PyPlot          # Plotting library (matplotlib wrapper)
using Dates           # Date and time handling
using Statistics      # Statistical functions (mean, etc.)
using Random          # Random number generation
using Printf          # String formatting with printf-style syntax


In [30]:
datafile = "data.nc"

"data.nc"

In [31]:
# Examine the NetCDF file structure to see what variables are available
using NCDatasets
ds = NCDataset(datafile, "r")
println("Available variables in the NetCDF file:")
for (varname, var) in ds
    println("  Variable: $varname")
    if haskey(var.attrib, "long_name")
        println("    long_name: $(var.attrib["long_name"])")
    end
    if haskey(var.attrib, "standard_name")
        println("    standard_name: $(var.attrib["standard_name"])")
    end
    if haskey(var.attrib, "units")
        println("    units: $(var.attrib["units"])")
    end
    println()
end
close(ds)

Available variables in the NetCDF file:
  Variable: cruise_id
    long_name: Cruise
    units: 

  Variable: station_id
    long_name: Station
    units: 

  Variable: station_type
    long_name: Type
    units: 

  Variable: longitude
    long_name: Longitude
    standard_name: longitude
    units: degrees_east

  Variable: latitude
    long_name: Latitude
    standard_name: latitude
    units: degrees_north

  Variable: LOCAL_CDI_ID
    long_name: LOCAL_CDI_ID
    units: 

  Variable: EDMO_code
    long_name: EDMO_code
    units: 

  Variable: Bot_Depth
    long_name: Bot. Depth
    units: m

  Variable: Instrument_Info
    long_name: Instrument Info
    units: 

  Variable: Codes_in_Originator_File
    long_name: Codes in Originator File
    units: 

  Variable: P35_Contributor_Codes
    long_name: P35 Contributor Codes
    units: 

  Variable: References
    long_name: References
    units: 

  Variable: Comments
    long_name: Comments
    units: 

  Variable: Data_set_name
    lo

closed Dataset

In [32]:
# Define spatial grid parameters for the Mediterranean Sea analysis
dx, dy = 0.125, 0.125  # Grid resolution in degrees (longitude, latitude)
lonr = -6:dx:37        # Longitude range from -6° to 37° E covering entire Mediterranean
latr = 30:dy:46        # Latitude range from 30° to 46° N covering entire Mediterranean
timerange = [Date(2003,06,06),Date(2012,01,01)];  # Time period for analysis

In [33]:
# Define depth levels for chlorophyll-a 3D analysis (in meters)
# Chlorophyll-a is primarily found in the euphotic zone (0-150m depth)
# Full depth range commented out, focusing on biologically relevant depths
depthr = [0.,5., 10., 15., 20., 25., 30., 40., 50., 66, 
    75, 85, 100, 112, 125, 135, 150, 175, 200, 225, 250, 
    275, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 
    800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 
    1300, 1350, 1400, 1450, 1500, 1600, 1750, 1850, 2000];

# Reduced depth levels for faster computation - key chlorophyll-a depths only
depthr = [0., 10., 25., 50., 100., 150.];  # 6 key depths covering main chlorophyll features

# Define analysis parameters
varname = "Water body total phosphorus"    # Variable being analyzed (using correct long_name)
yearlist = [2003:2012]; # Years to include in analysis
monthlist = [[1,2,3],[4,5,6],[7,8,9],[10,11,12]]; # Seasonal groupings (quarters)

# Create time selector for seasonal analysis
TS = DIVAnd.TimeSelectorYearListMonthList(yearlist,monthlist);
@show TS;

TS = TimeSelectorYearListMonthList{Vector{UnitRange{Int64}}, Vector{Vector{Int64}}}(UnitRange{Int64}[2003:2012], [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])


In [41]:
# Load data from the original file and apply QC filtering in Julia
# Since the filtered file has memory issues, we'll work with the original data.nc file

# First, let's use the original data.nc file instead of the filtered one
original_datafile = "data.nc"

# Load the chlorophyll-a data from the original file
@time obsval_raw,obslon,obslat,obsdepth,obstime,obsid = NCODV.load(Float64, original_datafile, 
    "Water body chlorophyll-a");

# Find the correct QC variable name for chlorophyll-a
# The error shows that 'Water body chlorophyll-a qc' is not found
# Let's search for all chlorophyll-a related variables and their QC counterparts

using NCDatasets
ds = NCDataset("data.nc", "r")

println("Searching for chlorophyll-a and QC-related variables:")
println("=" * 60)

chlorophyll_vars = []
qc_vars = []

for (varname, var) in ds
    long_name = get(var.attrib, "long_name", "")
    
    # Check for chlorophyll-a variables
    if occursin("chlorophyll", lowercase(long_name)) || occursin("chlorophyll", lowercase(varname))
        push!(chlorophyll_vars, (varname, long_name))
        println("CHLOROPHYLL Variable: $varname")
        println("  long_name: '$long_name'")
        println()
    end
    
    # Check for QC variables (look for 'qc' in name or long_name)
    if occursin("qc", lowercase(varname)) || occursin("qc", lowercase(long_name)) || 
       occursin("quality", lowercase(long_name))
        push!(qc_vars, (varname, long_name))
        println("QC Variable: $varname")
        println("  long_name: '$long_name'")
        println()
    end
end

close(ds)

println("Summary:")
println("Found $(length(chlorophyll_vars)) chlorophyll-a variable(s)")
println("Found $(length(qc_vars)) QC variable(s)")
println()

# Now try to match chlorophyll-a with its QC variable
if length(chlorophyll_vars) > 0
    chl_var, chl_long_name = chlorophyll_vars[1]
    println("Main chlorophyll-a variable:")
    println("  Variable name: $chl_var")
    println("  Long name: '$chl_long_name'")
    
    # Look for matching QC variable
    println("\nLooking for matching QC variable...")
    qc_var_found = nothing
    
    # Try different patterns for QC variable names
    possible_qc_patterns = [
        chl_long_name * " qc",
        chl_long_name * "_qc", 
        chl_long_name * " quality control",
        replace(chl_long_name, "Water body" => "Water_body") * "_qc"
    ]
    
    for qc_varname, qc_long_name in qc_vars
        for pattern in possible_qc_patterns
            if qc_long_name == pattern || occursin(lowercase(replace(chl_long_name, " " => "")), lowercase(replace(qc_long_name, " " => "")))
                qc_var_found = (qc_varname, qc_long_name)
                break
            end
        end
        if qc_var_found !== nothing
            break
        end
    end
    
    if qc_var_found !== nothing
        println("✓ Found matching QC variable:")
        println("  QC Variable name: $(qc_var_found[1])")
        println("  QC Long name: '$(qc_var_found[2])'")
    else
        println("✗ No exact match found. Available QC variables:")
        for (qc_varname, qc_long_name) in qc_vars
            println("  $qc_varname: '$qc_long_name'")
        end
    end
end

# Load the QC data separately using the found QC variable name
@time qc_val,qc_lon,qc_lat,qc_depth,qc_time,qc_id = NCODV.load(Float64, original_datafile, 
    qc_var_found[1]);

# Define acceptable QC values (matching Python analysis)
acceptable_qc_values = [49.0, 50.0, 51.0, 81.0]
println("Acceptable QC values: ", acceptable_qc_values)

# Apply QC filtering in Julia
println("Applying QC filtering...")
qc_mask = [qc in acceptable_qc_values for qc in qc_val]
println("QC mask created. Points with acceptable QC: ", sum(qc_mask))

# Filter all observation arrays based on QC
obsval = obsval_raw[qc_mask]
obslon = obslon[qc_mask]
obslat = obslat[qc_mask]
obsdepth = obsdepth[qc_mask]
obstime = obstime[qc_mask]
obsid = obsid[qc_mask]

println("After QC filtering:")
println("  Total observations: ", length(obsval))
println("  Data range: ", minimum(obsval), " to ", maximum(obsval), " mg/m³")
println("  Depth range: ", minimum(obsdepth), " to ", maximum(obsdepth), " m")

# ========================================================================
# PLOTTING OBSERVATIONAL DATA DISTRIBUTION
# ========================================================================

# Create a figure showing the geographic distribution of observation points
figure("Mediterranean-Data")
ax = subplot(1,1,1)
plot(obslon, obslat, "ko", markersize=.1)  # Plot observation locations as small black dots
aspectratio = 1/cos(mean(latr) * pi/180)   # Calculate proper aspect ratio for latitude
ax.tick_params("both",labelsize=6)
gca().set_aspect(aspectratio)
title("Mediterranean Sea Observation Locations (QC Filtered)")

# Check quality and consistency of observations
checkobs((obslon,obslat,obsdepth,obstime),obsval,obsid)

6360 out of 30189 - 21.067276160190797 %
12830 out of 30189 - 42.498923448938356 %
12830 out of 30189 - 42.498923448938356 %
19360 out of 30189 - 64.1293186259896 %
25970 out of 30189 - 86.02471098744576 %19360 out of 30189 - 64.1293186259896 %
25970 out of 30189 - 86.02471098744576 %
  9.531401 seconds (1.18 M allocations: 55.137 MiB, 0.15% gc time)

  9.531401 seconds (1.18 M allocations: 55.137 MiB, 0.15% gc time)
Searching for chlorophyll-a and QC-related variables:
Searching for chlorophyll-a and QC-related variables:


MethodError: MethodError: no method matching *(::String, ::Int64)
The function `*` exists, but no method is defined for this combination of argument types.

Closest candidates are:
  *(::Any, ::Any, !Matched::Any, !Matched::Any...)
   @ Base operators.jl:596
  *(!Matched::ChainRulesCore.NoTangent, ::Any)
   @ ChainRulesCore C:\Users\nholodkov\.julia\packages\ChainRulesCore\U6wNx\src\tangent_arithmetic.jl:64
  *(::Any, !Matched::PyCall.PyObject)
   @ PyCall C:\Users\nholodkov\.julia\packages\PyCall\1gn3u\src\pyoperators.jl:14
  ...


In [35]:
# Download bathymetry data (seafloor depth) for the Mediterranean Sea region
bathname = "gebco_30sec_8.nc"
#if !isfile(bathname)
#    download("https://dox.ulg.ac.be/index.php/s/U0pqyXhcQrXjEUX/download",bathname)
#else
#    @info("Bathymetry file already downloaded")
#end

# Load bathymetry data and interpolate to our Mediterranean grid
@time bx,by,b = load_bath(bathname,true,lonr,latr);

# Plot the bathymetry data for the Mediterranean Sea
figure("Mediterranean-Bathymetry")
ax = subplot(1,1,1)
pcolor(bx, by, permutedims(b, [2,1]));  # Create colored map of bathymetry
colorbar(orientation="vertical", shrink=0.8).ax.tick_params(labelsize=8)
contour(bx, by, permutedims(b, [2,1]), [0, 0.1], colors="k", linewidths=.5)  # Add coastline contour
gca().set_aspect(aspectratio)
ax.tick_params("both",labelsize=6)
title("Mediterranean Sea Bathymetry")

# ========================================================================
# MASK CREATION AND EDITING FOR MEDITERRANEAN ANALYSIS DOMAIN
# ========================================================================

# Create a 3D mask for the Mediterranean analysis domain
# This mask determines which grid points are valid for analysis (water vs land)
mask = falses(size(b,1),size(b,2),length(depthr))
for k = 1:length(depthr)
    for j = 1:size(b,2)
        for i = 1:size(b,1)
            mask[i,j,k] = b[i,j] >= depthr[k]  # True where water depth >= analysis depth
        end
    end
end
@show size(mask)

# Plot the initial mask (surface level) for Mediterranean
figure("Mediterranean-Mask")
ax = subplot(1,1,1)
gca().set_aspect(aspectratio)
ax.tick_params("both",labelsize=6)
pcolor(bx,by, transpose(mask[:,:,1])); 
title("Mediterranean Sea Initial Mask")

# Create coordinate grids for mask editing
grid_bx = [i for i in bx, j in by];
grid_by = [j for i in bx, j in by];

# Edit the mask to remove specific regions (adapted for Mediterranean)
mask_edit = copy(mask);
# Remove Atlantic Ocean areas west of Gibraltar (longitude < -5.5°)
sel_mask1 = (grid_bx .<= -5.5);  
# Remove Black Sea connections (north of 42° and east of 27°)
sel_mask2 = (grid_by .>= 42.0) .& (grid_bx .>= 27.0);
# Remove areas that are too far north (> 45.5°) to focus on main Mediterranean basin
sel_mask3 = (grid_by .>= 45.5);
# Apply all mask edits
mask_edit = mask_edit .* .!sel_mask1 .* .!sel_mask2 .* .!sel_mask3;
@show size(mask_edit)

# Plot the edited mask for Mediterranean
figure("Mediterranean-Mask-Edited")
ax = subplot(1,1,1)
ax.tick_params("both",labelsize=6)
pcolor(bx, by, transpose(mask_edit[:,:,1])); 
gca().set_aspect(aspectratio)
title("Mediterranean Sea Edited Mask")

  0.012147 seconds (650 allocations: 2.177 MiB)


UndefVarError: UndefVarError: `aspectratio` not defined in `Main`
Suggestion: check for spelling errors or missing imports.

In [36]:
## ========================================================================
## DATA FILTERING AND QUALITY CONTROL
## ========================================================================
#
## Apply EMODnet Chemistry recommended quality control for chlorophyll-a
## Filter observational data to keep only realistic chlorophyll-a values for Mediterranean
#sel = (obsval .>= 0.001) .& (obsval .<= 30.0);  # Mediterranean chlorophyll-a range: 0.001-30 mg/m³
#println("Before QC: $(length(obsval)) observations")
#
## Apply the filter to all observation arrays
#obsval = obsval[sel]
#obslon = obslon[sel]
#obslat = obslat[sel]
#obsdepth = obsdepth[sel]
#obstime = obstime[sel]
#obsid = obsid[sel];
#
## Additional QC: Remove observations from depths > 200m (below euphotic zone for chlorophyll-a)
#depth_sel = obsdepth .<= 200.0;
#obsval = obsval[depth_sel]
#obslon = obslon[depth_sel]
#obslat = obslat[depth_sel]
#obsdepth = obsdepth[depth_sel]
#obstime = obstime[depth_sel]
#obsid = obsid[depth_sel];
#
#println("After QC: $(length(obsval)) observations")
#println("Data range: $(minimum(obsval)) to $(maximum(obsval)) mg/m³")
#println("Depth range: $(minimum(obsdepth)) to $(maximum(obsdepth)) m")

In [37]:
# ========================================================================
# DIVAND ANALYSIS PARAMETERS SETUP (EMODnet Chemistry Standards)
# ========================================================================

# Optional: Calculate observation weights based on data density
# Recommended for high-density datasets to account for spatial clustering
@time rdiag=1.0./DIVAnd.weight_RtimesOne((obslon,obslat),(0.05,0.05));
@show maximum(rdiag),mean(rdiag)

# Define grid dimensions for parameter arrays
sz = (length(lonr),length(latr),length(depthr));

# Set correlation lengths (influence radius) for each dimension
# EMODnet Chemistry recommended values for Mediterranean chlorophyll-a:
lenx = fill(75_000.,sz)    # 75 km correlation length in longitude direction (Mediterranean-specific)
leny = fill(75_000.,sz)    # 75 km correlation length in latitude direction (Mediterranean-specific)
lenz = fill(15.,sz);       # 15 m correlation length in depth direction (for chlorophyll-a vertical structure)
len = (lenx, leny, lenz);  # Combine into tuple for DIVAnd

# Set noise-to-signal ratio (regularization parameter)
# Lower epsilon2 for chlorophyll-a due to high variability
epsilon2 = 0.05;           # Reduced for better data fidelity with chlorophyll-a
epsilon2 = epsilon2 * rdiag;  # Apply spatially varying epsilon based on data density

┌ Info: Computing weights using 1 CPU thread(s)
└ @ DIVAnd C:\Users\nholodkov\.julia\packages\DIVAnd\4UymR\src\DIVAnd_weights.jl:101


  1.229196 seconds (3.01 M allocations: 157.501 MiB, 9.39% gc time, 85.46% compilation time)
(maximum(rdiag), mean(rdiag)) = (10631.393062538093, 3842.041761449309)
(maximum(rdiag), mean(rdiag)) = (10631.393062538093, 3842.041761449309)


In [38]:
# ========================================================================
# OUTPUT FILE SETUP AND METADATA CONFIGURATION
# ========================================================================

# Set up output directory and filename
outputdir = "./"
if !isdir(outputdir)
    mkpath(outputdir)
end
filename = joinpath(outputdir, "Water_body_$(replace(varname," "=>"_"))_Mediterranean.4Danl.nc")

# Define comprehensive metadata for NetCDF file following SeaDataNet standards
metadata = OrderedDict(
    # Name of the project (SeaDataCloud, SeaDataNet, EMODNET-chemistry, ...)
    "project" => "SeaDataCloud",

    # URN code for the institution EDMO registry,
    # e.g. SDN:EDMO::1579
    "institution_urn" => "SDN:EDMO::1579",

    # Production group
    #"production" => "Diva group",

    # Name and emails from authors
    "Author_e-mail" => ["Your Name1 <name1@example.com>", "Other Name <name2@example.com>"],

    # Source of the observation
    "source" => "observational data from SeaDataNet and World Ocean Atlas",

    # Additional comment
    "comment" => "Duplicate removal applied to the merged dataset. EMODnet Chemistry QC procedures applied.",

    # SeaDataNet Vocabulary P35 URN for chlorophyll-a
    # http://seadatanet.maris2.nl/v_bodc_vocab_v2/search.asp?lib=p35
    "parameter_keyword_urn" => "SDN:P35::EPC00001", # Chlorophyll-a concentration

    # List of SeaDataNet Parameter Discovery Vocabulary P02 URNs for chlorophyll-a
    # http://seadatanet.maris2.nl/v_bodc_vocab_v2/search.asp?lib=p02
    "search_keywords_urn" => ["SDN:P02::CPWC"], # Chlorophyll pigment concentrations

    # List of SeaDataNet Vocabulary C19 area URNs
    # SeaVoX salt and fresh water body gazetteer (C19)
    # http://seadatanet.maris2.nl/v_bodc_vocab_v2/search.asp?lib=C19
    "area_keywords_urn" => ["SDN:C19::3_1"], # Mediterranean Sea

    "product_version" => "1.0",
    
    "product_code" => "Mediterranean-Chlorophyll-a-Analysis",
    
    # bathymetry source acknowledgement
    "bathymetry_source" => "The GEBCO Digital Atlas published by the British Oceanographic Data Centre on behalf of IOC and IHO, 2003",

    # NetCDF CF standard name for chlorophyll-a
    # http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html
    "netcdf_standard_name" => "mass_concentration_of_chlorophyll_a_in_sea_water",

    "netcdf_long_name" => "Mass concentration of chlorophyll-a in sea water",

    "netcdf_units" => "mg m-3",

    # Abstract for the product
    "abstract" => "4D analysis of chlorophyll-a concentration in Mediterranean Sea using DIVAnd interpolation following EMODnet Chemistry methodology",

    # This option provides a place to acknowledge various types of support for the
    # project that produced the data
    "acknowledgement" => "EMODnet Chemistry project, SeaDataNet infrastructure",

    "documentation" => "https://doi.org/10.6092/9f75ad8a-ca32-4a72-bf69-167119b2cc12",

    # Digital Object Identifier of the data product
    "doi" => "...");

# Convert metadata to NetCDF-compatible attributes
ncglobalattrib, ncvarattrib = SDNMetadata(metadata, filename, varname, lonr, latr)

# Remove any existing analysis file to start fresh
if isfile(filename)
    rm(filename) # delete the previous analysis
    @info "Removing file $filename"
end

In [39]:
# ========================================================================
# PLOTTING FUNCTION DEFINITION
# ========================================================================

# Set up figure output directory
figdir = "./"

# Define a function to plot interpolation results for each time step
function plotres(timeindex,sel,fit,erri)
    tmp = copy(fit)                            # Copy the fitted data to avoid modifying original
    nx,ny,nz = size(tmp)                       # Get dimensions of the fitted data array
    
    for i in 1:nz                             # Loop through each depth level
        figure("Mediterranean-Chlorophyll-Analysis")     # Create or select figure window
        ax = subplot(1,1,1)                   # Create subplot
        ax.tick_params("both",labelsize=6)    # Set tick parameters
        ylim(30.0, 46.0);                     # Set latitude limits for Mediterranean
        xlim(-6.0, 37.0);                     # Set longitude limits for Mediterranean
        title("Mediterranean Sea - Chlorophyll-a \n Depth: $(depthr[i])m, Time index: $(timeindex)", fontsize=8)  # Add descriptive title
        
        # Create colored plot of the interpolated chlorophyll-a field
        # Mediterranean chlorophyll-a typically ranges 0.05-1.5 mg/m³, with blooms up to 3-5 mg/m³
        pcolor(lonr.-dx/2.,latr.-dy/2, permutedims(tmp[:,:,i], [2,1]);
               vmin = 0.05, vmax = 1.5)     # Realistic Mediterranean chlorophyll-a range (mg/m³)
        colorbar(extend="both", orientation="vertical", shrink=0.8, label="Chlorophyll-a (mg/m³)").ax.tick_params(labelsize=8)

        # Add land mask as gray contour 
        contourf(bx,by,permutedims(b,[2,1]), levels = [-1e5,0],colors = [[.5,.5,.5]])
        aspectratio = 1/cos(mean(latr) * pi/180)  # Calculate proper aspect ratio
        gca().set_aspect(aspectratio)
        
        # Save the figure with formatted filename
        figname = "Mediterranean_Chlorophyll_a" * @sprintf("_%02d",i) * @sprintf("_%03d.png",timeindex)
        PyPlot.savefig(joinpath(figdir, figname), dpi=600, bbox_inches="tight");
        PyPlot.close_figs()                   # Close figure to free memory
    end
end


plotres (generic function with 1 method)

In [40]:
# ========================================================================
# MAIN DIVAND ANALYSIS EXECUTION (OPTIMIZED FOR SPEED)
# ========================================================================

# Execute the main DIVAnd 3D analysis
@time dbinfo = diva3d((lonr,latr,depthr,TS),        # Grid coordinates and time selector
    (obslon,obslat,obsdepth,obstime), obsval,        # Observation coordinates and values
    len, epsilon2,                                    # Correlation lengths and regularization
    filename,varname,                                 # Output file and variable name
    bathname=bathname,                               # Bathymetry file for land/sea mask
    # plotres = plotres,                             # Plotting function commented out - no maps generated
    mask = mask_edit,                                # Edited mask for analysis domain
    fitcorrlen = false,                              # Don't fit correlation lengths automatically
    niter_e = 0,                                     # Disable error estimation for speed (was 2)
    ncvarattrib = ncvarattrib,                       # NetCDF variable attributes
    ncglobalattrib = ncglobalattrib,                 # NetCDF global attributes
    surfextend = true                                # Extend surface values to deeper levels if needed
    );

# Save observation metadata to the output file
DIVAnd.saveobs(filename,(obslon,obslat,obsdepth,obstime),obsid);

UndefVarError: UndefVarError: `mask_edit` not defined in `Main`
Suggestion: check for spelling errors or missing imports.