In [11]:
############################################################
# SINDy PROPRE – STLSQ + BASE CANONIQUE + HIÉRARCHIE
############################################################

using NPZ
using LinearAlgebra
using Statistics
using Printf

############################################################
# CONFIG
############################################################

FILEPATH = "data/processed/sstReducedState2COPERNICUS20102019.npz"

POLY_DEGREE = 2
K_DOMINANT  = 3          # modes dominants autorisés dans les interactions
LAMBDA_LIST = [0.05, 0.1, 0.2, 0.5]
TAU_POST    = 0.1        # seuil post-STLSQ

############################################################
# LOAD DATA
############################################################

data = npzread(FILEPATH)

Z  = Float64.(data["Z"])    # (time, state)
dZ = Float64.(data["dZ"])

Z  = permutedims(Z)         # (state, time)
dZ = permutedims(dZ)

n_state, T = size(Z)

############################################################
# BUILD CANONICAL LIBRARY Θ
############################################################

basis_labels = String[]

# constant
push!(basis_labels, "1")

# linear
for i in 1:n_state
    push!(basis_labels, "x[$i]")
end

# quadratic (CANONICAL + HIÉRARCHIE)
for i in 1:n_state
    for j in i:min(n_state, K_DOMINANT)
        push!(basis_labels, "x[$i]*x[$j]")
    end
end

n_basis = length(basis_labels)

Θ = zeros(Float64, T, n_basis)

for t in 1:T
    col = 1
    Θ[t, col] = 1.0
    col += 1

    # linear
    for i in 1:n_state
        Θ[t, col] = Z[i, t]
        col += 1
    end

    # quadratic
    for i in 1:n_state
        for j in i:min(n_state, K_DOMINANT)
            Θ[t, col] = Z[i, t] * Z[j, t]
            col += 1
        end
    end
end

############################################################
# STLSQ
############################################################

function stlsq(Θ, y; λ=0.1, n_iter=10)
    ξ = Θ \ y
    for _ in 1:n_iter
        small = abs.(ξ) .< λ
        ξ[small] .= 0.0
        if any(.!small)
            ξ[.!small] = Θ[:, .!small] \ y
        end
    end
    ξ
end

function hard_threshold!(Ξ, τ)
    Ξ[abs.(Ξ) .< τ] .= 0.0
end

############################################################
# TRAIN + MODEL SELECTION
############################################################

best_rmse = Inf
best_Ξ = nothing
best_λ = nothing

for λ in LAMBDA_LIST
    Ξ = zeros(Float64, n_basis, n_state)

    for i in 1:n_state
        Ξ[:, i] = stlsq(Θ, dZ[i, :]; λ=λ)
    end

    hard_threshold!(Ξ, TAU_POST)

    dZ_pred = (Θ * Ξ)'     # (state, time)
    rmse_b = sqrt(mean((dZ_pred .- dZ).^2))
    sparsity = 100 * count(iszero, Ξ) / length(Ξ)

    @printf "λ = %.2f | RMSE = %.4f | Sparsity = %.1f%%\n" λ rmse_b sparsity

    if rmse_b < best_rmse && sparsity < 100
        best_rmse = rmse_b
        best_Ξ = copy(Ξ)
        best_λ = λ
    end
end

############################################################
# RESULTS
############################################################

println("\nBEST λ = ", best_λ)
@printf "RMSE = %.4f\n" best_rmse

println("\nDiscovered equations (first 5 states):")

for i in 1:min(5, n_state)
    println("\ndx$i/dt =")
    for j in findall(!iszero, best_Ξ[:, i])
        @printf "  %+0.4f * %s\n" best_Ξ[j, i] basis_labels[j]
    end
end


λ = 0.05 | RMSE = 0.3677 | Sparsity = 61.0%
λ = 0.10 | RMSE = 0.2856 | Sparsity = 84.3%
λ = 0.20 | RMSE = 0.2862 | Sparsity = 99.0%
λ = 0.50 | RMSE = 0.2863 | Sparsity = 100.0%

BEST λ = 0.1
RMSE = 0.2856

Discovered equations (first 5 states):

dx1/dt =
  +0.1442 * x[5]
  -0.1566 * x[10]
  +0.2435 * x[11]
  -0.2292 * x[16]

dx2/dt =
  +0.1399 * x[4]
  +0.1448 * x[5]
  -0.1619 * x[10]
  +0.1189 * x[11]
  +0.2272 * x[12]
  -0.3107 * x[15]
  -0.1588 * x[16]

dx3/dt =
  -0.1148 * x[1]
  +0.1022 * x[4]
  +0.1669 * x[11]
  -0.1540 * x[15]

dx4/dt =
  -0.1024 * x[1]
  +0.1015 * x[11]
  -0.1106 * x[1]*x[1]
  +0.2359 * x[1]*x[3]
  -0.1219 * x[3]*x[3]

dx5/dt =
  -0.2045 * x[1]
  +0.2432 * x[11]
  +0.1589 * x[13]
  -0.1980 * x[16]


In [8]:
############################################################
# PURE SINDy – STLSQ MANUEL (STABLE + CONTRAINTE)
############################################################

using NPZ
using LinearAlgebra
using Statistics
using ModelingToolkit
using Printf

############################################################
# 1. LOAD DATA
############################################################

data = npzread("data/processed/sstReducedState2COPERNICUS20102019.npz")

Z  = Float64.(data["Z"])     # (time, state)
dZ = Float64.(data["dZ"])

Z  = permutedims(Z)          # (state, time)
dZ = permutedims(dZ)

n_state, T = size(Z)

############################################################
# 2. NORMALISATION (SAFE)
############################################################

μ = mean(Z, dims=2)
σ = std(Z, dims=2)
σ[σ .== 0.0] .= 1.0

Ẑ  = (Z .- μ) ./ σ
dẐ = dZ ./ σ

############################################################
# 3. LIBRAIRIE POLYNOMIALE CONTRAINTE (FIX)
############################################################

POLY_DEGREE = 2
K_DOMINANT  = 3

@variables x[1:n_state]

basis = Num[]

# constante
push!(basis, 1)

# linéaire (FIX ICI)
for i in 1:n_state
    push!(basis, x[i])
end

# carrés (modes dominants)
for i in 1:K_DOMINANT
    push!(basis, x[i]^2)
end

# interactions limitées
for i in 1:n_state
    for j in 1:K_DOMINANT
        i != j && push!(basis, x[i] * x[j])
    end
end

############################################################
# 4. BUILD Θ
############################################################

Θ_fun = ModelingToolkit.build_function(
    basis, x, expression=Val(false)
)[1]

Θ = zeros(Float64, T, length(basis))
for t in 1:T
    Θ[t, :] .= Θ_fun(Ẑ[:, t])
end

############################################################
# 5. STLSQ (ROBUSTE)
############################################################

function stlsq(Θ, y; λ=1e-2, n_iter=10)
    ξ = Θ \ y
    for _ in 1:n_iter
        small = abs.(ξ) .< λ
        ξ[small] .= 0.0
        big = .!small
        if any(big)
            ξ[big] = Θ[:, big] \ y
        end
    end
    ξ
end

############################################################
# 6. SCAN DE λ
############################################################

lambdas = [0.05, 0.1, 0.2, 0.5, 1.0]

best_rmse = Inf
best_λ = nothing
best_Ξ = nothing

for λ in lambdas
    Ξ = zeros(Float64, length(basis), n_state)

    for i in 1:n_state
        Ξ[:, i] = stlsq(Θ, dẐ[i, :]; λ=λ)
    end

    dẐ_pred = (Θ * Ξ)'
    rmse_y = sqrt(mean((dẐ_pred .- dẐ).^2))
    sparsity = count(abs.(Ξ) .< 1e-8) / length(Ξ)

    @printf("λ = %.2f | RMSE = %.4f | Sparsity = %.1f%%\n",
            λ, rmse_y, 100sparsity)

    if rmse_y < best_rmse && sparsity > 0.2
        best_rmse = rmse_y
        best_λ = λ
        best_Ξ = copy(Ξ)
    end
end

############################################################
# 7. RÉSULTATS
############################################################

println("\nBEST λ = ", best_λ)
println("RMSE = ", round(best_rmse, digits=4))

############################################################
# 8. AFFICHAGE DES ÉQUATIONS
############################################################

println("\nDiscovered equations (first 5 states):")

for i in 1:min(5, n_state)
    println("\ndx$i/dt =")
    for j in findall(!iszero, best_Ξ[:, i])
        @printf("  %+ .4f * %s\n", best_Ξ[j,i], basis[j])
    end
end


λ = 0.05 | RMSE = 0.2873 | Sparsity = 20.1%
λ = 0.10 | RMSE = 0.2881 | Sparsity = 47.3%
λ = 0.20 | RMSE = 0.2906 | Sparsity = 82.8%
λ = 0.50 | RMSE = 0.2927 | Sparsity = 100.0%
λ = 1.00 | RMSE = 0.2927 | Sparsity = 100.0%

BEST λ = 0.05
RMSE = 0.2873

Discovered equations (first 5 states):

dx1/dt =
  -0.1703 * x[1]
  -0.0608 * x[3]
  +0.2030 * x[4]
  +0.1135 * x[5]
  +0.0585 * x[6]
  -0.0869 * x[8]
  -0.1019 * x[10]
  +0.3503 * x[11]
  +0.0923 * x[12]
  +0.0581 * x[13]
  +0.0621 * x[14]
  -0.2666 * x[15]
  -0.2467 * x[16]
  -0.0884 * x[1]^2
  +0.2587 * x[3]^2
  +0.1200 * x[1]*x[2]
  -0.2793 * x[1]*x[3]
  +0.1200 * x[1]*x[2]
  +0.1233 * x[2]*x[3]
  -0.2793 * x[1]*x[3]
  +0.1233 * x[2]*x[3]
  +0.3151 * x[1]*x[4]
  -0.0763 * x[2]*x[4]
  -0.4892 * x[3]*x[4]
  -0.0801 * x[1]*x[5]
  +0.1605 * x[3]*x[5]
  +0.0660 * x[1]*x[6]
  -0.1166 * x[3]*x[6]
  +0.4081 * x[1]*x[7]
  +0.3168 * x[2]*x[7]
  -0.5562 * x[3]*x[7]
  +0.1683 * x[1]*x[8]
  -0.0594 * x[3]*x[8]
  -0.2799 * x[1]*x[9]
  -0.1982 * x[2