From 711f2b59c9d3075f5a423d562e24f01d88241327 Mon Sep 17 00:00:00 2001
From: Oskar Laverny <oskar.laverny@gmail.com>
Date: Wed, 21 Feb 2024 22:43:43 +0100
Subject: [PATCH] [Docs] Add kendall function example

* Add kendall function example
---
 docs/Manifest.toml              |  2 +-
 docs/Project.toml               |  1 +
 docs/make.jl                    |  1 +
 docs/src/examples/lambda_viz.md | 81 +++++++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/examples/lambda_viz.md

diff --git a/docs/Manifest.toml b/docs/Manifest.toml
index 628b582..1c72dc1 100644
--- a/docs/Manifest.toml
+++ b/docs/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.10.0"
 manifest_format = "2.0"
-project_hash = "ab0d1e9747a499a467f203cc4e7c184d6495c3d5"
+project_hash = "9003cf1c54885a2e3e12adce1def8b14e84ad055"
 
 [[deps.ANSIColoredPrinters]]
 git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
diff --git a/docs/Project.toml b/docs/Project.toml
index f892adc..0ebc571 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -5,3 +5,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
 LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
diff --git a/docs/make.jl b/docs/make.jl
index 5e0fe5e..efae9cb 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -41,6 +41,7 @@ makedocs(;
             "Transformed Copulas" => "transformations.md",
         ],
         "Examples" => [
+            "examples/lambda_viz.md",
             "examples/lossalae.md",
             "examples/fitting_sklar.md",
             "examples/turing.md",
diff --git a/docs/src/examples/lambda_viz.md b/docs/src/examples/lambda_viz.md
new file mode 100644
index 0000000..320994a
--- /dev/null
+++ b/docs/src/examples/lambda_viz.md
@@ -0,0 +1,81 @@
+# Empirical Kendall function and Archimedean's λ function.
+
+The Kendall function is an important function in dependence structure analysis. It is defined for a $d$-variate copula $C$ as 
+
+$$K(t) = \mathbb P \left( C(U_1,...,U_d) \le t \right),$$
+
+where $\bm U = \left(U_1,...,U_d\right)$ is drawn according to $C$.
+From a computational point of view, we often do not have access to true observations of the random vector $\bm U \sim C$ but rather only to observations on the marginal scales.
+
+Suppose for the sake of the argument that we have a multivariate sample on marginal scales $\left(X_{i,j}\right)_{i \in 1,...,d,\; j \in 1,...,n}$ with dependence structure $C$.
+A standard way to approximate $K$ is to compute instead
+$$Z_j = \frac{1}{n-1} \sum_{k \neq j} \bm 1_{X_{i,k} < X_{i,j} \forall i \in 1,...,d}.$$
+
+Indeed, $K$ can be approximated as the empirical distribution function of $Z_1,...,Z_n$. Here is a sketch implementation of this concept:
+```@example lambda
+struct KendallFunction{T}
+    z::Vector{T} # sorted Z sample; its empirical cdf approximates the Kendall function
+    # Build from a d×n matrix `x` holding one observation per column.
+    function KendallFunction(x)
+        d,n = size(x)
+        z = zeros(n)
+        for i in 1:n, j in 1:n
+            if j ≠ i
+                # Bool adds as 0/1: is observation j strictly below observation i in every coordinate?
+                z[i] += all(x[k,j] < x[k,i] for k in 1:d) # generator: no slice copies
+            end
+        end
+        z ./= (n-1) # turn counts into a fraction of the n-1 other observations
+        sort!(z) # kept sorted so that evaluating the cdf can rely on the order
+        return new{eltype(z)}(z)
+    end
+end
+function (K::KendallFunction)(t)
+    # K is the empirical cdf of the Z sample; K.z is sorted by the constructor, so count z ≤ t by binary search:
+    return searchsortedlast(K.z, t)/length(K.z)
+end
+nothing # hide
+```
+
+Let us try it on a random example: 
+
+```@example lambda
+using Copulas, Distributions, Plots
+X = SklarDist(ClaytonCopula(2,2.7),(Normal(),Pareto())) # Clayton copula (dimension 2, parameter 2.7) with Normal and Pareto marginals
+x = rand(X,1000) # 1000 draws; KendallFunction reads them as columns (d,n = size(x))
+K = KendallFunction(x)
+plot(u -> K(u), xlims = (0,1), title="Empirical Kendall function")
+```
+
+One notable detail on the Kendall function is that it does **not** characterize the copula in all generality. On the other hand, for Archimedean copulas, we have:
+$$K(t) = t - \phi'\{\phi^{-1}(t)\} \phi^{-1}(t).$$
+
+Due to this particular relationship, the Kendall function actually characterizes the generator of the Archimedean copula. In fact, this relationship is generally expressed in terms of a λ function defined as $$\lambda(t) = t - K(t),$$ which, for Archimedean copulas, is obviously equal to $\phi'\{\phi^{-1}(t)\} \phi^{-1}(t)$.
+
+Common λ functions can be easily derived by hand for standard archimedean generators. For any archimedean generator in the package, however, it is even easier to let Julia do the derivation. 
+
+Let's try to compare the empirical λ function from our dataset to a few theoretical ones. For that, we set up the parameters of the relevant generators to match the Kendall τ of the dataset (because we can). We include for the record the independence and comonotonic cases.
+
+```@example lambda
+using Copulas: ϕ⁽¹⁾, ϕ⁻¹, τ⁻¹, ClaytonGenerator, GumbelGenerator
+using StatsBase: corkendall
+λ(G,t) = ϕ⁽¹⁾(G,ϕ⁻¹(G,t)) * ϕ⁻¹(G,t) # theoretical λ of generator G: ϕ'(ϕ⁻¹(t)) ϕ⁻¹(t)
+plot(u -> u - K(u), xlims = (0,1), label="Empirical λ function") # λ(t) = t - K(t) with the empirical K
+κ = corkendall(x')[1,2] # empirical Kendall τ between the two coordinates (x' puts observations in rows)
+θ_cl = τ⁻¹(ClaytonGenerator,κ) # Clayton parameter chosen to match κ
+θ_gb = τ⁻¹(GumbelGenerator,κ) # Gumbel parameter chosen to match κ
+plot!(u -> λ(ClaytonGenerator(θ_cl),u), label="Clayton")
+plot!(u -> λ(GumbelGenerator(θ_gb),u), label="Gumbel")
+plot!(u -> 0, label="Comonotony")
+plot!(u -> u*log(u), label="Independence")
+```
+
+The variance of the empirical λ function is notable on this example. In particular, we note that the estimated parameter
+```@example lambda
+θ_cl # Clayton parameter obtained above from the empirical Kendall τ
+```
+is not very far from the true $2.7$ we used to generate the dataset. A few more things could be tried before closing up the analysis on a real dataset:
+
+- Empirical validation of the archimedean property of the data, and then
+- Non-parametric estimation of the generator from the empirical Kendall function, or through other means.
+- Non-archimedean parametric models.