From 711f2b59c9d3075f5a423d562e24f01d88241327 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Wed, 21 Feb 2024 22:43:43 +0100 Subject: [PATCH] [Docs] Add kendall function example * Add kendall function example --- docs/Manifest.toml | 2 +- docs/Project.toml | 1 + docs/make.jl | 1 + docs/src/examples/lambda_viz.md | 81 +++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 docs/src/examples/lambda_viz.md diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 628b582..1c72dc1 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.0" manifest_format = "2.0" -project_hash = "ab0d1e9747a499a467f203cc4e7c184d6495c3d5" +project_hash = "9003cf1c54885a2e3e12adce1def8b14e84ad055" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" diff --git a/docs/Project.toml b/docs/Project.toml index f892adc..0ebc571 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -5,3 +5,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/docs/make.jl b/docs/make.jl index 5e0fe5e..efae9cb 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -41,6 +41,7 @@ makedocs(; "Transformed Copulas" => "transformations.md", ], "Examples" => [ + "examples/lambda_viz.md", "examples/lossalae.md", "examples/fitting_sklar.md", "examples/turing.md", diff --git a/docs/src/examples/lambda_viz.md b/docs/src/examples/lambda_viz.md new file mode 100644 index 0000000..320994a --- /dev/null +++ b/docs/src/examples/lambda_viz.md @@ -0,0 +1,81 @@ +# Empirical Kendall function and Archimedean's λ function. + +The Kendall function is an important function in dependence structure analysis. 
It is defined for a $d$-variate copula $C$ as + +$$K(t) = \mathbb P \left( C(U_1,...,U_d) \le t \right),$$ + +where $\bm U = \left(U_1,...,U_d\right)$ is drawn according to $C$. +From a computational point of view, we often do not have access to true observations of the random vector $\bm U \sim C$ but rather only observations on the marginal scales. + +Suppose for the sake of the argument that we have a multivariate sample on marginal scales $\left(X_{i,j}\right)_{i \in 1,...,d,\; j \in 1,...,n}$ with dependence structure $C$. +A standard way to approximate $K$ is to compute instead +$$Z_j = \frac{1}{n-1} \sum_{k \neq j} \bm 1_{X_{i,k} < X_{i,j} \forall i \in 1,...,d}.$$ + +Indeed, $K$ can be approximated as the empirical distribution function of $Z_1,...,Z_n$. Here is a sketch implementation of this concept: +```@example lambda +struct KendallFunction{T} + z::Vector{T} + function KendallFunction(x) + d,n = size(x) + z = zeros(n) + for i in 1:n + for j in 1:n + if j ≠ i + z[i] += reduce(&, x[:,j] .< x[:,i]) + end + end + end + z ./= (n-1) + sort!(z) + return new{eltype(z)}(z) + end +end +function (K::KendallFunction)(t) + # Then the K function is simply the empirical cdf of the Z sample: + return sum(K.z .≤ t)/length(K.z) +end +nothing # hide +``` + +Let us try it on a random example: + +```@example lambda +using Copulas, Distributions, Plots +X = SklarDist(ClaytonCopula(2,2.7),(Normal(),Pareto())) +x = rand(X,1000) +K = KendallFunction(x) +plot(u -> K(u), xlims = (0,1), title="Empirical Kendall function") +``` + +One notable detail on the Kendall function is that it does **not** characterize the copula in all generality. On the other hand, for bivariate Archimedean copulas, we have: +$$K(t) = t - \phi'\{\phi^{-1}(t)\} \phi^{-1}(t).$$ + +Due to this particular relationship, the Kendall function actually characterizes the generator of the Archimedean copula.
In fact, this relationship is generally expressed in terms of a λ function defined as $$\lambda(t) = t - K(t),$$ which, for Archimedean copulas, is obviously equal to $\phi'\{\phi^{-1}(t)\} \phi^{-1}(t)$. + +Common λ functions can be easily derived by hand for standard Archimedean generators. For any Archimedean generator in the package, however, it is even easier to let Julia do the derivation. + +Let's try to compare the empirical λ function from our dataset to a few theoretical ones. For that, we set up the parameters of the relevant generators to match the Kendall τ of the dataset (because we can). We include for the record the independence and comonotonic cases. + +```@example lambda +using Copulas: ϕ⁽¹⁾, ϕ⁻¹, τ⁻¹, ClaytonGenerator, GumbelGenerator +using StatsBase: corkendall +λ(G,t) = ϕ⁽¹⁾(G,ϕ⁻¹(G,t)) * ϕ⁻¹(G,t) +plot(u -> u - K(u), xlims = (0,1), label="Empirical λ function") +κ = corkendall(x')[1,2] # empirical kendall tau +θ_cl = τ⁻¹(ClaytonGenerator,κ) +θ_gb = τ⁻¹(GumbelGenerator,κ) +plot!(u -> λ(ClaytonGenerator(θ_cl),u), label="Clayton") +plot!(u -> λ(GumbelGenerator(θ_gb),u), label="Gumbel") +plot!(u -> 0, label="Comonotony") +plot!(u -> u*log(u), label="Independence") +``` + +The variance of the empirical λ function is notable on this example. In particular, we note that the estimated parameter +```@example lambda +θ_cl +``` +is not very far from the true $2.7$ we used to generate the dataset. A few more things could be tried before closing up the analysis on a real dataset: + +- Empirical validation of the Archimedean property of the data, and then +- Non-parametric estimation of the generator from the empirical Kendall function, or through other means. +- Non-Archimedean parametric models.