Merge pull request #33 from s-broda/glm

Support GLM, predicting Regressions, and fix a bug in predict
s-broda · Apr 5, 2019 · 9bd20df · 9bd20df
2 parents 0861a7e + 3356309
commit 9bd20df
Show file tree

Hide file tree

Showing 12 changed files with 98 additions and 53 deletions.
diff --git a/Project.toml b/Project.toml
@@ -13,6 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -23,7 +24,10 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 julia = "1.0.0"
 
 [extras]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
+StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["Test", "GLM", "DataFrames"]
diff --git a/REQUIRE b/REQUIRE
@@ -5,6 +5,7 @@ ForwardDiff
 HypothesisTests
 Optim
 Reexport
+Requires
 Roots
 SpecialFunctions
 StatsBase

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -2,3 +2,5 @@
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
diff --git a/docs/make.jl b/docs/make.jl
@@ -2,7 +2,7 @@ push!(LOAD_PATH,"../src/")
 using Documenter, ARCHModels
 makedocs(modules=[ARCHModels],
         sitename="ARCHModels.jl",
-        assets=["assets/invenia.css"],
+        format = Documenter.HTML(assets=["assets/invenia.css"]),
         doctest=true,
         strict=true,
         pages = ["Home" => "index.md",

diff --git a/docs/src/assets/LOGO.jpg → docs/src/assets/EULOGO.jpg b/docs/src/assets/LOGO.jpg → docs/src/assets/EULOGO.jpg
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -43,4 +43,4 @@ Depth = 2
 
 This project has received funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation program (grant agreement No. 750559).
 
-![EU LOGO](assets/LOGO.jpg)
+![EU LOGO](assets/EULOGO.jpg)
diff --git a/docs/src/manual.md b/docs/src/manual.md
@@ -52,6 +52,7 @@ Details:
 The null is strongly rejected, again providing evidence for the presence of volatility clustering.
 
 ## Estimation
+### Standalone Models
 Having established the presence of volatility clustering, we can begin by fitting the workhorse model of volatility modeling, a GARCH(1, 1) with standard normal errors;  for other model classes such as [`EGARCH`](@ref), see the [section on volatility specifications](@ref volaspec).
 
 ```
@@ -179,7 +180,35 @@ false
 julia> am2.spec.coefs == am.spec.coefs
 true
 ```
+### Integration with GLM.jl
+Assuming the [GLM](https://github.com/JuliaStats/GLM.jl) (and possibly [DataFrames](https://github.com/JuliaData/DataFrames.jl)) packages are installed, it is also possible to pass a `LinearModel` (or `DataFrameRegressionModel`) to [`fit`](@ref) instead of a data vector. This is equivalent to using a [`Regression`](@ref) as a mean specification. In the following example, we fit a linear model with [`GARCH{1, 1}`](@ref) errors, where the design matrix consists of a breaking intercept and time trend:
+```jldoctest MANUAL
+julia> using GLM, DataFrames
+
+julia> data = DataFrame(B=[ones(1000); zeros(974)], T=1:1974, Y=BG96);
+
+julia> model = lm(@formula(Y ~ B*T), data);
+
+julia> fit(GARCH{1, 1}, model)
+
+TGARCH{0,1,1} model with Gaussian errors, T=1974.
+
 
+Mean equation parameters:
+
+                Estimate  Std.Error  z value Pr(>|z|)
+(Intercept)     0.061008  0.0598973  1.01854   0.3084
+B              -0.104142  0.0660947 -1.57565   0.1151
+T            -3.79532e-5 3.61469e-5 -1.04997   0.2937
+B & T         8.11722e-5 4.95122e-5  1.63944   0.1011
+
+Volatility parameters:
+
+      Estimate  Std.Error z value Pr(>|z|)
+ω    0.0103294 0.00591883 1.74518   0.0810
+β₁    0.808781   0.066084 12.2387   <1e-33
+α₁    0.152648  0.0499813  3.0541   0.0023
+```
 ## Model selection
 The function [`selectmodel`](@ref) can be used for automatic model selection, based on an information crititerion. Given
 a class of model (i.e., a subtype of [`VolatilitySpec`](@ref)), it will return a fitted [`UnivariateARCHModel`](@ref), with the lag length
@@ -370,8 +399,7 @@ julia> am4 = simulate(am3, 1000); # passing the number of observations is option
 ```
 Care must be taken if the mean specification has a notion of sample size, as in the case of [`Regression`](@ref): because the sample size must match that of the data to be simulated, one must pass `warmup=0`, or an error will be thrown. For example, `am3` above could also have been simulated from as follows:
 ```jldoctest MANUAL
-julia> reg = Regression([5], ones(1000, 1))
-Regression{1,Float64}([5.0], [1.0; 1.0; … ; 1.0; 1.0])
+julia> reg = Regression([5], ones(1000, 1));
 
 julia> am3 = simulate(GARCH{1, 1}([1., .9, .05]), 1000; warmup=0, meanspec=reg, dist=StdT(3.))
 

diff --git a/docs/src/types.md b/docs/src/types.md
@@ -96,6 +96,8 @@ julia> reg = Regression(X);
 ```
 In this example, we created a regression model containing one regressor, given by a column of ones; this is equivalent to including an intercept in the model (see [`Intercept`](@ref) above). In general, the constructor should be passed a design matrix ``\mathbf{X}`` containing ``\{\mathbf{x}_t^{\mathrm{\scriptscriptstyle T}}\}_{t=1\ldots T}`` as its rows; that is, for a model with ``T`` observations and ``k`` regressors, ``X`` would have dimensions ``T\times k``.
 
+Another way to create a linear regression with ARCH errors is to pass a `LinearModel` or `DataFrameRegressionModel` from [GLM.jl](https://github.com/JuliaStats/GLM.jl) to [`fit`](@ref), as described under [Integration with GLM.jl](@ref).
+
 * An ARMA(p, q) model: ``\mu_t=c+\sum_{i=1}^p \varphi_i r_{t-i}+\sum_{i=1}^q \theta_i a_{t-i}``. Available as [`ARMA{p, q}`](@ref):
 ```jldoctest TYPES
 julia> ARMA{1, 1}([1., .9, -.1])
@@ -109,28 +111,6 @@ julia> MA{1}([1., -.1])
 ARMA{0,1,Float64}([1.0, -0.1])
 ```
 
-As an example, an ARMA(1, 1)-EGARCH(1, 1, 1) model is fitted as follows:
-```jldoctest TYPES
-julia> fit(EGARCH{1, 1, 1}, BG96; meanspec=ARMA{1, 1})
-
-EGARCH{1,1,1} model with Gaussian errors, T=1974.
-
-
-Mean equation parameters:
-
-       Estimate Std.Error  z value Pr(>|z|)
-c    -0.0215594 0.0136142 -1.58359   0.1133
-φ₁    -0.525702   0.20819  -2.5251   0.0116
-θ₁     0.574669  0.198072  2.90131   0.0037
-
-Volatility parameters:
-
-       Estimate Std.Error  z value Pr(>|z|)
-ω     -0.132921 0.0496466 -2.67735   0.0074
-γ₁   -0.0399304 0.0258478 -1.54483   0.1224
-β₁     0.908516 0.0319702  28.4175   <1e-99
-α₁     0.343967 0.0680369  5.05559    <1e-6
-```
 ## Distributions
 ### Built-in distributions
 Different standardized (mean zero, variance one) distributions for ``z_t`` are available as subtypes of [`StandardizedDistribution`](@ref). `StandardizedDistribution` in turn subtypes `Distribution{Univariate, Continuous}` from [Distributions.jl](https://github.com/JuliaStats/Distributions.jl), though not the entire interface must necessarily be implemented. `StandardizedDistribution`s again hold their parameters as vectors, but convenience constructors are provided. The following are currently available:

diff --git a/src/ARCHModels.jl b/src/ARCHModels.jl
@@ -17,6 +17,7 @@ The ARCHModels package for Julia. For documentation, see https://s-broda.github.
 """
 module ARCHModels
 using Reexport
+using Requires
 @reexport using StatsBase
 using StatsFuns: normcdf, normccdf, normlogpdf, norminvcdf, log2π, logtwo, RFunctions.tdistrand, RFunctions.tdistinvcdf, RFunctions.gammarand, RFunctions.gammainvcdf
 using SpecialFunctions: beta, lgamma, gamma
@@ -28,6 +29,7 @@ using Roots
 using LinearAlgebra
 using DataStructures: CircularBuffer
 using DelimitedFiles
+
 import Distributions: quantile
 import Base: show, showerror, eltype
 import Statistics: mean
@@ -36,7 +38,6 @@ import HypothesisTests: HypothesisTest, testname, population_param_of_interest,
 import StatsBase: StatisticalModel, stderror, loglikelihood, nobs, fit, fit!, confint, aic,
                   bic, aicc, dof, coef, coefnames, coeftable, CoefTable,
 				  informationmatrix, islinear, score, vcov, residuals, predict
-
 export ARCHModel, UnivariateARCHModel, VolatilitySpec, StandardizedDistribution, Standardized, MeanSpec,
        simulate, simulate!, selectmodel, StdNormal, StdT, StdGED, Intercept, Regression,
        NoIntercept, ARMA, AR, MA, BG96, volatilities, mean, quantile, VaRs, pvalue, means
@@ -50,5 +51,16 @@ include("univariatestandardizeddistributions.jl")
 include("EGARCH.jl")
 include("TGARCH.jl")
 include("tests.jl")
-
+function __init__()
+	@require GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" begin
+		using .GLM
+		import .StatsModels: DataFrameRegressionModel
+		function fit(vs::Type{VS}, lm::DataFrameRegressionModel{<:LinearModel}; kwargs...) where VS<:VolatilitySpec
+			fit(vs, response(lm.model); meanspec=Regression(modelmatrix(lm.model); coefnames=coefnames(lm)), kwargs...)
+		end
+		function fit(vs::Type{VS}, lm::LinearModel; kwargs...) where VS<:VolatilitySpec
+			fit(vs, response(lm); meanspec=Regression(modelmatrix(lm)), kwargs...)
+		end
+	end
+end
 end#module
diff --git a/src/meanspecs.jl b/src/meanspecs.jl
@@ -1,4 +1,3 @@
-#TODO: maybe change mean(ARMA, Regression) so that it operates on at-1 etc? what to do about predict? allow X to be longer than data? check if inlining push! gives extra speedup
 ################################################################################
 #NoIntercept
 """
@@ -177,35 +176,35 @@ A linear regression as mean specification.
 struct Regression{k, T} <: MeanSpec{T}
     coefs::Vector{T}
     X::Matrix{T}
-    function Regression{k, T}(coefs, X) where {k, T}
+    coefnames::Vector{String}
+    function Regression{k, T}(coefs, X; coefnames=(i -> "β"*subscript(i)).([0:(k-1)...])) where {k, T}
         X = X[:, :]
-        nparams(Regression{k, T}) == size(X, 2) == k || throw(NumParamError(size(X, 2), length(coefs)))
-        return new{k, T}(coefs, X)
+        nparams(Regression{k, T}) == size(X, 2) == length(coefnames) == k || throw(NumParamError(size(X, 2), length(coefs)))
+        return new{k, T}(coefs, X, coefnames)
     end
 end
 """
-    Regression(coefs::Vector, X::Matrix)
-    Regression(X::Matrix)
-    Regression{T}(X::Matrix)
+    Regression(coefs::Vector, X::Matrix; coefnames=[β₀, β₁, …])
+    Regression(X::Matrix; coefnames=[β₀, β₁, …])
+    Regression{T}(X::Matrix; coefnames=[β₀, β₁, …])
 Create a regression model.
 """
-Regression(coefs::Vector{T}, X::MatOrVec{T}) where {T} = Regression{length(coefs), T}(coefs, X)
-Regression(coefs::Vector, X::MatOrVec) = (T = float(promote_type(eltype(coefs), eltype(X))); Regression{length(coefs), T}(convert.(T, coefs), convert.(T, X)))
-Regression{T}(X::MatOrVec) where T = Regression(Vector{T}(undef, size(X, 2)), convert.(T, X))
-Regression(X::MatOrVec{T}) where T<:AbstractFloat = Regression{T}(X)
-Regression(X::MatOrVec) = Regression(float.(X))
+Regression(coefs::Vector{T}, X::MatOrVec{T}; kwargs...) where {T} = Regression{length(coefs), T}(coefs, X; kwargs...)
+Regression(coefs::Vector, X::MatOrVec; kwargs...) = (T = float(promote_type(eltype(coefs), eltype(X))); Regression{length(coefs), T}(convert.(T, coefs), convert.(T, X); kwargs...))
+Regression{T}(X::MatOrVec; kwargs...) where T = Regression(Vector{T}(undef, size(X, 2)), convert.(T, X); kwargs...)
+Regression(X::MatOrVec{T}; kwargs...) where T<:AbstractFloat = Regression{T}(X; kwargs...)
+Regression(X::MatOrVec; kwargs...) = Regression(float.(X); kwargs...)
 nparams(::Type{Regression{k, T}}) where {k, T} = k
-function coefnames(::Regression{k, T}) where {k, T}
-    names= (i -> "β"*subscript(i)).([0:(k-1)...])
-    return names
+function coefnames(R::Regression{k, T}) where {k, T}
+    return R.coefnames
 end
 
 @inline presample(::Regression) = 0
 
 Base.@propagate_inbounds @inline function mean(
     at, ht, lht, data, meanspec::Regression{k}, meancoefs::Vector{T}, t
     ) where {k, T}
-    size(meanspec.X, 1) == length(data) || error("number of observations in X (N=$(size(meanspec.X, 1))) does not match the data (N=$(length(data))). If you are simulating, consider passing `warmup=0`.")
+    t > size(meanspec.X, 1) && error("insufficient number of observations in X (T=$(size(meanspec.X, 1))) to evaluate conditional mean at $t. Consider padding the design matrix. If you are simulating, consider passing `warmup=0`.")
     mean = T(0)
     for i = 1:k
         mean += meancoefs[i] * meanspec.X[t, i]
@@ -223,7 +222,8 @@ function constraints(::Type{<:Regression{k}}, ::Type{T})  where {k, T}
 end
 
 function startingvals(reg::Regression{k, T}, data::Vector{T})  where {k, T<:AbstractFloat}
-    beta = reg.X \ data
+    N = length(data)
+    beta = reg.X[1:N, :] \ data # allow extra entries in X for prediction
 end
 
 

diff --git a/src/univariatearchmodel.jl b/src/univariatearchmodel.jl
@@ -196,8 +196,11 @@ function predict(am::UnivariateARCHModel{T, VS, SD}, what=:volatility; level=0.0
 	lht = log.(ht)
 	zt = residuals(am)
 	at = residuals(am, standardized=false)
-	t = length(am.data)
-	themean = mean(at, ht, lht, am.data, am.meanspec, am.meanspec.coefs, t)
+	t = length(am.data) + 1
+
+	if what == :return || what == :VaR
+		themean = mean(at, ht, lht, am.data, am.meanspec, am.meanspec.coefs, t)
+	end
 	update!(ht, lht, zt, at, VS, am.meanspec, am.data, am.spec.coefs, am.meanspec.coefs)
 	if what == :return
 		return themean
@@ -362,7 +365,8 @@ end
     fit(VS::Type{<:VolatilitySpec}, data; dist=StdNormal, meanspec=Intercept,
         algorithm=BFGS(), autodiff=:forward, kwargs...)
 
-Fit the ARCH model specified by `VS` to data.
+Fit the ARCH model specified by `VS` to `data`. `data` can be a vector or a
+GLM.LinearModel (or GLM.DataFrameRegressionModel).
 
 # Keyword arguments:
 - `dist=StdNormal`: the error distribution.
@@ -390,6 +394,8 @@ Distribution parameters:
 ν     4.12423   0.40059 10.2954   <1e-24
 ```
 """
+function fit end
+
 function fit(::Type{VS}, data::Vector{T}; dist::Type{SD}=StdNormal{T},
              meanspec::Union{MS, Type{MS}}=Intercept{T}(T[0]), algorithm=BFGS(),
              autodiff=:forward, kwargs...

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -2,6 +2,9 @@ using Test
 
 using ARCHModels
 using Random
+using GLM
+using DataFrames
+
 T = 10^4;
 @testset "TGARCH" begin
     @test ARCHModels.nparams(TGARCH{1, 2, 3}) == 7
@@ -200,7 +203,7 @@ end
                                   0.2357782619617334,
                                   -0.05909354030019596,
                                   0.2878312346045116], rtol=1e-4))
-    @test predict(am, :return) ≈ -1.6610785718124492 rtol = 1e-6
+    @test predict(am, :return) ≈ -1.4572460532296017 rtol = 1e-6
     am = selectmodel(ARCH, BG96;  meanspec=AR, maxlags=2);
     @test all(isapprox(coef(am), [0.11916340875306261,
                                   0.3156862868133263,
@@ -229,15 +232,24 @@ end
         [1.0129824114578263, 1.9885835817762578], rtol=1e-4))
     @test ARCHModels.uncond(reg) === 0.
     Random.seed!(1)
-
     am = simulate(GARCH{1, 1}([1., .9, .05]), 2000; meanspec=reg, warmup=0)
     fit!(am)
+    @test_throws Base.ErrorException predict(am, :return)
+
     @test all(isapprox(coef(am), [1.5240432453558923,
                                  0.869016093356202,
                                  0.06125683693937313,
                                  1.1773425168044198,
                                  1.7290964605805756], rtol=1e-4))
-
+    Random.seed!(1)
+    am = simulate(GARCH{1, 1}([1., .9, .05]), 1999; meanspec=reg, warmup=0)
+    @test predict(am, :return) ≈ 1.2174653422550268
+    data = DataFrame(X=ones(1974), Y=BG96)
+    model = lm(@formula(Y ~ X-1), data)
+    am = fit(GARCH{1, 1}, model)
+    @test all(isapprox(coef(am), coef(fit(GARCH{1, 1}, BG96, meanspec=Intercept)), rtol=1e-4))
+    @test coefnames(am)[end] == "X"
+    @test all(isapprox(coef(am), coef(fit(GARCH{1, 1}, model.model)), rtol=1e-4))
 end
 
 @testset "VaR" begin
@@ -271,7 +283,7 @@ end
     at = zeros(10)
     data = rand(10)
     reg = Regression(data[1:5])
-    @test_throws ErrorException mean(at, at, at, data, reg, [0.], 5)
+    @test_throws ErrorException mean(at, at, at, data, reg, [0.], 6)
 end
 
 @testset "Distributions" begin