In [49]:
using OnlineStats, Plots
# Select the GR backend for Plots (the call displays `Plots.GRBackend()`)
gr()

Plots.GRBackend()

# OnlineStats.jl

Online algorithms for statistics.

Authored by Josh Day (@joshday) with major contributions from Tom Breloff (@tbreloff)

### Motivation 
![Infographic: the four V's of big data](http://www.ibmbigdatahub.com/sites/default/files/styles/xlarge-scaled/public/infographic_image/4-Vs-of-big-data.jpg?itok=4syrvSLX)

- Statisticians don't have tools to handle all of this
- Adapting our favorite algorithms is often nontrivial

# Introducing OnlineStats.jl

- Accepts input data piece by piece, rather than all at once
- Algorithms use O(1) memory with respect to the number of observations
- Goal is to provide functionality for major areas of statistics
    - from summary statistics to penalized regression

# Each OnlineStat is a type

In [57]:
# Construct objects with data: here a 100 x 2 matrix of standard normal draws.
# The resulting object reports a 2x2 value and nobs = 100.
x = randn(100, 2)
CovMatrix(x)

■ CovMatrix{EqualWeight}
  >     value: 2x2 Array{Float64,2}:
 0.91384   0.108541
 0.108541  1.34472 
  >      nobs: 100


In [59]:
# Or create an "empty" object (nobs = 0) to be filled in later;
# passing 2 yields a 2x2 value.
CovMatrix(2)

■ CovMatrix{EqualWeight}
  >     value: 2x2 Array{Float64,2}:
 -0.0  -0.0
 -0.0  -0.0
  >      nobs: 0


# Types are parameterized by Weight

- EqualWeight
- ExponentialWeight
- BoundedEqualWeight
- LearningRate

In [60]:
# A weight can be supplied as the second argument; it appears as the type
# parameter of the result (CovMatrix{ExponentialWeight}).
CovMatrix(4, ExponentialWeight(.01))

■ CovMatrix{ExponentialWeight}
  >     value: 4x4 Array{Float64,2}:
 -0.0  -0.0  -0.0  -0.0
 -0.0  -0.0  -0.0  -0.0
 -0.0  -0.0  -0.0  -0.0
 -0.0  -0.0  -0.0  -0.0
  >      nobs: 0


# OnlineStat methods

- `fit!(o, data...)`
- `nobs(o)`
- `value(o)`

In [62]:
# Most types have additional methods, e.g. `coef` for a linear regression.
# No data has been fit yet, so the 5 coefficients shown are all 0.0.
o = LinReg(5, intercept = false)
coef(o)

5-element Array{Float64,1}:
 0.0
 0.0
 0.0
 0.0
 0.0

# Using OnlineStats

In [51]:
x = randn(100_000, 4)
c = CovMatrix(4)

# Input data can be an AbstractArray: feed the rows of x to c one at a time.
# The loop bound is taken from x itself so it cannot drift from the data size.
# NOTE(review): `slice` was renamed to `view` in Julia 0.5 -- update if upgrading.
for i in 1:size(x, 1)
    fit!(c, slice(x, i, :))
end

# or simply pass the whole matrix in one call
# NOTE(review): executed after the loop above, this feeds x to c a second time;
# presumably the two forms are meant as alternatives -- confirm.
fit!(c, x)