In [60]:
using JuliaDB, OnlineStats, Plots, Distributions
gr()

IndexedTables.set_show_compact!(false)

false

In [2]:
m, v = Mean(), OnlineStats.Variance()

(Mean(0.0), Variance(0.0))

In [4]:
s = Series(m, v)

▦ Series{0}  |  EqualWeight  |  nobs = 0
├── Mean(0.0)
└── Variance(0.0)

In [5]:
fit!(s, randn(100))

▦ Series{0}  |  EqualWeight  |  nobs = 100
├── Mean(-0.155518)
└── Variance(1.04689)

In [6]:
s2 = Series(randn(100), Mean(), Variance())

▦ Series{0}  |  EqualWeight  |  nobs = 100
├── Mean(-0.0136449)
└── Variance(0.925289)

In [7]:
merge!(s, s2)

▦ Series{0}  |  EqualWeight  |  nobs = 200
├── Mean(-0.0845814)
└── Variance(0.98619)

In [12]:
o = IHistogram(100)
o2 = Sum()

Sum{Float64}(0.0)

In [13]:
s = Series(o, o2)

▦ Series{0}  |  EqualWeight  |  nobs = 0
├── IHistogram([Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf])
└── Sum{Float64}(0.0)

In [14]:
y = randexp(10^5)

100000-element Array{Float64,1}:
 1.82263  
 0.186101 
 0.330578 
 2.67864  
 0.476492 
 0.289544 
 0.969828 
 0.685172 
 0.204111 
 0.0206947
 0.603228 
 1.63955  
 2.07397  
 ⋮        
 1.09137  
 0.66482  
 2.12295  
 0.299543 
 0.759672 
 0.261128 
 0.762111 
 0.689006 
 0.392323 
 0.533975 
 1.99452  
 0.99262  

In [15]:
# Stream the observations into the series one at a time.
foreach(y) do yi
    fit!(s, yi)
end

In [16]:
plot(o)

In [17]:
Series(randn(100, 2), CovMatrix(2), MV(2, Mean()))

▦ Series{1}  |  EqualWeight  |  nobs = 100
├── CovMatrix([0.884584 -0.022559; -0.022559 1.15419])
└── MV{Mean}(-0.1800578291265422, 0.1723698938664877)

In [18]:
Series((randn(100, 3), randn(100)), LinReg(3))

▦ Series{(1, 0)}  |  EqualWeight  |  nobs = 100
└── LinReg: β(0.0) = [-0.01501 -0.321151 -0.0104224]

In [19]:
# Three independent series, each tracking a mean and a variance.
s1, s2, s3 = (Series(Mean(), Variance()) for _ in 1:3)

# Fit every series on its own batch of 1000 random draws.
for series in (s1, s2, s3)
    fit!(series, randn(1000))
end

# Fold the partial results into s1, one series at a time.
merge!(s1, s2)
merge!(s1, s3)

▦ Series{0}  |  EqualWeight  |  nobs = 3000
├── Mean(-0.0356107)
└── Variance(0.958927)

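The above idiom is important for fitting data in parallel: each chunk of data can be fit to its own `Series` independently, and the partial results are then combined with `merge!`.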

In [22]:
value(s1)

(-0.03561065214361064, 0.9589270474042937)

## Creating and manipulating `Series`

In [24]:
# Empty Constructors
# 1. without weights or data
Series(Mean(), Variance())

▦ Series{0}  |  EqualWeight  |  nobs = 0
├── Mean(0.0)
└── Variance(0.0)

In [25]:
# 2. With weights
Series(ExponentialWeight(), Mean(), Variance())

▦ Series{0}  |  ExponentialWeight(λ = 0.1)  |  nobs = 0
├── Mean(0.0)
└── Variance(0.0)

In [26]:
# 3. With data
y = randn(1000)
Series(y, Mean(), Variance())

▦ Series{0}  |  EqualWeight  |  nobs = 1000
├── Mean(0.030042)
└── Variance(0.939223)

In [27]:
# 4. With data and weights
Series(y, ExponentialWeight(0.01), Mean(), Variance())

▦ Series{0}  |  ExponentialWeight(λ = 0.01)  |  nobs = 1000
├── Mean(0.00675263)
└── Variance(0.984135)

In [28]:
Series(ExponentialWeight(0.01), y, Mean(), Variance())

▦ Series{0}  |  ExponentialWeight(λ = 0.01)  |  nobs = 1000
├── Mean(0.00675263)
└── Variance(0.984135)

## Updating `Series`

In [29]:
# Updating with a single observation

s = Series(Mean())
fit!(s, randn())

▦ Series{0}  |  EqualWeight  |  nobs = 1
└── Mean(0.69289)

In [32]:
s = Series(CovMatrix(4))
fit!(s, randn(100, 4)) # by default observations are assumed to be stored in rows

▦ Series{1}  |  EqualWeight  |  nobs = 100
└── CovMatrix([1.1572 0.251604 0.160193 0.223771; 0.251604 0.971596 0.0769237 0.150906; 0.160193 0.0769237 1.07164 0.0529836; 0.223771 0.150906 0.0529836 1.19701])

In [33]:
# Series tracking the same stats can be merged

# Two independent batches of 100 random draws each.
y1, y2 = randn(100), randn(100)

# One series per batch, each tracking the mean and variance of its batch.
s1 = Series(y1, Mean(), Variance())
s2 = Series(y2, Mean(), Variance())

▦ Series{0}  |  EqualWeight  |  nobs = 100
├── Mean(-0.12993)
└── Variance(1.09701)

In [34]:
merge!(s1, s2, :append) # treated as a new batch with equal weight

▦ Series{0}  |  EqualWeight  |  nobs = 200
├── Mean(-0.0863707)
└── Variance(1.13086)

In [39]:
# The merge above is equivalent to fitting a single series on both batches in turn.
# Start from an empty series that tracks the mean and variance of the input data.
s1 = Series(Mean(), Variance())
for batch in (y1, y2)
    fit!(s1, batch)       # absorb the next batch of data
    println(value(s1))    # print the current (mean, variance) values
end

(-0.04281123900787702, 1.172300992525855)
(-0.086370692611279, 1.1308631329459182)


In [41]:
merge!(s1, s2, 0.5) # third argument: weight in [0, 1] that determines how much influence s2 has in the merge

▦ Series{0}  |  EqualWeight  |  nobs = 400
├── Mean(-0.11904)
└── Variance(1.09894)

## Weights

The weight parameter for each series controls the influence wielded by the new observations in the incoming stream.

A statistic $\theta^{(t)}$, updated by the $t^{th}$ observation $x_t$, is governed by the weight $\gamma_t$ attached to the new observation:

$$
\theta^{(t)} = (1 - \gamma_t)\theta^{(t-1)} + \gamma_tx_t
$$

In [43]:
# Equal weight gives all observations equal weight

Series(randn(100), EqualWeight(), Variance())

▦ Series{0}  |  EqualWeight  |  nobs = 100
└── Variance(0.939794)

In [46]:
# Exponential weighting gives the first observation a weight of 1; every later observation gets a constant
# weight λ, which can also be set through a number of lookback observations: λ = 2/(lookback + 1)

Series(randn(100), ExponentialWeight(), Variance())

▦ Series{0}  |  ExponentialWeight(λ = 0.1)  |  nobs = 100
└── Variance(0.767538)

In [52]:
# Learning rate weights decrease slowly as more observations arrive: γ_t = 1 / t^r
Series(randn(100), LearningRate(0.7), Variance())

▦ Series{0}  |  LearningRate(r = 0.7)  |  nobs = 100
└── Variance(1.16816)

For each statistic we can think of, we can add it to a `Series` object to capture that statistic.

In [55]:
s = Series(randn(100), Mean(), Variance(), Extrema(), Moments(), Sum())

▦ Series{0}  |  EqualWeight  |  nobs = 100
├── Mean(-0.0246202)
├── Variance(0.953926)
├── Extrema((-2.28313, 2.30418))
├── Moments([-0.0246202, 0.944993, -0.0145104, 2.3138])
└── Sum{Float64}(-2.46202)

In [56]:
value(s)

(-0.02462020651311296, 0.9539259543376356, (-2.2831267616816913, 2.304183862731215), [-0.0246202, 0.944993, -0.0145104, 2.3138], -2.462020651311299)

In [57]:
y = randn(100, 5)
o = CovMatrix(5)

CovMatrix([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [58]:
Series(y, o)

▦ Series{1}  |  EqualWeight  |  nobs = 100
└── CovMatrix([0.927403 0.0124733 0.0244481 -0.00637656 0.0495987; 0.0124733 1.12675 -0.111914 -0.0929304 0.04205; 0.0244481 -0.111914 0.755048 -0.0374955 0.043437; -0.00637656 -0.0929304 -0.0374955 0.89885 0.251202; 0.0495987 0.04205 0.043437 0.251202 1.2909])