In [1]:
using Iterators
using Pipe

# `@hcat xs` expands to `hcat(xs...)`: horizontally concatenate the elements of the
# collection `xs`.
# The argument is escaped so that it is evaluated in the caller's scope; without
# `esc`, macro hygiene resolves its names in the macro's defining module and
# local variables of the caller are not found.
macro hcat(expr)
    :(hcat($(esc(expr))...))
end
# `@vcat xs` expands to `vcat(xs...)`: vertically concatenate the elements of the
# collection `xs`.
# The argument is escaped so that it is evaluated in the caller's scope; without
# `esc`, macro hygiene resolves its names in the macro's defining module and
# local variables of the caller are not found.
macro vcat(expr)
    :(vcat($(esc(expr))...))
end



# `@printval expr` prints the source text of `expr`, then " = ", then its value.
macro printval(ee)
    # Source text of the expression, captured once at expansion time.
    # `string` already yields the text; no formatting macro is needed.
    label = string(ee)
    # Only the user's expression is escaped, so `println` always refers to the
    # one visible where this macro is defined.
    :(println($label, " = ", $(esc(ee))))
end

# `@pz expr` prints the source text of `expr` followed by the type and the size
# of its value, separated by tabs.
macro pz(ee)
    # Source text of the expression, captured once at expansion time.
    label = string(ee)
    quote
        # Evaluate the user's expression exactly once (in the caller's scope) so
        # that expensive or side-effecting expressions are not run twice.
        local value = $(esc(ee))
        println($label, "\t\t", typeof(value), "\t", size(value))
    end
end

In [2]:
using StatsBase
using SpecialMatrices

In [3]:
import Base.push!
# Sliding-window push: shift every column of `X` one place to the left (dropping
# the first column) and store `x` as the new last column. `X` keeps its size, is
# modified in place, and is returned.
#
# Throws a DimensionMismatch when `length(x)` differs from the number of rows.
# The check happens before any column is moved, so a failed call leaves `X`
# untouched.
#
# NOTE(review): this adds a method to Base.push! for Base types only (type
# piracy); a dedicated function name would be safer, but callers in this file
# rely on `push!`.
function push!(X::Matrix, x::Vector)
    nrows, ncols = size(X)
    if length(x) != nrows
        throw(DimensionMismatch(
            "cannot push a vector of length $(length(x)) into a matrix with $nrows rows"))
    end
    for col = 2:ncols
        X[:, col-1] = X[:, col]
    end
    X[:, end] = x
    X
end

push! (generic function with 32 methods)

In [4]:
# Advance a random walk by one step: every row of `xs` moves by a random
# increment drawn from -1:1 relative to its value in the last column, and the
# result is pushed onto `xs` as the new last column.
function random_walk1!(xs::Matrix)
    nrows = size(xs, 1)
    increments = rand(-1:1, nrows)
    return push!(xs, xs[:, end] + increments)
end

# Advance the AR(2) signal x[t] = 1.8 x[t-1] - 0.81 x[t-2] + 0.1 u[t] by one step,
# where u[t] is standard normal noise drawn independently for each row of `xs`.
# The new sample is pushed onto `xs` as its last column. `xs` needs at least two
# columns.
function lab2_ar_signal!(xs)
    u = randn(size(xs, 1))
    # Use whole columns for the two previous time steps. Linear indices
    # (`xs[end]`, `xs[end-1]`) only coincide with them when `xs` has one row;
    # with several rows they would mix different rows of the last column.
    previous = xs[:, end]
    before_previous = xs[:, end-1]
    new_x = 1.8 * previous - 0.81 * before_previous + 0.1 * u
    push!(xs, new_x)
end


lab2_ar_signal! (generic function with 1 method)

In [5]:
# Call `transfer!(xs)` once per column of `xs` and return `xs`.
# The return value of `transfer!` is ignored, so only its in-place effect on
# `xs` is kept.
function get_samples!(transfer!::Function, xs::Matrix)
    nsteps = size(xs, 2)
    for step = 1:nsteps
        transfer!(xs)
    end
    return xs
end
# Start from a k-by-n matrix of zeros, apply `transfer!` to it n times via
# `get_samples!`, and return the resulting matrix. `k` defaults to 1.
function get_samples(transfer!::Function, n::Int, k::Int=1)
    return get_samples!(transfer!, zeros(k, n))
end

get_samples (generic function with 2 methods)

In [6]:
# Fit an AR(p) model to the series `xs` by solving the Yule-Walker equations.
# Returns the p coefficient estimates, ordered from lag 1 to lag p.
# Throws an ArgumentError when p < 1.
function yule_walker_fit_AR(xs::Vector, p::Int)
    p >= 1 || throw(ArgumentError("p must be at least 1, got $p"))
    # γ[k+1] is the sample autocorrelation of xs at lag k, for k = 0..p
    γ = autocor(xs, 0:p, demean=true)
    # Autocorrelation is symmetric about lag zero, so entry (i, j) of the p-by-p
    # Toeplitz system matrix is the autocorrelation at lag |i - j|. Lags p and -p
    # never appear in the matrix. Building it directly avoids relying on how
    # `[a, b]` concatenates, which differs between Julia versions.
    Γ = [γ[abs(i-j)+1] for i in 1:p, j in 1:p]
    Γ \ γ[2:end]
end

yule_walker_fit_AR (generic function with 1 method)

In [7]:
# Simulate the AR signal into a 1x100 matrix (get_samples defaults to one row),
# flatten it to a vector with [:], and fit an order-2 AR model to it.
# lab2_ar_signal! generates the data with coefficients 1.8 and -0.81, so those
# are the values the fit is trying to recover.
xs = get_samples(lab2_ar_signal!,100)[:];
yule_walker_fit_AR(xs, 2)

2-element Array{Number,1}:
  1.4445  
 -0.454494

$$y_{t}=c+A_{1}y_{{t-1}}+A_{2}y_{{t-2}}+\cdots +A_{p}y_{{t-p}}+e_{t},\,$$

where each $y_t$ is a vector of length $k$, $c$ is a constant vector of length $k$, each $A_i$ is a $k \times k$ matrix, and $e_t$ is an error vector of length $k$.

$${\begin{bmatrix}y_{{1,t}}\\y_{{2,t}}\\\vdots \\y_{{k,t}}\end{bmatrix}}={\begin{bmatrix}c_{{1}}\\c_{{2}}\\\vdots \\c_{{k}}\end{bmatrix}}+{\begin{bmatrix}a_{{1,1}}^{1}&a_{{1,2}}^{1}&\cdots &a_{{1,k}}^{1}\\a_{{2,1}}^{1}&a_{{2,2}}^{1}&\cdots &a_{{2,k}}^{1}\\\vdots &\vdots &\ddots &\vdots \\a_{{k,1}}^{1}&a_{{k,2}}^{1}&\cdots &a_{{k,k}}^{1}\end{bmatrix}}{\begin{bmatrix}y_{{1,t-1}}\\y_{{2,t-1}}\\\vdots \\y_{{k,t-1}}\end{bmatrix}}+\cdots +{\begin{bmatrix}a_{{1,1}}^{p}&a_{{1,2}}^{p}&\cdots &a_{{1,k}}^{p}\\a_{{2,1}}^{p}&a_{{2,2}}^{p}&\cdots &a_{{2,k}}^{p}\\\vdots &\vdots &\ddots &\vdots \\a_{{k,1}}^{p}&a_{{k,2}}^{p}&\cdots &a_{{k,k}}^{p}\end{bmatrix}}{\begin{bmatrix}y_{{1,t-p}}\\y_{{2,t-p}}\\\vdots \\y_{{k,t-p}}\end{bmatrix}}+{\begin{bmatrix}e_{{1,t}}\\e_{{2,t}}\\\vdots \\e_{{k,t}}\end{bmatrix}}$$

In [8]:
# Predict the next column of `ys` from the vector autoregression
#   y[t] = c + As[1]*y[t-1] + As[2]*y[t-2] + ...
# The last column of `ys` is the most recent observation, so `As[lag]` is
# applied to the column `lag - 1` places before the end. Neither `ys` nor `c`
# is modified.
function get_next(ys::NumericMatrix, c::NumericVector, As::Vector{NumericMatrix})
    prediction = c
    for (lag, A) in enumerate(As)
        prediction = prediction + A * ys[:, end-lag+1]
    end
    return prediction
end
# Compute the next column predicted by the model (c, As) with `get_next` and
# push it onto `ys`.
function applyTransition!(ys::NumericMatrix, c::NumericVector, As::Vector{NumericMatrix})
    return push!(ys, get_next(ys, c, As))
end

# Least-squares fit of an order-p vector autoregression to `ys`.
#
# Each column of `ys` is a different point in time (first observation first,
# last column most recent); each row is a different variable.
# The model is Y = B*Z + U with B = [c A_1 A_2 ... A_p].
#
# Returns (c, [A_1, ..., A_p], R²): the intercept vector, the per-lag
# coefficient matrices, and the squared correlation between the observed and
# the fitted values.
function least_squares_fit(ys::Matrix, p::Int)
    nVars, nTotal = size(ys)
    nFit = nTotal - p

    # Observations that have p predecessors available
    targets = ys[:, p+1:end]
    # One regressor column per target: a leading 1 for the intercept, followed
    # by the p preceding observations, most recent first
    regressors = hcat([[1; vec(ys[:, p+t:-1:t+1])] for t in 0:nFit-1]...)
    coeffs = targets * regressors' * pinv(regressors * regressors')

    fitted = coeffs * regressors
    r_squared = cor(vec(targets), vec(fitted))^2

    intercept = coeffs[:, 1]
    # Column 1 holds the intercept; lag t occupies the t-th group of nVars
    # columns after it
    lag_blocks = map(1:p) do t
        first_col = 2 + (t - 1) * nVars
        coeffs[:, first_col:first_col+nVars-1]
    end

    return (intercept, lag_blocks, r_squared)
end

least_squares_fit (generic function with 1 method)

In [9]:
# Advance `ys` by one step of noisy constant-velocity motion: the new column is
# the last column plus `v`, plus Gaussian noise scaled by 0.1. The new column is
# pushed onto `ys`.
function constant_velocity_motion!(ys::Matrix, v::Vector)
    drifted = ys[:, end] .+ v
    jitter = 0.1 * randn(length(v))
    return push!(ys, drifted + jitter)
end

# Advance a 3-row state history by one step and push the new state onto `ys`.
# Per the author's notes the rows are x = row 1, v = row 2, a = row 3
# (presumably position, velocity and acceleration — confirm).
# The transition adds v to x and a to v, leaves a unchanged, and then adds
# unit-variance Gaussian noise to all three components.
function motion(ys::Matrix)
    transition = [1 1 0;
                  0 1 1;
                  0 0 1]
    predicted = transition * ys[:, end]
    noise = 1 * randn(3)
    return push!(ys, predicted + noise)
end


# Advance `ys` by one step, estimating the velocity as the difference between
# its last two columns: the new column is the last column plus that difference,
# plus Gaussian noise scaled by 0.1. The new column is pushed onto `ys`.
function v_motion(ys::Matrix)
    current = ys[:, end]
    velocity = current - ys[:, end-1]
    jitter = 0.1 * randn(length(velocity))
    return push!(ys, current + velocity + jitter)
end

v_motion (generic function with 1 method)

In [10]:
# Start from a 3x51 state history: 50 zero columns followed by the column
# [0, 0, 9.8].
ys = [zeros(3,50) [0; 0; 9.8] ]
# Advance the history once per column (51 steps) with `motion`.
get_samples!(motion, ys)
# Fit a lag-1 model; the fitted A_1 is an estimate of the transition matrix
# [1 1 0; 0 1 1; 0 0 1] that `motion` applies.
least_squares_fit(ys, 1)

([-0.213747,-0.0822762,2.99581],[
3x3 Array{Number,2}:
  1.0002        0.995903    0.056392
 -1.0723e-5     0.999717    1.01518 
 -0.000141832  -0.00257041  0.74435 ],0.9999998755369578)

In [11]:
using PyCall
@pyimport nltk
# Split `sentence` into tokens with NLTK's `word_tokenize` (called through
# PyCall) and return them as a vector of strings.
function tokenize(sentence::String)
    tokens = nltk.word_tokenize(sentence)
    return convert(Vector{String}, tokens)
end



tokenize (generic function with 1 method)

In [12]:
push!(LOAD_PATH, "../word-embeddings2")
using WordEmbeddings

3-element Array{Union(ASCIIString,UTF8String),1}:
 "/root/buildFromSource/julia/usr/local/share/julia/site/v0.3"
 "/root/buildFromSource/julia/usr/share/julia/site/v0.3"      
 "../word-embeddings2"                                        

In [13]:

# Load the embeddings file (its name indicates scaled embeddings of size 50) and
# splat the loaded values into the WE constructor.
# NOTE(review): load_embeddings and WE presumably come from WordEmbeddings — confirm.
we = @pipe load_embeddings("../word-embeddings2/embeddings-scaled.EMBEDDING_SIZE=50.txt") |> WE(_...);


In [14]:
# Least-squares fit of one order-p vector autoregression shared by several
# independent sequences.
#
# `yss` is a collection of matrices. In each matrix every column is a different
# point in time (first observation first, last column most recent) and every
# row is a different variable. The model is Y = B*Z + U with
# B = [c A_1 A_2 ... A_p]; the Y and Z blocks of all sequences are placed side
# by side, so no regressor ever spans two sequences.
#
# Sequences with p or fewer columns cannot supply a single complete observation
# and are skipped.
#
# Returns (c, [A_1, ..., A_p], R²): the intercept vector, the per-lag
# coefficient matrices, and the squared correlation between the observed and
# the fitted values.
#
# Throws an ArgumentError when p is negative or when no sequence has more than
# p columns.
function multi_least_squares_fit(yss, p::Int)
    p >= 0 || throw(ArgumentError("p must be non-negative, got $p"))

    # Build the target block Y and regressor block Z for a single sequence
    function get_model(ys::Matrix)
        nObservations = size(ys, 2) - p
        Y = ys[:, p+1:end] # end is the most recent observation
        # Leading 1 for the intercept, then the p preceding observations,
        # most recent first
        Z_col(t) = [1; vec(ys[:, p+t:-1:t+1])]
        Z = hcat([Z_col(t) for t in 0:nObservations-1]...)
        Y, Z
    end

    # A sequence needs more than p columns to yield at least one regressor
    # column; shorter ones would produce an empty design matrix and break the
    # concatenation below.
    usable = filter(ys -> size(ys, 2) > p, yss)
    isempty(usable) &&
        throw(ArgumentError("no sequence has more than p = $p observations"))

    YZs = map(get_model, usable)
    Y = hcat(map(x -> x[1], YZs)...)
    Z = hcat(map(x -> x[2], YZs)...)
    B_hat = Y * Z' * pinv(Z * Z')

    Y_hat = B_hat * Z
    R2 = cor(vec(Y), vec(Y_hat))^2

    nVars = size(Y, 1)
    c = B_hat[:, 1]
    # Column 1 holds the intercept; lag t occupies the t-th group of nVars
    # columns after it
    function A(t)
        first_col = 2 + (t - 1) * nVars
        last_col = first_col + nVars - 1
        B_hat[:, first_col:last_col]
    end

    (c, map(A, 1:p), R2)
end

multi_least_squares_fit (generic function with 1 method)

In [153]:
# Build the training set from the NLTK Brown corpus: keep sentences with 4 to 6
# tokens, take at most n_training of them, and collect the result into a
# Vector{Vector{String}}.
# The commented-out stage inside the pipeline would additionally keep only
# sentences whose second token is "was", "is" or "are".
@pyimport nltk.corpus as nltk_corpus
n_training = 20000
training_sents = @pipe (nltk_corpus.brown[:sents]() 
                            |> filter(s->3<length(s)<=6, _) 
                            #|> filter(s->s[2]=="was"||s[2]=="is"||s[2]=="are",_)
                            |> take(_,n_training)  
                            |> collect |> convert(Vector{Vector{String}},_));



In [154]:
# Embed every training sentence, giving one array per sentence.
# NOTE(review): the meaning of the `false` flag is defined by
# eval_word_embeddings, which is not visible here — confirm.
xss = map(x-> eval_word_embeddings(we,x, false), training_sents);
# Print the name, type and size of xss.
@pz xss

xss		Array{Array{Number,2},1}	(4090,)


In [155]:
# Fit a single order-3 vector autoregression across all embedded sentences.
# c is the intercept, As holds the three lag matrices, and RR is the squared
# correlation between observed and fitted values.
c,As, RR = multi_least_squares_fit(xss, 3);
RR

0.5609647936550113

In [156]:
# Embed the three-token seed sequence "blue", ",", "green".
# get_next reads the last three columns of xs for the three lag matrices in As,
# so this presumably yields one column per token — confirm against WordEmbeddings.
xs = eval_word_embeddings(we, [["blue", ",", "green",]]);

In [157]:
# Predict the embedding that follows the seed sequence, then display the
# candidates returned by show_best for that vector.
# NOTE(review): show_best presumably ranks vocabulary words by similarity to xn — confirm.
xn=get_next(xs, c,As)
WordEmbeddings.show_best(we, xn)

20x2 Array{Any,2}:
 "."           0.95
 ":"           0.8 
 "?"           0.78
 "..."         0.77
 ";"           0.76
 "here"        0.72
 "yesterday."  0.71
 "away"        0.71
 "today."      0.7 
 "down."       0.7 
 "today"       0.68
 "there."      0.68
 "abroad"      0.67
 "alone"       0.67
 "news."       0.66
 "-"           0.66
 "said."       0.66
 "--"          0.66
 "policy."     0.65
 "later."      0.65

In [158]:
# Slide the window forward: the Matrix/Vector push! method defined in this file
# drops the oldest column of xs and appends the prediction xn (assuming xs is a
# Matrix and xn a Vector). Then predict and display the token after that.
push!(xs,xn)
xn=get_next(xs, c,As)
WordEmbeddings.show_best(we, xn)

20x2 Array{Any,2}:
 "."           0.96
 ";"           0.83
 ":"           0.81
 "?"           0.78
 "..."         0.75
 "--"          0.7 
 "alone"       0.69
 "today."      0.69
 "-"           0.68
 "here"        0.68
 "down."       0.67
 "because"     0.67
 "away"        0.65
 "yesterday."  0.65
 "there."      0.65
 "!"           0.65
 "said."       0.64
 "here."       0.64
 "recently."   0.63
 "that"        0.63