In [1]:
using Iterators
using Pipe

# Horizontally concatenate the elements of a collection: `@hcat xs` expands to
# `hcat(xs...)`.
macro hcat(expr)
    # The argument is escaped so that it is evaluated in the caller's scope;
    # without esc it would be resolved in the module that defines the macro and
    # local variables of the caller would not be visible.
    :(hcat($(esc(expr))...))
end
# Vertically concatenate the elements of a collection: `@vcat xs` expands to
# `vcat(xs...)`.
macro vcat(expr)
    # The argument is escaped so that it is evaluated in the caller's scope;
    # without esc it would be resolved in the module that defines the macro and
    # local variables of the caller would not be visible.
    :(vcat($(esc(expr))...))
end



# Print an expression's source text followed by its value.
# Example: with a == 3, `@printval a + 1` prints `a + 1 = 4`.
macro printval(ee)
    # string(ee) is already a String, so no formatting macro is needed.
    label = string(ee)
    # Only the user's expression is escaped; println stays hygienic and cannot
    # be captured by a variable named println in the caller's scope.
    :(println($label, " = ", $(esc(ee))))
end

# Print an expression's source text, then the type and size of its value,
# separated by tabs.
macro pz(ee)
    # string(ee) is already a String, so no formatting macro is needed.
    label = string(ee)
    quote
        # Evaluate the user's expression exactly once (in the caller's scope) so
        # that side effects or expensive computations are not repeated for the
        # typeof and size calls.
        local val = $(esc(ee))
        println($label, "\t\t", typeof(val), "\t", size(val))
    end
end

In [2]:
using StatsBase
using SpecialMatrices

In [3]:
import Base.push!
# Treat the matrix X as a fixed-width sliding window of column vectors:
# drop the first column, shift every other column one place to the left and
# store x as the new last column. X is modified in place and returned.
# NOTE(review): this adds a method to Base.push! for Base-owned types.
function push!(X::Matrix, x::Vector)
    last_col = size(X, 2)
    col = 1
    while col < last_col
        X[:, col] = X[:, col + 1]
        col += 1
    end
    X[:, end] = x
    return X
end

push! (generic function with 32 methods)

In [4]:
# Scratch check: draw a vector of 5 random samples with randn.
randn(5)

5-element Array{Float64,1}:
  0.244273
  0.563459
  0.452854
 -0.569941
  0.616974

In [5]:
# Advance a random walk by one time step. Each row of xs is one variable and
# the last column is the current state; every variable moves by a step drawn
# from -1:1. The new state is appended with push!(xs, ...), whose result is
# returned.
function random_walk1!(xs::Matrix)
    nrows = size(xs, 1)
    increment = rand(-1:1, nrows)
    push!(xs, xs[:, end] + increment)
end

# Advance the AR(2) signal x[t] = 1.8 x[t-1] - 0.81 x[t-2] + 0.1 u[t] by one
# time step, where u[t] is drawn with randn (one value per row of xs).
# Each row of xs is an independent signal and each column is a point in time;
# the new sample is appended with push!(xs, ...), whose result is returned.
function lab2_ar_signal!(xs)
    u = randn(size(xs, 1))
    # Index whole columns: the linear indices xs[end] and xs[end-1] are the last
    # two time steps only when xs has a single row.
    new_x = 1.8 * xs[:, end] - 0.81 * xs[:, end-1] + 0.1 * u
    push!(xs, new_x)
end


lab2_ar_signal! (generic function with 1 method)

In [6]:
# Call transfer!(xs) once for every column of xs and return xs.
# The number of calls is fixed from the column count before the first call.
function get_samples!(transfer!::Function, xs::Matrix)
    nsteps = size(xs, 2)
    done = 0
    while done < nsteps
        transfer!(xs)
        done += 1
    end
    return xs
end
# Generate n samples of a k-variable signal: allocate a k-by-n matrix of zeros
# and hand it to get_samples! together with the transfer! function.
# Returns whatever get_samples! returns.
function get_samples(transfer!::Function, n::Int, k::Int=1)
    return get_samples!(transfer!, zeros(k, n))
end

get_samples (generic function with 2 methods)

In [7]:
# Estimate the coefficients of an AR(p) model for the series xs by solving the
# Yule-Walker equations built from its sample autocorrelations.
# Returns the solution of the p-by-p system (one coefficient per lag 1..p).
function yule_walker_fit_AR(xs::Vector, p::Int)
    ρ = autocor(xs, 0:p, demean=true)   # ρ[k+1] is the autocorrelation at lag k
    # Autocorrelation is symmetrical about lag zero, so the band for lags
    # -(p-1)..(p-1) is lags p-1..0 followed by lags 1..p-1. Lags -p and p are
    # not needed for the matrix.
    band = [ρ[p:-1:1]; ρ[2:p]]
    R = full(Toeplitz(band))
    R \ ρ[2:end]
end

yule_walker_fit_AR (generic function with 1 method)

In [8]:
# Draw 100 samples of the single-row signal produced by lab2_ar_signal! and
# flatten the 1x100 matrix into a vector, then fit an AR(2) model to it.
# The generator uses the coefficients 1.8 and -0.81, so those are the values
# the fit is being compared against.
xs = get_samples(lab2_ar_signal!,100)[:];
yule_walker_fit_AR(xs, 2)

2-element Array{Float64,1}:
  1.27253 
 -0.301632

$$y_{t}=c+A_{1}y_{{t-1}}+A_{2}y_{{t-2}}+\cdots +A_{p}y_{{t-p}}+e_{t},\,$$

where each $y_t$ is a vector of length $k$, each $A_i$ is a $k \times k$ matrix, and $c$ and $e_t$ are vectors of length $k$.

$${\begin{bmatrix}y_{{1,t}}\\y_{{2,t}}\\\vdots \\y_{{k,t}}\end{bmatrix}}={\begin{bmatrix}c_{{1}}\\c_{{2}}\\\vdots \\c_{{k}}\end{bmatrix}}+{\begin{bmatrix}a_{{1,1}}^{1}&a_{{1,2}}^{1}&\cdots &a_{{1,k}}^{1}\\a_{{2,1}}^{1}&a_{{2,2}}^{1}&\cdots &a_{{2,k}}^{1}\\\vdots &\vdots &\ddots &\vdots \\a_{{k,1}}^{1}&a_{{k,2}}^{1}&\cdots &a_{{k,k}}^{1}\end{bmatrix}}{\begin{bmatrix}y_{{1,t-1}}\\y_{{2,t-1}}\\\vdots \\y_{{k,t-1}}\end{bmatrix}}+\cdots +{\begin{bmatrix}a_{{1,1}}^{p}&a_{{1,2}}^{p}&\cdots &a_{{1,k}}^{p}\\a_{{2,1}}^{p}&a_{{2,2}}^{p}&\cdots &a_{{2,k}}^{p}\\\vdots &\vdots &\ddots &\vdots \\a_{{k,1}}^{p}&a_{{k,2}}^{p}&\cdots &a_{{k,k}}^{p}\end{bmatrix}}{\begin{bmatrix}y_{{1,t-p}}\\y_{{2,t-p}}\\\vdots \\y_{{k,t-p}}\end{bmatrix}}+{\begin{bmatrix}e_{{1,t}}\\e_{{2,t}}\\\vdots \\e_{{k,t}}\end{bmatrix}}$$

In [9]:
 # Scratch check of mean along dimension 2. In the recorded run this cell was
 # executed before A was assigned (A is created in a later cell), hence the
 # "A not defined" error below.
 mean(A,2)

LoadError: A not defined
while loading In[9], in expression starting on line 1

In [10]:
# Scratch 2x3 integer matrix.
A = [1 -2 3; 2 -4 6]


2x3 Array{Int64,2}:
 1  -2  3
 2  -4  6

In [11]:
# Advance a vector autoregressive process by one time step:
#   y[t] = c + As[1]*y[t-1] + As[2]*y[t-2] + ... + As[p]*y[t-p]
# Columns of ys are points in time with the most recent one last. The new
# state is appended with push!(ys, ...), whose result is returned.
# c itself is not modified: the running sum is rebuilt on every iteration.
function applyTransition!(ys::Matrix{Float64}, c::Vector{Float64}, As::Vector{Matrix{Float64}})
    next_y = c
    for (lag, A) in enumerate(As)
        next_y = next_y + A * ys[:, end - lag + 1]
    end

    push!(ys, next_y)
end

# Fit a vector autoregressive model of order p to ys by least squares.
#
# Each column of ys is a different point in time (first observation first) and
# each row is a different variable. The model is Y = B*Z + U with
# B = [c A_1 A_2 ... A_p].
#
# Returns (c, As, R2): the intercept vector, the vector of the p lag matrices
# and the squared correlation between the observed and the fitted values.
function least_squares_fit(ys::Matrix, p::Int)
    nvars, ntotal = size(ys)
    nobs = ntotal - p

    # Targets: every observation that has p predecessors (end is the most recent).
    targets = ys[:, p+1:end]
    # One regressor column per target: a leading 1 for the intercept followed
    # by the p previous observations, most recent first.
    lagged = [[1; vec(ys[:, p+t:-1:t+1])] for t in 0:nobs-1]
    regressors = hcat(lagged...)

    coeffs = targets * regressors' * (pinv(regressors * regressors'))
    fitted = coeffs * regressors
    r2 = cor(vec(targets), vec(fitted))^2

    intercept = coeffs[:, 1]
    # Column 1 holds the intercept, so lag t occupies the nvars columns that
    # start at column 2 + (t-1)*nvars.
    lag_matrices = map(1:p) do t
        lo = 2 + (t - 1) * nvars
        hi = lo + nvars - 1
        coeffs[:, lo:hi]
    end

    return (intercept, lag_matrices, r2)
end

least_squares_fit (generic function with 1 method)

In [12]:
# Advance ys by one time step of constant-velocity motion: the latest column
# plus the velocity v plus noise of scale 0.1 drawn with randn.
# The new state is appended with push!(ys, ...), whose result is returned.
function constant_velocity_motion!(ys::Matrix, v::Vector)
    drifted = ys[:, end] .+ v
    jitter = 0.1 * randn(length(v))
    push!(ys, drifted + jitter)
end

# Advance a three-variable state by one time step and append it to ys with
# push!(ys, ...), whose result is returned.
# Presumably the rows are position, velocity and acceleration (x, v, a): the
# transition matrix adds row 2 to row 1 and row 3 to row 2. Noise drawn with
# randn(3) is added to the new state.
function motion(ys::Matrix)
    transition = [1 1 0;
                  0 1 1;
                  0 0 1]
    state = ys[:, end]
    next_state = transition * state + 1 * randn(3)
    push!(ys, next_state)
end


# Advance ys by one time step, using the difference between the last two
# columns as the velocity estimate and adding noise of scale 0.1 drawn with
# randn. The new state is appended with push!(ys, ...), whose result is
# returned.
function v_motion(ys::Matrix)
    velocity = ys[:, end] - ys[:, end-1]
    noise = 0.1 * randn(length(velocity))
    push!(ys, ys[:, end] + velocity + noise)
end

v_motion (generic function with 1 method)

In [13]:
# Start from 50 all-zero columns followed by the initial state [0, 0, 9.8].
# get_samples! calls motion once per column (51 times here), so every initial
# column is shifted out and ys ends up holding only generated states.
ys = [zeros(3,50) [0; 0; 9.8] ]
get_samples!(motion, ys)

3x51 Array{Float64,2}:
 0.414465  10.5495   28.7024   …  12587.8     13112.2     13644.0    
 9.76862   17.4398   26.0697        524.56      533.544     543.228  
 8.49334    8.76181   8.57035         9.7813     11.0354      9.62079

In [14]:
# Fit a first-order model to the simulated motion. The matrix used by `motion`
# to generate the data is [1 1 0; 0 1 1; 0 0 1], for comparison with the
# fitted lag matrix.
least_squares_fit(ys, 1)

([1.45537,1.06361,3.91136],[
3x3 Array{Float64,2}:
  0.999686    1.0078      -0.202784
 -8.10083e-5  1.00008      0.911049
 -7.22222e-5  0.00136896   0.638683],0.9999999139699937)

In [15]:
using PyCall
@pyimport nltk
# Split a sentence into tokens with nltk.word_tokenize and convert the result
# to a Julia vector of strings.
function tokenize(sentence::String)
    tokens = nltk.word_tokenize(sentence)
    return convert(Vector{String}, tokens)
end



tokenize (generic function with 1 method)

In [16]:
# Add the sibling word-embeddings directory to the module search path so that
# the `using WordEmbeddings` in the next cell can be resolved.
push!(LOAD_PATH, "../word-embeddings2")

3-element Array{Union(ASCIIString,UTF8String),1}:
 "/root/buildFromSource/julia/usr/local/share/julia/site/v0.3"
 "/root/buildFromSource/julia/usr/share/julia/site/v0.3"      
 "../word-embeddings2"                                        

In [17]:
using WordEmbeddings

# Word-embedding table together with its vocabulary lookups.
type WE
    # Embedding matrix; column k is the embedding of the word with index k.
    L::Matrix{Float64}
    # Maps a word to its column index in L.
    word_index::Dict{String,Int}
    # Presumably the words ordered by index (inverse of word_index) - TODO confirm.
    indexed_words::Vector{String}
end


# Load the 200-dimensional scaled embeddings and splat the result into the WE
# constructor. This assumes load_embeddings returns its values in the order of
# WE's fields (L, word_index, indexed_words) - TODO confirm.
we = @pipe load_embeddings("../word-embeddings2/embeddings-scaled.EMBEDDING_SIZE=200.txt") |> WE(_...);


In [18]:
# Look up the index of a word in the embedding vocabulary.
# The exact spelling is tried first, then the lowercased spelling. If neither
# is present the index of the "*UNKNOWN*" entry is returned, and a warning is
# emitted when show_warn is true.
function get_word_index(we::WE, input::String, show_warn=true)
    vocab = we.word_index
    haskey(vocab, input) && return vocab[input]

    folded = lowercase(input)
    haskey(vocab, folded) && return vocab[folded]

    fallback = vocab["*UNKNOWN*"]
    if show_warn
        warn("$input not found. Defaulting.")
    end
    return fallback
end

# Return the embedding of a single word: the column of we.L selected by
# get_word_index (show_warn is forwarded to that lookup).
function get_word_embedding(we::WE, input::String, show_warn=true)
    column = get_word_index(we, input, show_warn)
    return we.L[:, column]
end

# Return the embeddings of a sequence of words as a matrix with one column per
# word, in input order (show_warn is forwarded to each lookup).
function get_word_embeddings(we::WE, input::Vector{String}, show_warn=true)
    columns = map(word -> get_word_embedding(we, word, show_warn), input)
    hcat(columns...)
end



get_word_embeddings (generic function with 2 methods)

In [19]:
@pyimport nltk.corpus as nltk_corpus
# Training set: the first n_training sentences of the Brown corpus that have
# more than 20 tokens, converted to Julia vectors of strings.
n_training = 2000
training_sents = @pipe nltk_corpus.brown[:sents]() |> filter(s->20<length(s), _) |> take(_,n_training)  |> collect |> convert(Vector{Vector{String}},_);


In [20]:
# One embedding matrix per training sentence (one column per word); warnings
# about words missing from the vocabulary are suppressed (show_warn = false).
xss = map(x-> get_word_embeddings(we,x, false), training_sents);

In [21]:
# Fit one vector autoregressive model of order p to several independent series
# by least squares.
#
# yss is a collection of matrices. In each matrix every column is a different
# point in time (first observation first) and every row is a different
# variable. The targets and regressors of all series are pooled side by side
# and a single model Y = B*Z + U with B = [c A_1 A_2 ... A_p] is fitted.
#
# Returns (c, As, R2): the intercept vector, the vector of the p lag matrices
# and the squared correlation between the observed and the fitted values.
function multi_least_squares_fit(yss, p::Int)
    # Targets and regressors for one series. Lags never cross a series
    # boundary because each series is expanded on its own.
    function design(series::Matrix)
        nobs = size(series, 2) - p
        targets = series[:, p+1:end]   # end is the most recent observation
        # A leading 1 for the intercept, then the p previous observations,
        # most recent first.
        lagged = [[1; vec(series[:, p+t:-1:t+1])] for t in 0:nobs-1]
        targets, hcat(lagged...)
    end

    pieces = map(design, yss)
    Y = hcat(map(piece -> piece[1], pieces)...)
    Z = hcat(map(piece -> piece[2], pieces)...)

    coeffs = Y * Z' * (pinv(Z * Z'))
    fitted = coeffs * Z
    r2 = cor(vec(Y), vec(fitted))^2

    intercept = coeffs[:, 1]
    nvars = size(Y, 1)
    # Column 1 holds the intercept, so lag t occupies the nvars columns that
    # start at column 2 + (t-1)*nvars.
    lag_matrices = map(1:p) do t
        lo = 2 + (t - 1) * nvars
        hi = lo + nvars - 1
        coeffs[:, lo:hi]
    end

    return (intercept, lag_matrices, r2)
end

multi_least_squares_fit (generic function with 1 method)

In [22]:
# Fit a single order-2 model to all sentence embedding sequences at once.
c,As, RR = multi_least_squares_fit(xss, 2);

In [23]:
# Third value returned by multi_least_squares_fit: the squared correlation
# between the observed and the fitted values.
RR


0.2552489695639468