In [1]:
using Iterators
using Pipe

macro hcat(expr)
    :(hcat($expr...))
end
macro vcat(expr)
    :(vcat($expr...))
end



macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

macro pz(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr,"\t\t",typeof($ee), "\t", size($ee))))
end

In [2]:
using StatsBase
using SpecialMatrices

In [3]:
import Base.push!
function push!(X::Matrix, x::Vector)
    ncols = size(X, 2)
    for col=2:ncols
        X[:, col-1] = X[:, col]
    end
    X[:,end]=x
    X
end

push! (generic function with 32 methods)

In [4]:
randn(5)

5-element Array{Float64,1}:
 -0.89826  
  0.0356721
  1.004    
 -0.127453 
  0.457552 

In [5]:
function random_walk1!(xs::Matrix)
    step = rand(-1:1,size(xs,1))
    new_x = xs[:,end]+step
    push!(xs,new_x)
end

function lab2_ar_signal!(xs)
    u = randn(size(xs,1))
    new_x = 1.8xs[end]-0.81xs[end-1]+0.1u
    push!(xs,new_x)
end


lab2_ar_signal! (generic function with 1 method)

In [6]:
function get_samples!(transfer!::Function, xs::Matrix)
    for _ in 1:size(xs,2)
        transfer!(xs)
    end
    xs
end
function get_samples(transfer!::Function, n::Int, k::Int=1)
    xs = zeros(k,n)
    get_samples!(transfer!,xs)
end

get_samples (generic function with 2 methods)

In [7]:
function yule_walker_fit_AR(xs::Vector, p::Int)
    γ=autocor(xs,0:p,demean=true)
    γ_sym = [γ[end:-1:1], γ[2:end]] #Autocovarience is syemtrical about zero
    Γ = Toeplitz(γ_sym[2:end-1])  #Don't use p and -p indexes, in the gamma
    full(Γ)\γ[2:end]
end

yule_walker_fit_AR (generic function with 1 method)

In [8]:
xs = get_samples(lab2_ar_signal!,100)[:];
yule_walker_fit_AR(xs, 2)

2-element Array{Float64,1}:
  1.43565 
 -0.465236

$$y_{t}=c+A_{1}y_{{t-1}}+A_{2}y_{{t-2}}+\cdots +A_{p}y_{{t-p}}+e_{t},\,$$

Where each $y_i$ is a vector of length $k$ and each $A_i$ is a $k × k$ matrix.

$${\begin{bmatrix}y_{{1,t}}\\y_{{2,t}}\\\vdots \\y_{{k,t}}\end{bmatrix}}={\begin{bmatrix}c_{{1}}\\c_{{2}}\\\vdots \\c_{{k}}\end{bmatrix}}+{\begin{bmatrix}a_{{1,1}}^{1}&a_{{1,2}}^{1}&\cdots &a_{{1,k}}^{1}\\a_{{2,1}}^{1}&a_{{2,2}}^{1}&\cdots &a_{{2,k}}^{1}\\\vdots &\vdots &\ddots &\vdots \\a_{{k,1}}^{1}&a_{{k,2}}^{1}&\cdots &a_{{k,k}}^{1}\end{bmatrix}}{\begin{bmatrix}y_{{1,t-1}}\\y_{{2,t-1}}\\\vdots \\y_{{k,t-1}}\end{bmatrix}}+\cdots +{\begin{bmatrix}a_{{1,1}}^{p}&a_{{1,2}}^{p}&\cdots &a_{{1,k}}^{p}\\a_{{2,1}}^{p}&a_{{2,2}}^{p}&\cdots &a_{{2,k}}^{p}\\\vdots &\vdots &\ddots &\vdots \\a_{{k,1}}^{p}&a_{{k,2}}^{p}&\cdots &a_{{k,k}}^{p}\end{bmatrix}}{\begin{bmatrix}y_{{1,t-p}}\\y_{{2,t-p}}\\\vdots \\y_{{k,t-p}}\end{bmatrix}}+{\begin{bmatrix}e_{{1,t}}\\e_{{2,t}}\\\vdots \\e_{{k,t}}\end{bmatrix}}$$

In [81]:
 mean(A,2)

2x1 Array{Float64,2}:
 0.666667
 1.33333 

In [86]:
A = [1 -2 3; 2 -4 6]


2x1 Array{Float64,2}:
 12.6667
 50.6667

In [107]:
function applyTransition!(ys::Matrix{Float64}, c::Vector{Float64}, As::Vector{Matrix{Float64}})
    yn = c
    for t in 1:length(As)
        yn+=As[t]*ys[:,end-t+1] 
    end
    
    push!(ys,yn)
end

function least_squares_fit(ys::Matrix, p::Int)
    #Each colunmf of ys is a differnt point in time
    #Each row of ys is a differen variable
    #ys is first observation first
    #Y=BZ + U, B= [c A_1 A_2 ...]
    nVars, nObservationsTotal = size(ys)
    nObservations = nObservationsTotal-p
    
    Y = ys[:,p+1:end] #end is the most recent observation
    Z_col(t) = [1; vec(ys[:, p+t:-1: t+1])]
    Z = hcat([Z_col(t) for t in 0:nObservations-1]...)
    B̂=Y*Z'*(pinv(Z*Z'))
    
    Ŷ=B̂*Z
    
    R²=cor(vec(Y),vec(Ŷ))
    
    
    
    c=B̂[:,1]
    function A(t)
        start = 1+(t-1)*nVars
        B̂[:,1+start:start+nVars]
    end

    (c, map(A, 1:p), R²)
end

least_squares_fit (generic function with 1 method)

In [108]:
function constant_velocity_motion!(ys::Matrix, v::Vector)
    yn = ys[:,end].+v + 0.1randn(length(v))
    push!(ys,yn)
end

function motion(ys::Matrix)
    #x = ys[1,
    #v = ys[2,
    #a = ys[3,
    A= [1 1 0; 0 1 1; 0 0 1]
    yn = A*ys[:,end] + 1*randn(3)
    push!(ys,yn)
end


function v_motion(ys::Matrix)
    v = ys[:,end]- ys[:,end-1]
    yn = ys[:,end]+v + 0.1randn(length(v))
    push!(ys,yn)
end

v_motion (generic function with 1 method)

In [109]:
ys = [zeros(3,50) [0; 0; 9.8] ]
get_samples!(motion, ys)

3x51 Array{Float64,2}:
 0.228328   7.73323  28.1418   55.5101  …  11019.8     11507.3    12005.8   
 9.0077    20.553    27.6735   37.9661       486.958     498.456    511.484 
 9.60983    9.29177   8.73491  10.0202        12.4793     12.02      11.6094

In [110]:
least_squares_fit(ys, 1)

([0.627606,2.78209,4.32971],[
3x3 Array{Float64,2}:
 1.00018       0.996915     -0.0571158
 0.00042191    0.991183      0.773021 
 0.000195762  -0.000524146   0.509764 ],0.9999999450115649)

In [111]:
using PyCall
@pyimport nltk
function tokenize(sentence::String)
    convert(Array{String,1},nltk.word_tokenize(sentence))
end

tokenize (generic function with 1 method)

In [16]:
push!(LOAD_PATH, "../word-embeddings2")

3-element Array{Union(ASCIIString,UTF8String),1}:
 "/root/buildFromSource/julia/usr/local/share/julia/site/v0.3"
 "/root/buildFromSource/julia/usr/share/julia/site/v0.3"      
 "../word-embeddings2"                                        

In [33]:
using WordEmbeddings

type WE
    L::Matrix{Float64}
    word_index::Dict{String,Int}
    indexed_words::Vector{String}
end


we = @pipe load_embeddings("../word-embeddings2/embeddings-scaled.EMBEDDING_SIZE=50.txt") |> WE(_...);


In [38]:
function get_word_index(we::WE, input::String, show_warn=true)
    if haskey(we.word_index, input)
        ii = we.word_index[input]
    elseif haskey(we.word_index, lowercase(input))
        ii = we.word_index[lowercase(input)]
    else
        ii = we.word_index["*UNKNOWN*"]
        if show_warn
            println("$input not found. Defaulting.")
        end
    end
    ii
end

function get_word_embedding(we::WE, input::String, show_warn=true)
    k=get_word_index(we, input, show_warn)
    we.L[:,k]
end

get_word_embedding (generic function with 2 methods)

In [41]:
get_word_embedding(we,"Mike")

50-element Array{Float64,1}:
 -0.213371 
  0.217116 
  0.0214932
  0.299436 
 -0.0970451
 -0.693677 
 -0.045795 
 -0.124475 
  0.146582 
  0.0791667
  0.452753 
 -0.11888  
 -0.400949 
  ⋮        
 -0.347061 
  0.261798 
  0.41213  
 -0.157424 
  0.0807169
  0.151612 
  0.0924173
  0.462194 
  0.143379 
 -0.131027 
 -0.621829 
 -0.251187 

In [62]:
@pyimport nltk.corpus as nltk_corpus
n_training = 10
training_sents = @pipe nltk_corpus.brown[:sents]() |> filter(s->20<length(s), _) |> take(_,n_training)  |> collect |> convert(Vector{Vector{String}},_);


In [66]:
sent =  training_sents[3] 
@show sent
xs = @pipe sent|> map(x->get_word_embedding(we,x), _) |> hcat(_...)

sent => String["The","September-October","term","jury","had","been","charged","by","Fulton","Superior","Court","Judge","Durwood","Pye","to","investigate","reports","of","possible","``","irregularities","''","in","the","hard-fought","primary","which","was","won","by","Mayor-nominate","Ivan","Allen","Jr.","."]
Durwood not found. Defaulting.
Mayor-nominate not found. Defaulting.


50x35 Array{Float64,2}:
 -0.38772    -0.0110013   …  -0.126184    0.126776    -1.29578  
 -0.107298    0.0320611       0.181705    0.165743     0.104425 
 -0.672488    0.00225346      0.33285     0.222661     0.0333433
 -0.356949    0.265144        0.0761791  -0.360137    -0.123819 
  0.253885    0.0381752       0.115926    0.00942884  -0.205738 
  0.181825   -0.0312634   …  -0.0384258   0.148997    -0.91893  
 -0.0745201   0.089455       -0.0767285  -0.00293289   0.33767  
 -0.36416    -0.0121613      -0.245524   -0.0592487   -1.35253  
 -0.61281    -0.0939935      -0.14852     0.0500138    0.0627508
 -0.328352   -0.0362206      -0.334763   -0.152738    -0.706524 
  0.107519   -0.133737    …   0.327445    0.0368522    0.313866 
  0.221072    0.2051         -0.0653426  -0.147301    -0.206052 
  0.0382774   0.0934149      -0.284551   -0.438393    -0.414846 
  ⋮                       ⋱                                     
  0.117019   -0.0661868      -0.282627    0.0509204    0.511014 
 

In [113]:
least_squares_fit(xs, 1)

([0.236132,0.198331,-0.283258,0.0984244,0.684315,0.232979,-0.204118,-0.312682,0.0548677,-0.051552  …  -0.205972,0.176683,0.334591,0.401038,0.118087,0.139996,-0.0719572,-0.215843,-0.188824,0.262566],[
50x50 Array{Float64,2}:
 -0.0222117   -0.269948    -0.178287     …   0.107925   -0.0108497 
 -0.0608238    0.046578    -0.0207793        0.0518733   0.00111753
  0.0656127    0.199868    -0.034913        -0.0711759   4.95184e-5
 -0.0131257    0.0263597    0.0316139        0.0379679  -0.0841863 
 -0.108119    -0.161893     0.000763189      0.0608917   0.0410824 
 -0.101278    -0.11377     -0.114181     …   0.129167   -0.0666931 
  0.163395     0.085619     0.0220293        0.0515453   0.0344129 
 -0.0326935   -0.0227812   -0.202176        -0.0906343  -0.0686455 
  0.0951928   -0.0902885    0.094196         0.152892    0.0796045 
 -0.0686583   -0.119914    -0.0135478       -0.0659156   0.0189067 
  0.190976     0.152783    -0.00633757   …  -0.0622457   0.0331395 
 -0.146881    -0.14198     -

In [None]:
function multi_least_squares_fit(ys::Matrix, p::Int)
    #Each colunmf of ys is a differnt point in time
    #Each row of ys is a differen variable
    #ys is first observation first
    #Y=BZ + U, B= [c A_1 A_2 ...]
   
end