### POS Tagging

In [1]:
# Delimiter set handed to `split` below: every Unicode code point for which `isspace`
# returns true, in ascending code-point order.
# Background: https://github.com/JuliaLang/julia/issues/14099
const spaces = [c for c in Char(0):Char(0x10FFFF) if isspace(c)];

In [19]:
"""
    readTaggedTokens(path)

Read the corpus at `path` and return its `(word, tag)` pairs in file order.

Every line is split on the characters in `spaces`, and each non-empty piece is expected
to look like `word_TAG`. The word is the text before the first `_` and the tag is the
text between the first and the second `_`.

Throws an `ArgumentError` naming the offending token when a token contains no `_`.
"""
function readTaggedTokens(path)
    tagged = Tuple{AbstractString, AbstractString}[]
    # The do-block form closes the file even when parsing throws.
    open(path) do io
        for ln in eachline(io)
            for token in split(ln, spaces)
                # Runs of whitespace produce empty pieces; skip them.
                isempty(token) && continue
                parts = split(token, "_")
                length(parts) >= 2 ||
                    throw(ArgumentError("expected a `word_TAG` token, got \"$token\""))
                push!(tagged, (parts[1], parts[2]))
            end
        end
    end
    return tagged
end

"""
    oneHotDict(items)

Map every element of `items` to a one-hot `Vector{Float64}` of length `length(items)`
whose single `1.0` sits at the element's position in the iteration order of `items`.
"""
function oneHotDict(items)
    n = length(items)
    encoding = Dict{AbstractString, Vector{Float64}}()
    for (index, item) in enumerate(items)
        onehot = zeros(n)
        onehot[index] = 1
        encoding[item] = onehot
    end
    return encoding
end

# Parse the training corpus once; everything below is derived from `taggedTokens`.
taggedTokens = readTaggedTokens("data/pos/05-train-input.txt");

# Distinct words and distinct tags seen in the corpus.
vocabSet = Set{AbstractString}();
tagSet = Set{AbstractString}();
for (word, tag) in taggedTokens
    push!(vocabSet, word);
    push!(tagSet, tag);
end

# One-hot encodings. The position of the 1 follows the iteration order of the set,
# which is arbitrary; the same dictionaries must therefore be reused for decoding.
vocabSize = length(vocabSet);
wordDict = oneHotDict(vocabSet);
tagSize = length(tagSet);
tagDict = oneHotDict(tagSet);

# Training examples in corpus order: (one-hot word, one-hot tag).
data = Tuple{Vector{Float64}, Vector{Float64}}[
    (wordDict[word], tagDict[tag]) for (word, tag) in taggedTokens
];

[([0.0,0.0,1.0],[0.0,0.0,1.0]),([0.0,1.0,0.0],[1.0,0.0,0.0]),([0.0,0.0,1.0],[0.0,1.0,0.0]),([0.0,0.0,1.0],[0.0,0.0,1.0]),([1.0,0.0,0.0],[0.0,0.0,1.0]),([0.0,1.0,0.0],[1.0,0.0,0.0])]


In [None]:
"""
    tanhGradient(x)

Derivative of `tanh` written in terms of its output: if `x = tanh(a)` then
`d tanh(a) / da = 1 - x^2`.

`x` may be a number or an array. Arrays are handled elementwise and yield an array of
the same shape.
"""
function tanhGradient(x)
    # Dotted operators leave the scalar result unchanged and extend the formula to
    # arrays, for which plain `x^2` is not defined when `x` is a vector.
    return 1 .- x .^ 2
end

In [None]:
"""
    forwardRNN(activationFn, Winx, Winh, Bin, Wouth, Bout, x)

Run the forward pass of a simple recurrent network over the input sequence `x`.

For every element `x[t]` (in order) the hidden state is

    h[t] = activationFn.(Winx * x[t] + Winh * h[t-1] + Bin)

where the `Winh * h[t-1]` term is omitted for the first element. The output distribution
is the softmax of `Wouth * h[t] + Bout`.

# Arguments
- `activationFn`: scalar activation, applied to each component of the pre-activation.
- `Winx`, `Winh`, `Bin`: input-to-hidden weights, hidden-to-hidden weights, hidden bias.
- `Wouth`, `Bout`: hidden-to-output weights and output bias.
- `x`: iterable of input vectors, one per time step.

# Returns
A tuple `(h, p, y)` with one entry per time step:
- `h`: hidden-state vectors (stored as `Vector{Float64}`),
- `p`: output probability vectors (stored as `Vector{Float64}`),
- `y`: index of the largest probability in `p[t]` (first index on ties).

All three are empty when `x` is empty.
"""
function forwardRNN(activationFn::Function, Winx, Winh, Bin, Wouth, Bout, x)
    h = Vector{Float64}[] # hidden state at each time step
    p = Vector{Float64}[] # output probability distribution at each time step
    y = Int[]             # index of the most probable output at each time step
    for input in x
        if isempty(h)
            # First step: there is no previous hidden state to feed back.
            preActivation = Winx * input + Bin
        else
            preActivation = Winx * input + Winh * h[end] + Bin
        end
        hidden = map(activationFn, preActivation)
        # Output layer: softmax over the scores. Subtracting the maximum does not change
        # the result mathematically and keeps `exp` from overflowing.
        score = Wouth * hidden + Bout
        expScore = map(exp, score .- maximum(score))
        probs = expScore ./ sum(expScore)
        push!(h, hidden)
        push!(p, probs)
        # `findmax` returns (value, index); only the index is kept.
        push!(y, findmax(probs)[2])
    end
    return h, p, y
end

In [None]:
"""
    createOneHot(label, n)

Return the target distribution for one time step as a vector of length `n`.

An integer `label` is turned into a one-hot `Vector{Float64}` with a `1.0` at index
`label`. A vector `label` is taken to be the target distribution already and is
returned unchanged.
"""
function createOneHot(label::Integer, n::Integer)
    onehot = zeros(n)
    onehot[label] = 1
    return onehot
end
createOneHot(label::AbstractVector, n::Integer) = label

"""
    backwardRNN(activationFnGrad, Winx, Winh, Bin, Wouth, Bout, x, h, p, yCap)

Back-propagate through time for the network evaluated by a forward pass of the form
`h[t] = f.(Winx * x[t] + Winh * h[t-1] + Bin)`, `p[t] = softmax(Wouth * h[t] + Bout)`.

The output error is `target - p[t]`, so the returned gradients point in the direction
that increases the log-likelihood of the targets; add them (scaled by a learning rate)
to the weights.

# Arguments
- `activationFnGrad`: scalar derivative of the activation expressed in terms of the
  activation's output; it is applied to each component of `h[t]`.
- `Winx`, `Winh`, `Bin`, `Wouth`, `Bout`: network parameters.
- `x`, `h`, `p`: inputs, hidden states and output distributions per time step.
- `yCap`: correct output per time step, either an integer index or a one-hot vector.

# Returns
`(gradWinx, gradWinh, gradBin, gradWouth, gradBout)`, each shaped like the parameter it
belongs to. All are zero when `x` is empty.

# Throws
`DimensionMismatch` when `x`, `h`, `p` and `yCap` do not have the same length.
"""
function backwardRNN(activationFnGrad::Function, Winx, Winh, Bin, Wouth, Bout, x, h, p, yCap)
    steps = length(x)
    (length(h) == steps && length(p) == steps && length(yCap) == steps) ||
        throw(DimensionMismatch("x, h, p and yCap must all have the same length"))

    gradWinx = zero(Winx)
    gradWinh = zero(Winh)
    gradBin = zero(Bin)
    gradWouth = zero(Wouth)
    gradBout = zero(Bout)

    deltaIn = zero(Bin) # error flowing back from the following time step
    # Walk the sequence from the last time step to the first.
    for time in steps:-1:1
        pCap = createOneHot(yCap[time], length(Bout))
        # output layer error
        deltaOut = pCap - p[time]
        # output gradient: outer product shaped like Wouth (outputs x hidden)
        gradWouth += deltaOut * h[time]'
        gradBout += deltaOut
        # backpropagate into the hidden state, from the next step and from the output
        deltaInter = Winh' * deltaIn + Wouth' * deltaOut
        deltaIn = deltaInter .* map(activationFnGrad, h[time])
        # hidden layer gradient: outer products shaped like Winx / Winh
        gradWinx += deltaIn * x[time]'
        gradBin += deltaIn
        if time != 1
            gradWinh += deltaIn * h[time - 1]'
        end
    end
    return gradWinx, gradWinh, gradBin, gradWouth, gradBout
end

In [None]:
"""
    updateWeights(weights, grads, lambda)

Return a tuple holding `w + lambda * g` for every corresponding pair of `weights` and
`grads`. Nothing is mutated. The gradients are added, so they must point in the
direction in which the objective should move.

Throws a `DimensionMismatch` when `weights` and `grads` differ in length.
"""
function updateWeights(weights, grads, lambda)
    length(weights) == length(grads) ||
        throw(DimensionMismatch("weights and grads must have the same length"))
    return map((w, g) -> w + lambda * g, weights, grads)
end

"""
    updateWeights()

Apply one update step to the global parameters `Winx`, `Winh`, `Bin`, `Wouth` and
`Bout`, reading the globals `gradWinx`, `gradWinh`, `gradBin`, `gradWouth`, `gradBout`
and the learning rate `lambda`. Returns `nothing`.

Prefer `updateWeights(weights, grads, lambda)`, which does not rely on global state.
"""
function updateWeights()
    # Without this declaration the assignment below would create locals and reading
    # them would raise an UndefVarError.
    global Winx, Winh, Bin, Wouth, Bout
    Winx, Winh, Bin, Wouth, Bout = updateWeights(
        (Winx, Winh, Bin, Wouth, Bout),
        (gradWinx, gradWinh, gradBin, gradWouth, gradBout),
        lambda
    )
    return nothing
end

### References

- [NLP Programming Tutorial](http://www.phontron.com/teaching.php)
- [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)