In [1]:
import numpy as np
from collections import defaultdict

def create_one_hot(id,size):
    """Build a one-hot vector.

    Args:
        id: Position that receives the value 1.
        size: Length of the vector (passed straight to ``np.zeros``).

    Returns:
        A NumPy array of zeros of the requested size with a single 1 at
        position ``id``.
    """
    one_hot = np.zeros(size)
    one_hot[id] = 1
    return one_hot

def find_best(p):
    """Return the index of the largest entry of ``p``.

    When several entries share the largest value the earliest index is
    returned, because a later entry only replaces the current best when it
    is strictly greater. An empty ``p`` yields 0.
    """
    return max(range(len(p)), key=lambda idx: p[idx], default=0)

def forward_rnn(w_rx, w_rh, b_r, w_oh, b_o, x, x_ids):
    """Run the recurrent network forward over one sentence.

    Args:
        w_rx: Input-to-hidden weight matrix (NumPy array); column ``i`` is
            the contribution of the word whose id is ``i``.
        w_rh: Hidden-to-hidden weight matrix.
        b_r: Hidden-layer bias vector.
        w_oh: Hidden-to-output weight matrix.
        b_o: Output-layer bias vector.
        x: Sentence as a sequence of ``(word, tag)`` pairs. Only the word is
            read here; the tag slot is ignored.
        x_ids: Mapping from word to its column index in ``w_rx``. It is only
            queried with ``in`` and a guarded lookup, so an auto-growing
            ``defaultdict`` is never extended by this function.

    Returns:
        A tuple ``(h, p, y)`` of three lists with one entry per time step:
        the hidden vectors, the output-layer vectors, and the index of the
        largest output value (earliest index on ties).
    """
    h = []  # hidden-layer values
    p = []  # output-layer values
    # NOTE(review): the original comment calls ``p`` a probability
    # distribution, but tanh yields values in (-1, 1) that are not
    # normalised, while gradient_rnn uses the ``p_prime - p`` error term of a
    # softmax/cross-entropy output. Confirm tanh is the intended activation.
    y = []  # index of the best-scoring output at each step
    for time_t, (word, _) in enumerate(x):
        if word in x_ids:
            # Multiplying w_rx by a one-hot vector only selects one column,
            # so read that column directly instead of allocating a
            # vocabulary-sized vector and doing a full matrix product.
            net_r = w_rx[:, x_ids[word]]
        else:
            # A word missing from x_ids is an all-zero input vector, so the
            # input term contributes nothing.
            net_r = np.zeros(len(b_r))
        if time_t > 0:
            net_r = net_r + np.dot(w_rh, h[time_t - 1])
        h.append(np.tanh(net_r + b_r))
        p.append(np.tanh(np.dot(w_oh, h[time_t]) + b_o))
        y.append(int(np.argmax(p[time_t])))
    return h, p, y

def initialize_net_randomly(x_len,y_len,node):
    """Create randomly initialised network parameters.

    Each parameter is drawn from NumPy's global random generator with
    ``np.random.rand`` and then shifted and scaled as ``(r - 0.5) / 5``.

    Args:
        x_len: Number of input features (columns of ``w_rx``).
        y_len: Number of output classes (rows of ``w_oh``).
        node: Number of hidden units.

    Returns:
        ``(w_rx, w_rh, b_r, w_oh, b_o)`` with shapes ``(node, x_len)``,
        ``(node, node)``, ``(node,)``, ``(y_len, node)`` and ``(y_len,)``.
    """
    def _small_uniform(*shape):
        # Shared recipe for every parameter: centre the draw, then shrink it.
        return (np.random.rand(*shape) - 0.5) / 5

    # The draw order is kept fixed so results under a given seed do not change.
    w_rx = _small_uniform(node, x_len)
    w_rh = _small_uniform(node, node)
    b_r = _small_uniform(node)
    w_oh = _small_uniform(y_len, node)
    b_o = _small_uniform(y_len)
    return w_rx, w_rh, b_r, w_oh, b_o

def initialize(x_len,y_len,node):
    """Create all-zero arrays with the same shapes as the network parameters.

    Args:
        x_len: Number of input features.
        y_len: Number of output classes.
        node: Number of hidden units.

    Returns:
        ``(dw_rx, dw_rh, db_r, dw_oh, db_o)`` filled with zeros, with shapes
        ``(node, x_len)``, ``(node, node)``, ``(node,)``, ``(y_len, node)``
        and ``(y_len,)``.
    """
    shapes = (
        (node, x_len),   # dw_rx
        (node, node),    # dw_rh
        (node,),         # db_r
        (y_len, node),   # dw_oh
        (y_len,),        # db_o
    )
    return tuple(np.zeros(shape) for shape in shapes)

def gradient_rnn(w_rx, w_rh, b_r, w_oh, b_o, x, h, p, node, y_ids, x_ids):
    """Back-propagate through time over one sentence.

    Walks the sentence from the last time step to the first, using
    ``one_hot(tag) - p[t]`` as the output error, and accumulates the update
    direction for every parameter.

    Args:
        w_rx, w_rh, b_r, w_oh, b_o: Current network parameters. Only
            ``w_rh`` and ``w_oh`` enter the arithmetic; all five are used to
            shape the returned accumulators so each one matches the
            parameter it updates.
        x: Sentence as a sequence of ``(word, tag)`` pairs.
        h: Hidden vectors per time step, as returned by ``forward_rnn``.
        p: Output vectors per time step, as returned by ``forward_rnn``.
        node: Unused; kept so existing callers keep working.
        y_ids: Mapping from tag to output index.
        x_ids: Mapping from word to column index of ``w_rx``.

    Returns:
        ``(dw_rx, dw_rh, db_r, dw_oh, db_o)``.

    Raises:
        KeyError: If a tag of ``x`` is missing from ``y_ids``.
    """
    dw_rx = np.zeros(np.shape(w_rx))
    dw_rh = np.zeros(np.shape(w_rh))
    db_r = np.zeros(np.shape(b_r))
    dw_oh = np.zeros(np.shape(w_oh))
    db_o = np.zeros(np.shape(b_o))
    # Error arriving from the following time step; zero past the sentence end.
    delta_r_prime = np.zeros(len(b_r))
    for time_t in range(len(x) - 1, -1, -1):
        word, tag = x[time_t]
        if tag not in y_ids:
            # Checked with ``in`` so an auto-growing defaultdict is not
            # silently extended with an id that has no output row.
            raise KeyError("unknown tag: {!r}".format(tag))
        p_prime = np.zeros(len(y_ids))
        p_prime[y_ids[tag]] = 1
        delta_o_prime = p_prime - p[time_t]
        dw_oh += np.outer(delta_o_prime, h[time_t])
        db_o += delta_o_prime
        # Hidden error = error from the next step (through w_rh) plus error
        # from this step's output (through w_oh).
        delta_r = np.dot(delta_r_prime, w_rh) + np.dot(delta_o_prime, w_oh)
        # Multiply by the derivative of tanh, 1 - tanh^2.
        delta_r_prime = delta_r * (1 - h[time_t] ** 2)
        if word in x_ids:
            # The outer product with a one-hot input is zero everywhere
            # except one column, so update just that column.
            dw_rx[:, x_ids[word]] += delta_r_prime
        # Words missing from x_ids were a zero input in forward_rnn, so they
        # contribute nothing to dw_rx.
        db_r += delta_r_prime
        if time_t != 0:
            dw_rh += np.outer(delta_r_prime, h[time_t - 1])

    return dw_rx, dw_rh, db_r, dw_oh, db_o
    
def update_weights(w_rx,w_rh,b_r,w_oh,b_o,dw_rx,dw_rh,db_r,dw_oh,db_o,lamb):
    """Move every parameter one step along its accumulated update direction.

    Each parameter ``w`` becomes ``w + lamb * dw`` via augmented assignment,
    so NumPy arrays passed in are modified in place.

    Args:
        w_rx, w_rh, b_r, w_oh, b_o: Network parameters.
        dw_rx, dw_rh, db_r, dw_oh, db_o: Matching update directions.
        lamb: Step size (learning rate).

    Returns:
        The updated ``(w_rx, w_rh, b_r, w_oh, b_o)``.
    """
    params = [w_rx, w_rh, b_r, w_oh, b_o]
    steps = (dw_rx, dw_rh, db_r, dw_oh, db_o)
    for slot, step in enumerate(steps):
        params[slot] += lamb * step
    return tuple(params)
    
    
#train
lamb=0.01  # learning rate
node=100  # number of hidden units
iterations=5  # passes over the training data
seed=0  # fixed seed so the random initial weights are reproducible

x_ids=defaultdict(lambda:len(x_ids))
y_ids=defaultdict(lambda:len(y_ids))

# Read the whole corpus first so the file is closed before training starts.
feat_lab=[]
with open("wiki-en-train.norm_pos.txt","r",encoding="utf-8") as train:
    for line in train:
        sent=[]
        for w_t in line.strip().split(" "):
            if not w_t:
                # Blank line or doubled space: nothing to unpack.
                continue
            # Split on the LAST underscore so a word that itself contains
            # "_" still yields exactly (word, tag).
            word,tag=w_t.rsplit("_",1)
            x_ids[word]  # lookup registers a new id on first sight
            y_ids[tag]
            sent.append((word,tag))
        if sent:
            feat_lab.append(sent)

# The vocabularies are complete: freeze them into plain dicts so a later
# lookup of an unseen word or tag cannot silently mint an id for which the
# weight matrices have no row or column.
x_ids=dict(x_ids)
y_ids=dict(y_ids)

np.random.seed(seed)
w_rx,w_rh,b_r,w_oh,b_o=initialize_net_randomly(len(x_ids),len(y_ids),node)

for _ in range(iterations):
    for x in feat_lab:
        h,p,y=forward_rnn(w_rx,w_rh,b_r,w_oh,b_o,x,x_ids)
        dw_rx,dw_rh,db_r,dw_oh,db_o=gradient_rnn(w_rx,w_rh,b_r,w_oh,b_o,x,h,p,node,y_ids,x_ids)
        w_rx,w_rh,b_r,w_oh,b_o=update_weights(w_rx,w_rh,b_r,w_oh,b_o,dw_rx,dw_rh,db_r,dw_oh,db_o,lamb)
    
#test
# Invert tag -> id once so each predicted id is a single dict lookup instead
# of a scan over every tag.
id_to_tag={value:key for key,value in y_ids.items()}

test_f=[]
with open("wiki-en-test.norm.txt","r",encoding="utf-8") as wt:
    for line in wt:
        words=line.strip().split(" ")
        # forward_rnn expects (word, tag) pairs; the tag is a placeholder
        # because it is not read during prediction.
        test_f.append([(word," ") for word in words])

with open("answer.txt","w",encoding="utf-8") as ans:
    for x in test_f:
        h_t,p_t,y_t=forward_rnn(w_rx,w_rh,b_r,w_oh,b_o,x,x_ids)
        pos=[id_to_tag[tag] for tag in y_t]
        ans.write("{}\n".format(" ".join(pos)))

In [16]:
# Flatten the predicted tags (kekka) and the gold tags (kekka2) into two
# token-level lists, in file order.
with open("answer.txt","r",encoding="utf-8") as ans:
    kekka=[tag for line in ans for tag in line.strip().split(" ")]

with open("wiki-en-test.pos.txt","r",encoding="utf-8") as test:
    kekka2=[tag for line in test for tag in line.strip().split(" ")]

# Position-by-position comparison is only meaningful when both files hold
# the same number of tokens; otherwise the score would be computed on
# misaligned data (or fail with an IndexError part-way through).
if len(kekka)!=len(kekka2):
    raise ValueError(
        "token count mismatch: {} predicted vs {} gold".format(len(kekka),len(kekka2))
    )

count=sum(1 for predicted,gold in zip(kekka,kekka2) if predicted==gold)
# seido = accuracy; guard against empty files to avoid dividing by zero.
seido=count/len(kekka) if kekka else 0.0
print(seido)

0.8593030900723209
