In [71]:
import numpy as np
from collections import defaultdict

def create_features(x, ids):
    """Build a unigram count vector for the sentence ``x``.

    Args:
        x: Whitespace-separated text.
        ids: Mapping from ``"UNI:<word>"`` keys to vector indices.

    Returns:
        A 1-D float array with ``len(ids)`` entries (measured before any
        lookup); entry ``ids["UNI:<word>"]`` holds how many times ``<word>``
        occurs in ``x``.

    Every token is looked up with ``ids[...]``, so the result of a lookup for
    a token that has no entry depends on the mapping type passed in.
    """
    counts = np.zeros(len(ids))
    for token in x.split():
        index = ids["UNI:" + token]
        counts[index] += 1
    return counts

def create_features_test(x, ids):
    """Build a unigram count vector for ``x``, ignoring unknown words.

    Args:
        x: Whitespace-separated text.
        ids: Mapping from ``"UNI:<word>"`` keys to vector indices.

    Returns:
        A 1-D float array with ``len(ids)`` entries. Tokens whose
        ``"UNI:<word>"`` key is not in ``ids`` contribute nothing.
    """
    counts = np.zeros(len(ids))
    for token in x.split():
        key = "UNI:" + token
        # Membership test first: unknown tokens are skipped, never looked up.
        if key not in ids:
            continue
        counts[ids[key]] += 1
    return counts

def initialize_net_randomly(node, ids, layer):
    """Create a randomly initialised network as a list of (weights, bias) pairs.

    Args:
        node: Number of units in each non-output layer.
        ids: Feature-index mapping; only ``len(ids)`` is used, as the input
            dimension.
        layer: Layer-count setting; ``layer - 2`` square (node x node) layers
            are inserted between the input and output layers.

    Returns:
        A list of ``(w, b)`` tuples: one ``(node, len(ids))`` input layer, the
        ``layer - 2`` middle layers, and one ``(1, node)`` output layer. Each
        value is ``(np.random.rand(...) - 0.5) / 5``.
    """
    def small_random(*shape):
        # Shift and scale the [0, 1) samples from np.random.rand.
        return (np.random.rand(*shape) - 0.5) / 5

    # (rows, cols) of every weight matrix, input layer first.
    shapes = [(node, len(ids))]
    shapes.extend((node, node) for _ in range(layer - 2))
    shapes.append((1, node))

    # For each layer the weights are drawn before the bias.
    return [(small_random(rows, cols), small_random(rows)) for rows, cols in shapes]

def forward_nn(net, phi0):
    """Propagate ``phi0`` through the network.

    Args:
        net: Sequence of ``(w, b)`` pairs, applied in order.
        phi0: Input vector.

    Returns:
        A list whose first element is ``phi0`` itself and whose element
        ``i + 1`` is ``tanh(w_i . phi_i + b_i)``; it therefore has
        ``len(net) + 1`` entries.
    """
    activations = [phi0]
    for weights, bias in net:
        pre_activation = np.dot(weights, activations[-1]) + bias
        activations.append(np.tanh(pre_activation))
    return activations

def backward_nn(net,phi,y_p):
    """Back-propagate the output error and return the per-layer gradients.

    Args:
        net: Sequence of ``(w, b)`` pairs (only ``w`` is read here).
        phi: Layer outputs as returned by ``forward_nn``; ``phi[i + 1]`` is the
            tanh output of layer ``i``.
        y_p: Target value; anything ``float()`` accepts (e.g. ``"1"``/``"-1"``).

    Returns:
        A list ``delta_p`` of length ``len(net) + 1``. ``delta_p[i + 1]`` is the
        error at the output of layer ``i`` multiplied by the tanh derivative
        ``1 - phi[i + 1] ** 2``. ``delta_p[0]`` is an unused slot and is
        ``None``.
    """
    J=len(net)
    # Slot 0 is never filled; None marks it as "no value" (the previous
    # placeholder was the np.ndarray class object itself).
    delta_p=[None]*(J+1)
    # Error at the network output: target minus the single output unit.
    delta=float(y_p)-float(phi[J][0])
    for i in range(J-1,-1,-1):
        delta_p[i+1]=delta*(1-phi[i+1]**2)
        if i>0:
            # Push the error back through this layer's weights. Skipped for
            # the first layer: the error at the network input is never used,
            # and computing it costs a full pass over the input weights.
            w,_=net[i]
            delta=np.dot(delta_p[i+1],w)
    return delta_p

def update_weights(net, phi, delta_p, lam):
    """Apply one gradient step to every layer of ``net`` in place.

    Args:
        net: Sequence of ``(w, b)`` array pairs; the arrays are modified
            in place.
        phi: Layer outputs; ``phi[i]`` is the input to layer ``i``.
        delta_p: Gradients; ``delta_p[i + 1]`` belongs to layer ``i``.
        lam: Step size multiplying each update.

    Returns:
        None.
    """
    for position, (weights, bias) in enumerate(net, start=1):
        grad = delta_p[position]
        layer_input = phi[position - 1]
        # Augmented assignment keeps the update in place on the arrays
        # stored inside ``net``.
        weights += lam * np.outer(grad, layer_input)
        bias += lam * grad
        
def yosoku(score, n):
    """Turn a network output into a class label.

    Args:
        score: Indexable collection of layer outputs.
        n: Index of the entry to read; its first element is the value used.

    Returns:
        ``1`` if ``float(score[n][0])`` is strictly positive, otherwise ``-1``.
    """
    return 1 if float(score[n][0]) > 0 else -1

    
#train
# Vocabulary map: looking up an unseen "UNI:<word>" key assigns it the next
# free index (the size of the dict at that moment).
ids=defaultdict(lambda:len(ids))
train_data=[]
with open("titles-en-train.labeled.txt","r",encoding="utf-8") as train:
    for line in train:
        label,sent=line.strip().split("\t")
        train_data.append((label,sent))
        # Tokenize with split() -- the same call create_features uses -- so
        # every token met during feature extraction already has an index.
        # (split(" ") keeps tokens containing other whitespace intact, which
        # create_features would then break into words with no index.)
        for word in sent.split():
            ids["UNI:"+word]

# Feature vectors are built only once the vocabulary is complete, so each one
# has the final len(ids) entries. The label stays a string; backward_nn
# converts it with float().
feat_lab=[(create_features(sent,ids),label) for label,sent in train_data]

node=2
layer=3
net=initialize_net_randomly(node,ids,layer)

iterations=1
lam=0.1
for num in range(iterations):
    for phi0,y in feat_lab:
        phi=forward_nn(net,phi0)
        delta_p=backward_nn(net,phi,y)
        update_weights(net,phi,delta_p,lam)

#test
# Predict one label per line of the test file; words not in the training
# vocabulary are skipped by create_features_test.
yosoku_t=[]
with open("titles-en-test.txt","r",encoding="utf-8") as tet:
    for line in tet:
        phi0=create_features_test(line.strip(),ids)
        score=forward_nn(net,phi0)
        yosoku_t.append(yosoku(score,len(net)))

# Write the predictions, one per line, after the input file is closed.
with open("yosoku.test.txt","w",encoding="utf-8") as yt:
    yt.writelines(str(num)+"\n" for num in yosoku_t)
        

In [77]:
# Compare the predicted labels with the gold labels and print the accuracy.
with open("yosoku.test.txt","r",encoding="utf-8") as ans:
    # Predicted tags; a line may hold several space-separated tags.
    kekka=[]
    for line in ans:
        kekka.extend(line.strip().split(" "))

with open("titles-en-test.labeled.txt","r",encoding="utf-8") as test:
    # Gold tags: the field before the tab on each line.
    kekka2=[]
    for line in test:
        tag,sen=line.strip().split("\t")
        kekka2.append(tag)

# Accuracy is only meaningful when predictions and gold labels line up
# one-to-one, so a mismatch is reported explicitly.
if len(kekka)!=len(kekka2):
    raise ValueError(
        "prediction/gold count mismatch: "
        +str(len(kekka))+" predictions vs "+str(len(kekka2))+" gold labels"
    )

count=sum(1 for predicted,gold in zip(kekka,kekka2) if predicted==gold)
# Guard against an empty prediction file (would otherwise divide by zero).
seido=count/len(kekka) if kekka else 0.0
print(seido)

0.9132128940843075
