# Multi-scale CNN implementation

## directly run the cell step by step, and you can train the model.

In [82]:
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.nn as nn
import time

### Load the training data

In [83]:
x = np.load('word_embedding_divby5_train.npy')
y = np.load('isfocus_divby5_train.npy')

In [84]:
x = torch.from_numpy(x).float()
y = torch.from_numpy(y).long()

In [85]:
for i in range(x[0].shape[0]):
    print(torch.sum(x[0][i]))

tensor(1.5450)
tensor(0.2635)
tensor(0.)
tensor(0.)
tensor(0.)


### Design the model

In [86]:
class CNN(nn.Module):    

    def __init__(self, Ci=1, D=300):
        super(CNN, self).__init__()
 
        
        self.conv1 = nn.Conv2d(in_channels=Ci, out_channels=1, kernel_size=(1, D), stride=(1, 1),
                                   padding=(0, 0), bias=False)
        self.conv2 = nn.Conv2d(in_channels=Ci, out_channels=1, kernel_size=(3, D), stride=(1, 1),
                                   padding=(1, 0), bias=False)
        self.conv3 = nn.Conv2d(in_channels=Ci, out_channels=1, kernel_size=(5, D), stride=(1, 1),
                                   padding=(2, 0), bias=False)
        self.conv4 = nn.Conv2d(in_channels=Ci, out_channels=2, kernel_size=(1, 3), stride=(1, 1),
                                   padding=(0, 0), bias=False)


    def forward(self, x):
        
        # (N,W,D)
        x = x.unsqueeze(1)  # (N,1,W,D)
        x1 = F.relu(self.conv1(x)) #[(N,1,W,1)]
        x2 = F.relu(self.conv2(x)) #[(N,1,W,1)]
        x3 = F.relu(self.conv3(x)) #[(N,1,W,1)]
        x = torch.cat((x1,x2,x3), 3) #[(N,1,W,3)]
        x = self.conv4(x) #[(N,2,W,1)]
        x = x.squeeze(3)
        return x

In [87]:
net = CNN(Ci=1, D=300)     # define the network
print(net)  # net architecture

# Loss and Optimizer
# Softmax is internally computed.
# Set parameters to be updated.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
loss_func = nn.NLLLoss(weight = torch.tensor([0.5,3.5]))  # the target label is NOT an one-hotted

CNN(
  (conv1): Conv2d(1, 1, kernel_size=(1, 300), stride=(1, 1), bias=False)
  (conv2): Conv2d(1, 1, kernel_size=(3, 300), stride=(1, 1), padding=(1, 0), bias=False)
  (conv3): Conv2d(1, 1, kernel_size=(5, 300), stride=(1, 1), padding=(2, 0), bias=False)
  (conv4): Conv2d(1, 2, kernel_size=(1, 3), stride=(1, 1), bias=False)
)


### Train and output the training time, if 200 epochs, it's about 25 min.

In [None]:
t_start = time.time()
for epoch in range(2000):
    out = net(x)                 # input x and predict based on x
    #print('out:',out.shape,'out:',out)
    loss = loss_func(F.log_softmax(out,dim=1), y)     # must be (1. nn output, 2. target), the target label is NOT one-hotted
    out = F.softmax(out,dim=1)
    y_o = out.max(dim=1)[1].cpu().data
    #precision = (y_o == y) / len(y)
    #print('out', out.sum())
    print('y_o:',y_o[20:23])
    print('y:',y[20:23])
    print('loss:', loss)

    optimizer.zero_grad()   # clear gradients for next train
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients
t_end = time.time()
print('training time:',t_end - t_start)

y_o: tensor([[0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 1, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6924, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 1, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6920, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 1, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6917, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 1, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6913, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 1, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6681, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6671, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6662, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6651, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6088, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6068, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6049, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.6029, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.5334, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.5318, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.5303, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.5288, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4844, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4835, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4826, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4817, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4531, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4524, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4518, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4511, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4284, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4279, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4274, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4268, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4086, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4081, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4077, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.4072, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3929, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3925, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3922, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3919, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3806, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3803, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3801, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3798, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3709, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3706, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3704, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3702, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3630, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3628, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3626, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3625, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3564, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3563, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3561, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3560, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3508, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3507, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3505, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3504, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3459, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3458, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3457, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3455, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3415, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3414, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3413, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3412, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3375, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3374, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3373, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3372, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3338, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3337, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3336, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3335, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3304, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3303, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3302, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3302, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3273, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3272, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3271, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3270, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3243, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3242, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3241, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3241, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3215, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3215, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3214, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3213, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3189, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3188, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3188, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3187, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3164, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3163, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3163, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3162, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3140, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3139, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3138, grad_fn=<NllLoss2DBackward>)
y_o: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
y: tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])
loss: tensor(0.3138, grad_fn=<NllLoss2DBackward>)


### load the test data

In [34]:
xval = np.load('word_embedding_divby5_dev.npy')
yval = np.load('isfocus_divby5_dev.npy')
xval = torch.from_numpy(xval).float()
yval = torch.from_numpy(yval).long()

In [35]:
out = net(xval) 

In [4]:
### Predict the focus for test data

In [36]:
out = F.softmax(out,dim=1)
y_pt = out.max(dim=1)[1].cpu().data

In [39]:
yval = yval[:,2]

In [None]:
y_pt = y_pt[:,2]

### Accuracy number

In [73]:
acc_num =  y_pt - yval == 0


In [74]:
b = acc_num

In [75]:
b.sum()

tensor(1443)

In [76]:
1443/1566

0.921455938697318

### FP number

In [77]:
c = yval[b == 1]

In [78]:
c.sum()

tensor(26)

In [79]:
yval.sum()

tensor(51)

In [80]:
y_pt.sum()

tensor(124)

In [25]:
k = b == 0

In [51]:
k.sum()

tensor(353)

#Weight [0.5,5] epoch 1000 lr=0.1  accuracy = 
TP    40
FN    11
FP   342 
TN   1173

#Weight [0.5, 4.5] epoch 2000 Time 25min30s  lr = 0.1  accuracy = 0.888
TP 31 
FN 20
FP 136  
TN 1300

#Weight [0.5, 3.5] epoch 2000 Time 25min30s  lr = 0.1  accuracy = 0.942
TP 27 
FN 24
FP 67  
TN 1369

In [54]:
PC = 27/94
RC = 27/51

In [None]:
PC = TP/(TP + FP)
RC = TP/(TP+FN)

### F-score

In [55]:
FC = 2*PC*RC/(PC+RC)
FC

0.37241379310344835