## Loading the data, padding (based on 2.0)

In [1]:
import sys
import os
import numpy as np
import torch

In [2]:
def read_chinese_data(inputfilename):
    with open(inputfilename, "r") as inputfile:
        sentences = []
        collection_words = []
        collection_labels = []
        for line in inputfile:
            if line[0] == '#':
                #The continue statement continues with the next iteration of the loop
                continue
            columns = line.split()
            #print(words)
            if columns == []:
                sentences.append((''.join(collection_words), collection_labels))
                collection_words = []
                collection_labels = []
                continue
            collection_words.append(columns[1])
            collection_labels += [1] + ([0] * (len(columns[1]) - 1))
            
    return sentences

In [3]:
train_sentences = read_chinese_data('/scratch/lt2316-h20-resources/zh_gsd-ud-train.conllu')

In [4]:
test_sentences = read_chinese_data('/scratch/lt2316-h20-resources/zh_gsd-ud-test.conllu')

In [5]:
def index_chars(sentences):
    megasentence = ''.join(sentences)
    char_list = set()
    for c in megasentence:
        char_list.add(c)
    char_list = [0] + list(char_list)
    return char_list, {char_list[x]:x for x in range(len(char_list))}

In [6]:
int_index, char_index = index_chars([x[0] for x in train_sentences + test_sentences])

In [7]:
int_index

[0,
 '埋',
 '祈',
 '眠',
 '堵',
 '椅',
 '加',
 '脫',
 '恥',
 '繞',
 'Z',
 '釀',
 '餡',
 '郎',
 '丈',
 '賬',
 '魚',
 '磯',
 '旱',
 '葛',
 '拖',
 '踐',
 '-',
 '樞',
 '圭',
 '滋',
 '瓊',
 '伺',
 '緩',
 '穹',
 '救',
 '艦',
 '賢',
 '瀕',
 '姥',
 '堅',
 '嬴',
 '熄',
 '娠',
 '省',
 '線',
 '僧',
 '矗',
 '鼎',
 '瓦',
 '貢',
 '測',
 '朗',
 '襪',
 '塊',
 '鄧',
 '懷',
 '浴',
 '竭',
 '侈',
 '碗',
 '疼',
 '翔',
 '缺',
 '嘛',
 '茵',
 '疾',
 '總',
 '樟',
 '渝',
 '檸',
 '瀘',
 '訓',
 '些',
 '苗',
 '恩',
 '涅',
 '扶',
 '免',
 '終',
 '辦',
 'i',
 '苛',
 '鰺',
 '殉',
 '柵',
 '本',
 '擎',
 '副',
 '篇',
 '鷗',
 '怕',
 '菌',
 '底',
 '椰',
 '伍',
 '鄢',
 '檎',
 '繫',
 '跳',
 '唯',
 '贏',
 '鴿',
 '徭',
 '杞',
 '腿',
 '器',
 '韃',
 '宇',
 '檄',
 '更',
 '斐',
 '靜',
 '禍',
 '纏',
 '逛',
 '鑼',
 '餘',
 '面',
 '庸',
 '中',
 '叢',
 '淋',
 '權',
 '卦',
 '柬',
 '讓',
 '北',
 '府',
 '任',
 '錶',
 '謝',
 '儡',
 '另',
 '柳',
 '柝',
 '鳴',
 '暗',
 '慷',
 '涵',
 '恢',
 '睦',
 '掃',
 '懊',
 '凶',
 '週',
 '烷',
 '諧',
 '鱗',
 '蠻',
 '錯',
 '治',
 '澎',
 '洒',
 '偕',
 '波',
 '奶',
 '夷',
 '臘',
 '小',
 '回',
 '阡',
 '樁',
 '爬',
 '豆',
 '頤',
 '艙',
 '醒',
 '業',
 '曄',
 '仁',
 '淀',


In [8]:
def convert_sentence(sentence, index):
    return [index[x] for x in sentence]

In [9]:
def pad_lengths(sentences, max_length, padding=0):
    return [x + ([padding] * (max_length - len(x))) for x in sentences]

In [10]:
def create_dataset(x, device="cpu"):
    converted = [(convert_sentence(x1[0], char_index), x1[1]) for x1 in x]
    X, y = zip(*converted)
    lengths = [len(x2) for x2 in X]
    padded_X = pad_lengths(X, max(lengths))
    Xt = torch.LongTensor(padded_X).to(device)
    padded_y = pad_lengths(y, max(lengths), padding=-1)
    yt = torch.LongTensor(padded_y).to(device)
    lengths_t = torch.LongTensor(lengths).to(device)
    return Xt, lengths_t, yt

In [11]:
train_X_tensor, train_lengths_tensor, train_y_tensor = create_dataset(train_sentences, "cuda:2")
test_X_tensor, test_lengths_tensor, test_y_tensor = create_dataset(test_sentences, "cuda:2")

## Packing the sequences for RNN

In [12]:
testtensor = torch.randn((10,100,200))

In [13]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

In [14]:
testlengths = torch.randint(1, 100, (10,))

In [15]:
testlengths.size(), testlengths

(torch.Size([10]), tensor([95, 71, 90, 91, 12, 69, 26, 62, 49, 81]))

In [16]:
packed = pack_padded_sequence(testtensor, testlengths, batch_first=True, enforce_sorted=False)

In [17]:
testtensor

tensor([[[-0.7655, -1.0624,  0.3472,  ...,  1.1241, -1.4086,  1.2864],
         [ 2.4072,  1.8871, -0.6842,  ..., -0.1677, -1.0074,  1.4895],
         [-0.6139, -0.7079, -1.6666,  ..., -0.6701,  0.2219, -0.2259],
         ...,
         [ 0.4592, -1.2710,  0.9222,  ..., -0.5310, -2.0240, -1.1175],
         [ 1.5645,  1.7058,  1.8887,  ..., -0.3119,  1.1172, -0.1876],
         [ 0.4740,  1.0587,  0.3168,  ..., -0.1922,  0.7979, -2.0777]],

        [[-0.3271,  0.0399, -0.2727,  ..., -1.0159,  0.3676,  0.3217],
         [-0.3538,  0.3262,  0.5066,  ...,  0.1544,  3.3837, -1.3318],
         [ 0.4992, -0.9346,  1.6485,  ..., -0.5386,  0.3537,  1.2564],
         ...,
         [ 0.0286,  0.0733, -1.1147,  ...,  0.6976, -0.2958,  0.0584],
         [ 0.2396, -1.2888, -0.6070,  ..., -1.1753,  1.4104, -0.7159],
         [-0.2679, -0.9486, -2.1437,  ..., -0.7857, -0.9822,  0.7591]],

        [[-0.4340, -1.1005,  1.6977,  ..., -0.6661, -0.7251, -2.2044],
         [-0.1305, -1.6333,  0.5635,  ..., -0

In [18]:
packed

PackedSequence(data=tensor([[-7.6553e-01, -1.0624e+00,  3.4723e-01,  ...,  1.1241e+00,
         -1.4086e+00,  1.2864e+00],
        [-1.4471e+00, -3.3526e-01, -9.6787e-01,  ..., -5.2261e-01,
         -8.9144e-01,  8.2377e-01],
        [-4.3400e-01, -1.1005e+00,  1.6977e+00,  ..., -6.6615e-01,
         -7.2507e-01, -2.2044e+00],
        ...,
        [ 8.0458e-01,  1.4029e+00, -8.0282e-01,  ...,  2.0672e+00,
         -5.0840e-01, -1.2119e+00],
        [ 2.3915e-01, -3.1589e-01,  8.3257e-01,  ..., -3.2940e+00,
          2.4418e-03,  1.1835e-01],
        [ 2.2864e+00,  5.7609e-01, -1.0993e+00,  ..., -3.2018e-01,
          8.6901e-01, -2.9099e+00]]), batch_sizes=tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,  4,
         4,  4,

In [19]:
len(packed.batch_sizes)

95

In [20]:
unpacked = pad_packed_sequence(packed, batch_first=True, total_length=100)

In [21]:
unpacked

(tensor([[[-0.7655, -1.0624,  0.3472,  ...,  1.1241, -1.4086,  1.2864],
          [ 2.4072,  1.8871, -0.6842,  ..., -0.1677, -1.0074,  1.4895],
          [-0.6139, -0.7079, -1.6666,  ..., -0.6701,  0.2219, -0.2259],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-0.3271,  0.0399, -0.2727,  ..., -1.0159,  0.3676,  0.3217],
          [-0.3538,  0.3262,  0.5066,  ...,  0.1544,  3.3837, -1.3318],
          [ 0.4992, -0.9346,  1.6485,  ..., -0.5386,  0.3537,  1.2564],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-0.4340, -1.1005,  1.6977,  ..., -0.6661, -0.7251, -2.2044],
          [-0.1305, -1.6333,

In [22]:
unpacked[0]

tensor([[[-0.7655, -1.0624,  0.3472,  ...,  1.1241, -1.4086,  1.2864],
         [ 2.4072,  1.8871, -0.6842,  ..., -0.1677, -1.0074,  1.4895],
         [-0.6139, -0.7079, -1.6666,  ..., -0.6701,  0.2219, -0.2259],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.3271,  0.0399, -0.2727,  ..., -1.0159,  0.3676,  0.3217],
         [-0.3538,  0.3262,  0.5066,  ...,  0.1544,  3.3837, -1.3318],
         [ 0.4992, -0.9346,  1.6485,  ..., -0.5386,  0.3537,  1.2564],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.4340, -1.1005,  1.6977,  ..., -0.6661, -0.7251, -2.2044],
         [-0.1305, -1.6333,  0.5635,  ..., -0

In [23]:
unpacked[0].size()

torch.Size([10, 100, 200])

## Batching (based on 1.0, 1.1, 1.2)

In [24]:
class Batcher:
    def __init__(self, X, lengths, y, device, batch_size=50, max_iter=None):
        self.X = X
        self.lengths = lengths # We need the lengths to efficiently use the padding.
        self.y = y
        self.device = device
        self.batch_size=batch_size
        self.max_iter = max_iter
        self.curr_iter = 0
        
    def __iter__(self):
        return self
    
    def __next__(self):
        if self.curr_iter == self.max_iter:
            raise StopIteration
        permutation = torch.randperm(self.X.size()[0], device=self.device)
        permX = self.X[permutation]
        permlengths = self.lengths[permutation]
        permy = self.y[permutation]
        splitX = torch.split(permX, self.batch_size)
        splitlengths = torch.split(permlengths, self.batch_size)
        splity = torch.split(permy, self.batch_size)
        
        self.curr_iter += 1
        return zip(splitX, splitlengths, splity)

In [25]:
b = Batcher(train_X_tensor, train_lengths_tensor, train_y_tensor, torch.device('cuda:2'), max_iter=100)

In [26]:
testbatching = next(b)

In [27]:
testbatching

<zip at 0x7efdbdee0f00>

In [28]:
testbatch = next(testbatching)

In [29]:
testbatch

(tensor([[ 545, 2868, 2407,  ...,    0,    0,    0],
         [3203, 2409, 3417,  ...,    0,    0,    0],
         [ 226, 1267, 1201,  ...,    0,    0,    0],
         ...,
         [3203, 2409, 1672,  ...,    0,    0,    0],
         [ 806, 3271, 2421,  ...,    0,    0,    0],
         [ 836,  759, 3036,  ...,    0,    0,    0]], device='cuda:2'),
 tensor([36, 78, 32, 32, 49, 71, 22, 23, 19, 32, 62, 30, 63, 27, 55, 80, 65, 15,
         30, 36, 29, 29, 60, 19, 14, 40, 60, 32, 19, 21, 63, 62, 46, 38, 27, 18,
         40, 35, 36, 19, 14, 32, 55, 49, 62, 34, 67, 26, 64, 24],
        device='cuda:2'),
 tensor([[ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  0,  0,  ..., -1, -1, -1],
         [ 1,  1,  0,  ..., -1, -1, -1],
         ...,
         [ 1,  0,  0,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1]], device='cuda:2'))

## Modeling

In [30]:
import torch.nn as nn

In [31]:
emb = nn.Embedding(len(int_index), 200, 0).to("cuda:2")

In [32]:
testX, testlengths, testy = testbatch

In [33]:
testembs = emb(testX)

In [34]:
testembs

tensor([[[-0.0971,  0.6074, -2.0744,  ...,  0.0970,  1.1844,  0.8999],
         [-0.0243,  2.3940, -2.9894,  ...,  0.8090,  2.0689, -0.0798],
         [-1.0709, -0.4017,  0.8465,  ...,  1.1983, -1.0624,  1.1463],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.5316, -1.3332, -1.4926,  ...,  1.1938, -0.1908,  0.8099],
         [ 0.8595,  0.3166, -0.5150,  ..., -0.1407, -0.3194, -0.0930],
         [-0.2680,  0.6567,  2.3078,  ..., -0.6668,  0.2506, -0.4044],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.9645,  1.5645, -0.6914,  ...,  0.0474, -0.1021, -0.0812],
         [ 0.6540,  0.5616,  0.3189,  ..., -0

In [35]:
testembs.size()

torch.Size([50, 182, 200])

In [36]:
testembs.device

device(type='cuda', index=2)

In [37]:
testlstm = nn.LSTM(200, 150, batch_first=True).to("cuda:2")

In [38]:
testembspadded = pack_padded_sequence(testembs, testlengths.to("cpu"), batch_first=True, enforce_sorted=False)

In [39]:
testoutput, teststate = testlstm(testembspadded)

In [40]:
testoutput

PackedSequence(data=tensor([[-2.0648e-02,  1.3061e-01,  4.5912e-02,  ...,  6.2236e-02,
          1.3535e-02,  7.5209e-02],
        [-2.6540e-01,  1.6427e-02,  4.0657e-02,  ...,  2.1634e-04,
         -1.2915e-01, -1.1415e-03],
        [-2.0648e-02,  1.3061e-01,  4.5912e-02,  ...,  6.2236e-02,
          1.3535e-02,  7.5209e-02],
        ...,
        [ 1.7102e-01, -1.1723e-01, -1.5912e-01,  ...,  1.6756e-01,
         -3.9497e-02,  4.5473e-02],
        [ 6.9713e-02, -4.8579e-02, -6.0299e-03,  ...,  1.7340e-01,
          1.6703e-02,  4.8543e-02],
        [ 1.5079e-01, -9.0342e-02, -1.0320e-01,  ...,  2.5328e-01,
          9.9119e-02,  8.5512e-04]], device='cuda:2',
       grad_fn=<CudnnRnnBackward>), batch_sizes=tensor([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 48, 47, 47, 47,
        46, 42, 42, 41, 40, 39, 38, 38, 37, 35, 35, 33, 31, 31, 26, 26, 25, 24,
        21, 21, 20, 20, 18, 18, 18, 18, 18, 18, 17, 17, 17, 15, 15, 15, 15, 15,
        15, 13, 13, 13, 13, 13, 11, 11,  8,

In [41]:
testunpacked = pad_packed_sequence(testoutput, batch_first=True)

In [42]:
testunpacked[0].size()

torch.Size([50, 80, 150])

In [43]:
testsigm = nn.Sigmoid().to("cuda:2")

In [44]:
testoutput2 = testsigm(testunpacked[0])

In [45]:
testoutput2.size()

torch.Size([50, 80, 150])

In [46]:
testlin = nn.Linear(150, 2).to("cuda:2")

In [47]:
testoutput3 = testlin(testoutput2)

In [48]:
testoutput3.size()

torch.Size([50, 80, 2])

In [49]:
testsoft = nn.LogSoftmax(2).to("cuda:2")

In [50]:
testoutput4 = testsoft(testoutput3)

In [51]:
testoutput4

tensor([[[-0.5439, -0.8686],
         [-0.5379, -0.8770],
         [-0.5488, -0.8620],
         ...,
         [-0.5325, -0.8846],
         [-0.5325, -0.8846],
         [-0.5325, -0.8846]],

        [[-0.5531, -0.8560],
         [-0.5519, -0.8577],
         [-0.5300, -0.8882],
         ...,
         [-0.5147, -0.9106],
         [-0.5325, -0.8846],
         [-0.5325, -0.8846]],

        [[-0.5318, -0.8856],
         [-0.5291, -0.8896],
         [-0.5532, -0.8559],
         ...,
         [-0.5325, -0.8846],
         [-0.5325, -0.8846],
         [-0.5325, -0.8846]],

        ...,

        [[-0.5531, -0.8560],
         [-0.5519, -0.8577],
         [-0.5484, -0.8625],
         ...,
         [-0.5325, -0.8846],
         [-0.5325, -0.8846],
         [-0.5325, -0.8846]],

        [[-0.5309, -0.8869],
         [-0.5233, -0.8978],
         [-0.5293, -0.8892],
         ...,
         [-0.5325, -0.8846],
         [-0.5325, -0.8846],
         [-0.5325, -0.8846]],

        [[-0.5258, -0.8942],
       

In [52]:
testy_short = testy[:, :max(testlengths)]

In [53]:
testy_short

tensor([[ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  0,  0,  ...,  1, -1, -1],
        [ 1,  1,  0,  ..., -1, -1, -1],
        ...,
        [ 1,  0,  0,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1]], device='cuda:2')

In [54]:
testy_short.size()

torch.Size([50, 80])

In [55]:
max(testlengths)

tensor(80, device='cuda:2')

In [56]:
testpermuted = testoutput4.permute(0, 2, 1)

In [57]:
testpermuted

tensor([[[-0.5439, -0.5379, -0.5488,  ..., -0.5325, -0.5325, -0.5325],
         [-0.8686, -0.8770, -0.8620,  ..., -0.8846, -0.8846, -0.8846]],

        [[-0.5531, -0.5519, -0.5300,  ..., -0.5147, -0.5325, -0.5325],
         [-0.8560, -0.8577, -0.8882,  ..., -0.9106, -0.8846, -0.8846]],

        [[-0.5318, -0.5291, -0.5532,  ..., -0.5325, -0.5325, -0.5325],
         [-0.8856, -0.8896, -0.8559,  ..., -0.8846, -0.8846, -0.8846]],

        ...,

        [[-0.5531, -0.5519, -0.5484,  ..., -0.5325, -0.5325, -0.5325],
         [-0.8560, -0.8577, -0.8625,  ..., -0.8846, -0.8846, -0.8846]],

        [[-0.5309, -0.5233, -0.5293,  ..., -0.5325, -0.5325, -0.5325],
         [-0.8869, -0.8978, -0.8892,  ..., -0.8846, -0.8846, -0.8846]],

        [[-0.5258, -0.5333, -0.5319,  ..., -0.5325, -0.5325, -0.5325],
         [-0.8942, -0.8835, -0.8855,  ..., -0.8846, -0.8846, -0.8846]]],
       device='cuda:2', grad_fn=<PermuteBackward>)

In [58]:
nllloss = nn.NLLLoss(ignore_index=-1).to("cuda:2")

In [59]:
nllloss(testpermuted, testy_short)

tensor(0.7527, device='cuda:2', grad_fn=<NllLoss2DBackward>)

In [60]:
class Segmenter(nn.Module):
    def __init__(self, vocab_size, emb_size):
        super().__init__()
        
        self.vocab_size = vocab_size
        self.emb_size = emb_size
        
        self.emb = nn.Embedding(self.vocab_size, self.emb_size, 0)
        self.lstm = nn.LSTM(self.emb_size, 150, batch_first=True)
        self.sig1 = nn.Sigmoid()
        self.lin = nn.Linear(150, 2)
        self.softmax = nn.LogSoftmax(2)
        
    def forward(self, x, lengths):
        embs = self.emb(x)
        packed = pack_padded_sequence(embs, lengths.to("cpu"), batch_first=True, enforce_sorted=False)
        output1, _ = self.lstm(packed)
        unpacked, _ = pad_packed_sequence(output1, batch_first=True)
        output2 = self.sig1(unpacked)
        output3 = self.lin(output2)
        return self.softmax(output3)
        

In [61]:
import torch.optim as optim

In [62]:
def train(X, lengths, y, vocab_size, emb_size, batch_size, epochs, device, model=None):
    b = Batcher(X, lengths, y, device, batch_size=batch_size, max_iter=epochs)
    if not model:
        m = Segmenter(vocab_size, emb_size).to(device)
    else:
        m = model
    loss = nn.NLLLoss(ignore_index=-1)
    optimizer = optim.Adam(m.parameters(), lr=0.005)
    epoch = 0
    for split in b:
        tot_loss = 0
        for batch in split:
            optimizer.zero_grad()
            o = m(batch[0], batch[1])
            l = loss(o.permute(0,2,1), batch[2][:, :max(batch[1])])
            tot_loss += l
            l.backward()
            optimizer.step()
        print("Total loss in epoch {} is {}.".format(epoch, tot_loss))
        epoch += 1
    return m

In [63]:
model = train(train_X_tensor, train_lengths_tensor, train_y_tensor, len(int_index), 200, 50, 30, "cuda:2")

Total loss in epoch 0 is 32.004825592041016.
Total loss in epoch 1 is 18.05829429626465.
Total loss in epoch 2 is 13.295701026916504.
Total loss in epoch 3 is 10.13306713104248.
Total loss in epoch 4 is 7.971889972686768.
Total loss in epoch 5 is 6.049184322357178.
Total loss in epoch 6 is 4.774665832519531.
Total loss in epoch 7 is 3.8695778846740723.
Total loss in epoch 8 is 3.0027005672454834.
Total loss in epoch 9 is 2.354471206665039.
Total loss in epoch 10 is 1.9307667016983032.
Total loss in epoch 11 is 2.0545787811279297.
Total loss in epoch 12 is 2.439685344696045.
Total loss in epoch 13 is 2.4255528450012207.
Total loss in epoch 14 is 1.8841274976730347.
Total loss in epoch 15 is 1.2326704263687134.
Total loss in epoch 16 is 0.759043276309967.
Total loss in epoch 17 is 0.4610810875892639.
Total loss in epoch 18 is 0.30292564630508423.
Total loss in epoch 19 is 0.20920120179653168.
Total loss in epoch 20 is 0.16383619606494904.
Total loss in epoch 21 is 0.1345568150281906.
Tot

## Evaluation

In [64]:
model.eval()

Segmenter(
  (emb): Embedding(3648, 200, padding_idx=0)
  (lstm): LSTM(200, 150, batch_first=True)
  (sig1): Sigmoid()
  (lin): Linear(in_features=150, out_features=2, bias=True)
  (softmax): LogSoftmax(dim=2)
)

In [65]:
with torch.no_grad():
    rawpredictions = model(test_X_tensor, test_lengths_tensor)

In [66]:
rawpredictions.size()

torch.Size([500, 156, 2])

In [67]:
rawpredictions

tensor([[[-5.5307e+00, -3.9711e-03],
         [-4.9722e-04, -7.6067e+00],
         [-1.7355e+01,  0.0000e+00],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-1.1877e+01, -6.9141e-06],
         [-2.7702e-02, -3.6001e+00],
         [-7.6969e+00, -4.5432e-04],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-6.5656e+00, -1.4089e-03],
         [-1.6689e-06, -1.3334e+01],
         [-6.5181e+00, -1.4776e-03],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        ...,

        [[-5.2260e+00, -5.3896e-03],
         [-5.4596e-05, -9.8150e+00],
         [-1.3359e+01, -1.5497e-06],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-1.4282e+01, -5.9605e-07

In [68]:
import math
math.log2(0.9), math.log2(0.8)

(-0.15200309344504995, -0.3219280948873623)

In [69]:
predictions = torch.argmax(rawpredictions, 2)

In [70]:
predictions

tensor([[1, 0, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        ...,
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1]], device='cuda:2')

In [71]:
predictions.size()

torch.Size([500, 156])

In [72]:
predictions[0]

tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:2')

In [73]:
test_sentences[0]

('然而，這樣的處理也衍生了一些問題。', [1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1])

In [74]:
test_y_tensor[0]

tensor([ 1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  0,  1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], device='cuda:2')

In [75]:
test_lengths_tensor[0]

tensor(17, device='cuda:2')

In [76]:
collectpreds = []
collecty = []

In [77]:
for i in range(test_X_tensor.size(0)):
    collectpreds.append(predictions[i][:test_lengths_tensor[i]])
    collecty.append(test_y_tensor[i][:test_lengths_tensor[i]])

In [78]:
collecty

[tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
         0, 1, 1, 1, 0, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
         1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
         1, 1, 0, 1, 1, 1, 1, 1], device='cuda:2'),
 tensor([1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
         1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
         0, 0, 0, 1, 1], device='c

In [79]:
allpreds = torch.cat(collectpreds)

In [80]:
allpreds.size()

torch.Size([19206])

In [81]:
classes = torch.cat(collecty)

In [82]:
allpreds, classes

(tensor([1, 0, 1,  ..., 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1,  ..., 1, 0, 1], device='cuda:2'))

In [83]:
classes.size()

torch.Size([19206])

In [84]:
classes = classes.float()
allpreds = allpreds.float()

In [85]:
tp = sum(classes * allpreds)
fp = sum(classes * (~allpreds.bool()).float())
tn = sum((~classes.bool()).float() * (~allpreds.bool()).float())
fn = sum((~classes.bool()).float() * allpreds)

tp, fp, tn, fn

(tensor(11339., device='cuda:2'),
 tensor(673., device='cuda:2'),
 tensor(6418., device='cuda:2'),
 tensor(776., device='cuda:2'))

In [86]:
accuracy = (tp + tn) / (tp + fp + tn + fn)
accuracy

tensor(0.9246, device='cuda:2')

In [87]:
recall = tp / (tp + fn)
recall

tensor(0.9359, device='cuda:2')

In [88]:
precision = tp / (tp + fp)
precision

tensor(0.9440, device='cuda:2')

In [89]:
f1 = (2 * recall * precision) / (recall + precision)
f1

tensor(0.9399, device='cuda:2')