In [1]:
import torch
import torch.autograd as autograd
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import jieba
torch.manual_seed(1)

<torch._C.Generator at 0x7f21041b7b10>

In [2]:
## 1.pytorch basic

In [3]:
float64_a = np.array(np.random.randn(2, 3), dtype=np.int)
float64_a

array([[-1, -1,  0],
       [ 0,  0,  0]])

In [4]:
type(float64_a[0][1])

numpy.int64

In [5]:
test1 = torch.from_numpy(float64_a)

In [6]:
type(test1)

torch.LongTensor

In [7]:
float64_b = [
    [1, 0, 0],
    [1, 0, 0]
]

In [8]:
test2 = torch.Tensor(float64_b).type(torch.LongTensor)

In [9]:
type(test2)

torch.LongTensor

In [10]:
x = torch.randn(2, 2).type(torch.ShortTensor)
x


-2  1
 0 -2
[torch.ShortTensor of size 2x2]

In [11]:
x.view(4, -1)


-2
 1
 0
-2
[torch.ShortTensor of size 4x1]

In [12]:
## 2.Computation Graphs and Automatic Differentiation

In [13]:
test_c = torch.Tensor([1., 2., 3])

In [14]:
print(type(test_c[2]))
print(test_c[2])

<class 'float'>
3.0


In [15]:
x = Variable(torch.Tensor([1., 2., 3]), requires_grad=True)
x

Variable containing:
 1
 2
 3
[torch.FloatTensor of size 3]

In [16]:
x.data


 1
 2
 3
[torch.FloatTensor of size 3]

In [17]:
y = Variable(torch.Tensor([4., 5., 6]), requires_grad=True)

In [18]:
z = x + y

In [19]:
z

Variable containing:
 5
 7
 9
[torch.FloatTensor of size 3]

In [20]:
z.requires_grad

True

In [21]:
z.grad_fn

<torch.autograd.function.AddBackward at 0x7f20ae9854f8>

In [22]:
s = z.sum()
s

Variable containing:
 21
[torch.FloatTensor of size 1]

In [23]:
s.grad_fn

<torch.autograd.function.SumBackward at 0x7f20ae9855e8>

In [24]:
s.backward()
print(x.grad)
print(y.grad)

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



In [25]:
## 3.Deep learning building blocks: 
###  Affine maps, non-linearities and objectives

In [26]:
### Affine maps
lin = nn.Linear(5, 2)
X = Variable(torch.rand(2, 5))
y = lin(X)
y

Variable containing:
-0.4767 -0.6123
-0.6477 -0.3223
[torch.FloatTensor of size 2x2]

In [27]:
### non-linearities
### BY ACTIVATIONS
X

Variable containing:
 0.3968  0.9355  0.5388  0.8463  0.4192
 0.3133  0.6852  0.5245  0.2045  0.4435
[torch.FloatTensor of size 2x5]

In [28]:
F.relu(X)

Variable containing:
 0.3968  0.9355  0.5388  0.8463  0.4192
 0.3133  0.6852  0.5245  0.2045  0.4435
[torch.FloatTensor of size 2x5]

In [29]:
F.elu(X)

Variable containing:
 0.3968  0.9355  0.5388  0.8463  0.4192
 0.3133  0.6852  0.5245  0.2045  0.4435
[torch.FloatTensor of size 2x5]

In [30]:
### softmax and probabilities
X = Variable(torch.randn(1, 5))

In [31]:
P = F.softmax(X)
P

Variable containing:
 0.2321  0.0836  0.4662  0.1704  0.0477
[torch.FloatTensor of size 1x5]

In [32]:
P.sum()

Variable containing:
 1
[torch.FloatTensor of size 1]

In [33]:
### bag of words
data = [
    ("I love you very much".split(), "ENGLISH"),
    (list(jieba.cut("我非常爱你")), "CHINESE"),
    ("Do you love her".split(), "ENGLISH"),
    (list(jieba.cut("你喜欢她吗")), "CHINESE")
]

test_data = [
    ("He is a good person", "ENGLISH"),
    (list(jieba.cut("他是一个好人")), "CHINESE")
]

word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
            
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.717 seconds.
Prefix dict has been built succesfully.


{'I': 0, 'love': 1, 'you': 2, 'very': 3, 'much': 4, '我': 5, '非常': 6, '爱': 7, '你': 8, 'Do': 9, 'her': 10, '喜欢': 11, '她': 12, '吗': 13, 'H': 14, 'e': 15, ' ': 16, 'i': 17, 's': 18, 'a': 19, 'g': 20, 'o': 21, 'd': 22, 'p': 23, 'r': 24, 'n': 25, '他': 26, '是': 27, '一个': 28, '好人': 29}


In [34]:
class BoWClassifier(nn.Module):
    def __init__(self, num_labels, vocab_size):
        super(BoWClassifier, self).__init__()
        
        self.layer = nn.Sequential(
            nn.Linear(vocab_size, num_labels),
            nn.LogSoftmax()
        )
        #self.linear = nn.Linear(vocab_size, num_labels)
    
    def forward(self, bow_vec):
        #bow_vec = F.log_softmax(self.linear(bow_vec))
        bow_vec = self.layer(bow_vec)
        return bow_vec

In [35]:
def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)

def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])

In [36]:
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)
print(model)
for param in model.parameters():
    print(param)

BoWClassifier (
  (layer): Sequential (
    (0): Linear (30 -> 2)
    (1): LogSoftmax ()
  )
)
Parameter containing:

Columns 0 to 9 
-0.0302 -0.0156  0.0214 -0.0253 -0.1313  0.1603 -0.1102  0.1017  0.1098  0.0789
-0.0288  0.0627  0.1672 -0.0322  0.0121 -0.1104  0.0701 -0.0768 -0.0674 -0.1307

Columns 10 to 19 
 0.1710  0.1106 -0.0681 -0.1487  0.0702  0.0066  0.1374  0.1333  0.1441  0.1202
 0.0681  0.1035  0.1222 -0.0319 -0.1759 -0.1701  0.0913  0.0453  0.1785  0.0587

Columns 20 to 29 
-0.1515  0.1204 -0.1683 -0.0829 -0.1206 -0.1609  0.1381  0.0623 -0.1467  0.0340
 0.0906 -0.0736 -0.0802 -0.0197  0.1056 -0.1015 -0.1449 -0.1558 -0.0190 -0.0112
[torch.FloatTensor of size 2x30]

Parameter containing:
 0.1492
-0.1475
[torch.FloatTensor of size 2]



In [37]:
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_ix)
print(type(bow_vector))
log_probs = model(Variable(bow_vector))
print(log_probs)

<class 'torch.FloatTensor'>
Variable containing:
-0.7263 -0.6610
[torch.FloatTensor of size 1x2]



In [38]:
label_to_ix = { "CHINESE": 0, "ENGLISH": 1 }

In [39]:
for instance, label in test_data:
    bow_vec = Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

Variable containing:
-0.2965 -1.3604
[torch.FloatTensor of size 1x2]

Variable containing:
-0.3982 -1.1134
[torch.FloatTensor of size 1x2]



In [40]:
print(next(model.parameters()))
print(next(model.parameters())[:, word_to_ix["喜欢"]])

Parameter containing:

Columns 0 to 9 
-0.0302 -0.0156  0.0214 -0.0253 -0.1313  0.1603 -0.1102  0.1017  0.1098  0.0789
-0.0288  0.0627  0.1672 -0.0322  0.0121 -0.1104  0.0701 -0.0768 -0.0674 -0.1307

Columns 10 to 19 
 0.1710  0.1106 -0.0681 -0.1487  0.0702  0.0066  0.1374  0.1333  0.1441  0.1202
 0.0681  0.1035  0.1222 -0.0319 -0.1759 -0.1701  0.0913  0.0453  0.1785  0.0587

Columns 20 to 29 
-0.1515  0.1204 -0.1683 -0.0829 -0.1206 -0.1609  0.1381  0.0623 -0.1467  0.0340
 0.0906 -0.0736 -0.0802 -0.0197  0.1056 -0.1015 -0.1449 -0.1558 -0.0190 -0.0112
[torch.FloatTensor of size 2x30]

Variable containing:
 0.1106
 0.1035
[torch.FloatTensor of size 2]



In [45]:
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    for instance, label in data:
        model.zero_grad()
        
        bow_vec =  Variable(make_bow_vector(instance, word_to_ix))
        target = Variable(make_target(label, label_to_ix))
        
        log_probs = model(bow_vec)
        
        loss = loss_function(log_probs, target)
    if not epoch % 20:
        print("epoch {}/100: {}".format(epoch, loss))
        
        loss.backward()
        optimizer.step()

epoch 0/100: Variable containing:
1.00000e-02 *
  6.5788
[torch.FloatTensor of size 1]

epoch 20/100: Variable containing:
1.00000e-02 *
  6.1852
[torch.FloatTensor of size 1]

epoch 40/100: Variable containing:
1.00000e-02 *
  5.8355
[torch.FloatTensor of size 1]

epoch 60/100: Variable containing:
1.00000e-02 *
  5.5226
[torch.FloatTensor of size 1]

epoch 80/100: Variable containing:
1.00000e-02 *
  5.2411
[torch.FloatTensor of size 1]



In [46]:
for instance, label in test_data:
    bow_vec = Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)
    
print(next(model.parameters()))
print(next(model.parameters())[:, word_to_ix["喜欢"]])

Variable containing:
-0.1799 -1.8038
[torch.FloatTensor of size 1x2]

Variable containing:
-0.2464 -1.5215
[torch.FloatTensor of size 1x2]

Parameter containing:

Columns 0 to 9 
-0.0302 -0.0156  0.0214 -0.0253 -0.1313  0.1603 -0.1102  0.1017  0.3898  0.0789
-0.0288  0.0627  0.1672 -0.0322  0.0121 -0.1104  0.0701 -0.0768 -0.3473 -0.1307

Columns 10 to 19 
 0.1710  0.3905  0.2118  0.1313  0.0702  0.0066  0.1374  0.1333  0.1441  0.1202
 0.0681 -0.1765 -0.1578 -0.3119 -0.1759 -0.1701  0.0913  0.0453  0.1785  0.0587

Columns 20 to 29 
-0.1515  0.1204 -0.1683 -0.0829 -0.1206 -0.1609  0.1381  0.0623 -0.1467  0.0340
 0.0906 -0.0736 -0.0802 -0.0197  0.1056 -0.1015 -0.1449 -0.1558 -0.0190 -0.0112
[torch.FloatTensor of size 2x30]

Variable containing:
 0.3905
-0.1765
[torch.FloatTensor of size 2]



In [65]:
# word embeddings
ix_to_word = dict(enumerate(list(jieba.cut("我喜欢你"))))
word_to_ix = dict(zip(ix_to_word.values(), ix_to_word.keys()))
embeds = nn.Embedding(3, 6)
lookup_tensor = torch.LongTensor([word_to_ix["喜欢"]])
print(embeds(Variable(lookup_tensor)))
lookup_tensor

Variable containing:
-1.0565 -0.9923 -0.3660  0.6203  0.7167  0.5366
[torch.FloatTensor of size 1x6]




 1
[torch.LongTensor of size 1]

In [66]:
### N-gram Language Modeling

In [184]:
CONTEXT_SIZE = 2
EMBEDDING_DIM = 10

raw_sen = """前天久违的（大概隔了一个月）把自己割了。
由于忍了太久的缘故，下手不知轻重，割得有些深，血不是一点一点渗在皮肤表面上的，
而是直接大颗大颗的涌出来，给我的感觉是凉凉的，很多，顺着我的手腕汇成一股，
沿着我的手掌地在地上，我的拖鞋上，哪里都是血。浣熊在敲门，又干嚎，我不记得了，
总之很混乱，我不得不走出去。然后他吓坏了，一直哭，声音很大，
隔壁人家以为出了事来敲我们的门。血滴了很多在房间和餐厅的地板上，
我拿一些纸巾摁在手腕上止血，但没什么用。我换了很多张纸巾，
浣熊一直蹲在地上哭，“天哪天哪”地叫着，我哭不出来，我甚至一点表情都做不出来，
换句话说，我那个时候很奇怪的麻木了，我完全不知道先做什么，要安抚浣熊，
还是给自己处理伤口，我就呆坐在那里。然后我小小声的说“没事的”。
"""

import re

raw_sen = re.sub(r'[!。（），”“\n]', r'', raw_sen)
raw_sen

test_sentence = list(jieba.cut(raw_sen))
print(len(test_sentence))
print(test_sentence)

185
['前天', '久违', '的', '大概', '隔', '了', '一个月', '把', '自己', '割', '了', '由于', '忍', '了', '太久', '的', '缘故', '下手', '不知轻重', '割', '得', '有些', '深血', '不是', '一点一点', '渗在', '皮肤', '表面', '上', '的', '而是', '直接', '大颗', '大颗', '的', '涌出来', '给', '我', '的', '感觉', '是', '凉凉的', '很多', '顺着', '我', '的', '手腕', '汇成', '一股', '沿着', '我', '的', '手掌', '地', '在', '地上', '我', '的', '拖鞋', '上', '哪里', '都', '是', '血', '浣熊', '在', '敲门', '又', '干嚎', '我', '不', '记得', '了', '总之', '很', '混乱', '我', '不得不', '走', '出去', '然后', '他', '吓坏', '了', '一直', '哭', '声音', '很大', '隔壁', '人家', '以为', '出', '了', '事来', '敲', '我们', '的', '门', '血滴', '了', '很多', '在', '房间', '和', '餐厅', '的', '地板', '上', '我', '拿', '一些', '纸巾', '摁', '在', '手腕', '上', '止血', '但', '没什么', '用', '我', '换', '了', '很多', '张', '纸巾', '浣熊', '一直', '蹲', '在', '地上', '哭天', '哪天', '哪地', '叫', '着', '我', '哭', '不', '出来', '我', '甚至', '一点', '表情', '都', '做', '不', '出来', '换句话说', '我', '那个', '时候', '很', '奇怪', '的', '麻木', '了', '我', '完全', '不', '知道', '先', '做', '什么', '要', '安抚', '浣熊', '还是', '给', '自己', '处理', '伤口', '我', '就', '呆坐', '在', '那里', '然后', '我

In [185]:
### 3-gram
trigrams = [([test_sentence[i], test_sentence[i+1]], test_sentence[i+2]) for i in range(len(test_sentence) - 2)]
trigrams[:3]

[(['前天', '久违'], '的'), (['久违', '的'], '大概'), (['的', '大概'], '隔')]

In [250]:
vocab = set(test_sentence)
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {i: word for i, word in enumerate(vocab)}
ix_to_word

{0: '甚至',
 1: '割',
 2: '声音',
 3: '血滴',
 4: '没什么',
 5: '什么',
 6: '给',
 7: '不',
 8: '那里',
 9: '小',
 10: '人家',
 11: '在',
 12: '皮肤',
 13: '然后',
 14: '汇成',
 15: '哪地',
 16: '事来',
 17: '就',
 18: '都',
 19: '摁',
 20: '下手',
 21: '以为',
 22: '一点一点',
 23: '哪里',
 24: '还是',
 25: '做',
 26: '而是',
 27: '呆坐',
 28: '自己',
 29: '出来',
 30: '大颗',
 31: '出去',
 32: '着',
 33: '把',
 34: '地',
 35: '隔',
 36: '用',
 37: '叫',
 38: '纸巾',
 39: '拿',
 40: '血',
 41: '张',
 42: '说',
 43: '一个月',
 44: '记得',
 45: '安抚',
 46: '止血',
 47: '上',
 48: '太久',
 49: '他',
 50: '敲',
 51: '哪天',
 52: '换句话说',
 53: '不知轻重',
 54: '地上',
 55: '很',
 56: '地板',
 57: '感觉',
 58: '凉凉的',
 59: '换',
 60: '不是',
 61: '一点',
 62: '处理',
 63: '很多',
 64: '沿着',
 65: '渗在',
 66: '缘故',
 67: '很大',
 68: '门',
 69: '我们',
 70: '得',
 71: '拖鞋',
 72: '没事',
 73: '那个',
 74: '有些',
 75: '深血',
 76: '要',
 77: '出',
 78: '但',
 79: '手腕',
 80: '顺着',
 81: '我',
 82: '手掌',
 83: '混乱',
 84: '大概',
 85: '小声',
 86: '麻木',
 87: '蹲',
 88: '完全',
 89: '哭',
 90: '走',
 91: '时候',
 92: '伤口',
 93: '一直',


In [251]:
class NGramLanguageModeler(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramLanguageModeler, self).__init__()
        
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)
    
    def forward(self, X):
        embeds = self.embeddings(X).view(1, -1)
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out)
        return log_probs

In [252]:
# losses = []
loss_function = nn.NLLLoss()
model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=.001)

for epoch in range(100):
    total_loss = torch.Tensor([0])
    for context, target in trigrams:
#         print(context)
        context_idxs = list(map(lambda w: word_to_ix[w], context))
        context_var = Variable(
            torch.LongTensor(context_idxs).view(-1, 1)
        )
#         print(context_var.size())
        model.zero_grad()
        log_probs = model(context_var)
        
#         loss = loss_function(log_probs, Variable(torch.LongTensor([word_to_ix[target]])))    
        loss = loss_function(
            log_probs, 
            Variable(torch.LongTensor([word_to_ix[target]]))
        )
        loss.backward()
        optimizer.step()
        total_loss += loss.data
    if not epoch % 20:
        print("Epoch: {}/100 Loss: {}".format(epoch, total_loss))

Epoch: 0/100 Loss: 
 877.4662
[torch.FloatTensor of size 1]

Epoch: 20/100 Loss: 
 765.2822
[torch.FloatTensor of size 1]

Epoch: 40/100 Loss: 
 688.0721
[torch.FloatTensor of size 1]

Epoch: 60/100 Loss: 
 612.6309
[torch.FloatTensor of size 1]

Epoch: 80/100 Loss: 
 530.2733
[torch.FloatTensor of size 1]



In [271]:
input_test = Variable(torch.LongTensor([[word_to_ix["我"]], [word_to_ix["感觉"]]]).view(-1, 1))
ix_to_word[np.argmin(model(input_test).data.numpy())]

'小'

In [272]:
ix_to_word[np.argmax(model(input_test).data.numpy())]

'在'

In [281]:
a = model(input_test).data.numpy()
a[0][11]

-2.8453712

In [276]:
np.argmax(model(input_test).data.numpy())

11

In [282]:
### CBOW

In [283]:
CONTEXT_SIZE = 2
EMBEDDING_DIM = 10

In [284]:
data = []
for i in range(2, len(raw_sen) - 2):
    context = [raw_sen[i-2], raw_sen[i-1], raw_sen[i+1], raw_sen[i+2]]
    target = raw_sen[i]
    data.append( (context, target) )

print(data[:5])

[(['前', '天', '违', '的'], '久'), (['天', '久', '的', '大'], '违'), (['久', '违', '大', '概'], '的'), (['违', '的', '概', '隔'], '大'), (['的', '大', '隔', '了'], '概')]


In [294]:
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(CBOW, self).__init__()
        
        self.embeds = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)
    
    def forward(self, X):
        pass

In [295]:
# model = CBOW()

In [296]:
# loss_function = nn.NLLLoss()
# optimizer = optim.SGD(model.parameters(), lr=.001)    

In [297]:
# LSTM pytorch