# Content
- N-gram语言模型简介
- 神经语言模型简介
- 神经语言模型
- Word2Vec(CBOW)
       
       注意.view((1,-1))调整tensor维度为loss_function做准备
    
- CBOW网络拓扑
- CBOW构建路程

# N-gram语言模型简介
![deque](pic/ngram.png)
![deque](pic/ngram1.png)
![deque](pic/ngram2.png)
![deque](pic/ngram3.png)

# 神经语言模型简介
![deque](pic/神经语言模型.png)

In [5]:
# 神经语言模型

import torch 
import torch.autograd as autograd
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(1)

<torch._C.Generator at 0x18ad347f4b0>

In [2]:
word_to_ix = {'hello':0, 'world':1}

embeds = nn.Embedding(2, 5)
# 2 单词数量 ； 5 单词维度

lookup_tensor = torch.LongTensor([word_to_ix['hello']])
print('lookup_tensor:\n',lookup_tensor)

lookup_tensor = torch.LongTensor([0,1])

hello_embed = embeds(autograd.Variable(lookup_tensor))
a = hello_embed[0]
b = hello_embed[1]

sum_embed = hello_embed.sum(0)

print(sum_embed)
print('hello_embed:\n',hello_embed)

lookup_tensor:
 tensor([0])
tensor([ 0.4952, -1.2558,  0.4434, -0.4063, -1.0150], grad_fn=<SumBackward1>)
hello_embed:
 tensor([[ 0.6614,  0.2669,  0.0617,  0.6213, -0.4519],
        [-0.1661, -1.5228,  0.3817, -1.0276, -0.5631]],
       grad_fn=<EmbeddingBackward>)


In [12]:
test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()

# 构建trigram训练数据集 ：(['When', 'forty'], 'winters')
trigrams = [([test_sentence[i], test_sentence[i+1]], test_sentence[i+2]) 
            for i in range(len(test_sentence) - 2)]
print('trigrams[:3]:\n',trigrams[:3])

vocab = set(test_sentence)

word_to_ix = {word: i for i,word in enumerate(vocab)}

class NNLM(nn.Module):
    
    def __init__(self, vacab_size, embedding_dim, context_size):
        super(NNLM,self).__init__()
        
        self.input_size = vacab_size # vocab_size / one-hot encoding
        self.output_size = embedding_dim
        self.context_size = context_size
        
        self.embeddings = nn.Embedding(self.input_size, self.output_size)
        
        self.fc1 = nn.Linear(self.context_size * self.output_size, 128)
        
        self.fc2 = nn.Linear(128, self.input_size)
        
    def forward(self, inputs):
        
        embeds = self.embeddings(inputs).view((1,-1))
        # embeds.size ——> output_size / embedding_dim
        outs = F.relu(self.fc1(embeds))
        
        outs = self.fc2(outs)
        
        outs = F.log_softmax(outs, dim=1)
        
        return outs

losses = []

# 超参
vacab_size = len(vocab) # hot-one encoding / cocab_size
embedding_dim =  5 # EMBEDDING_DIM
context_size = 2
learning_rate = 0.001

model = NNLM(vacab_size, embedding_dim, context_size).to(device)

loss_function = nn.NLLLoss()

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(10):
    
    total_loss = torch.Tensor([0])
    
    for context, target in trigrams:
        
        context_idxs = [word_to_ix[w] for w in context]
        
        context_var = autograd.Variable(torch.LongTensor(context_idxs)).to(device)
        
        model.zero_grad()
        
        log_probs = model(context_var)
        
        loss = loss_function(log_probs, 
        autograd.Variable(torch.LongTensor([word_to_ix[target]])).to(device))
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.data
    losses.append(total_loss)
    
print(losses)
        

trigrams[:3]:
 [(['When', 'forty'], 'winters'), (['forty', 'winters'], 'shall'), (['winters', 'shall'], 'besiege')]
[tensor([518.4762]), tensor([516.1401]), tensor([513.8240]), tensor([511.5272]), tensor([509.2499]), tensor([506.9915]), tensor([504.7505]), tensor([502.5263]), tensor([500.3181]), tensor([498.1255])]


# NNLM例子网络拓扑
![deque](pic/神经语言模型1.png)

In [11]:
# Word2Vec(CBOW)

# =====数据预处理
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

vocab = set(raw_text)

vocab_size = len(vocab)

# 词典
word_to_ix = {word: i for i, word in enumerate(vocab)}

data = []

# 构造数据集
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]

    data.append((context, target))

# =====CBOW模型
class CBOW(nn.Module):

    def __init__(self, vacab_size, embedding_dim, window_size):
        super(CBOW, self).__init__()
        
        self.input_size = vacab_size
        self.output_size = embedding_dim
        self.window_size = window_size

        self.window_size = window_size

        self.embeddings = nn.Embedding(self.input_size, self.output_size)

        self.fc = nn.Linear(self.output_size, self.input_size)

    def forward(self, inputs):

        embeds_avg = (self.embeddings(inputs).sum(0) / self.window_size).view((1, -1))
        # embeds矩阵按行求和在求平均,
        # 注意用view((1,-1))修改tensor维度否则loss_function计算报错

        outs = self.fc(embeds_avg)

        outs = F.log_softmax(outs, dim=1)

        return outs

# =====模型配置

# 超参
vocab_size = len(vocab)
embedding_size = 5
window_size = 2
learning =0.001

model = CBOW(vocab_size , embedding_size, window_size).to(device)

loss_function = nn.NLLLoss()

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# =====training
losses = []

for epoch in range(100):

    total_loss = torch.Tensor([0])

    for context, target in data:
        
        context_idxs = [word_to_ix[w] for w in context]
        
        context_var = autograd.Variable(torch.LongTensor(context_idxs)).to(device)
        
        model.zero_grad()
        
        log_probs = model(context_var)
        
        loss = loss_function(log_probs, autograd.Variable(torch.LongTensor([word_to_ix[target]])).to(device))
        
        loss.backward()
        
        optimizer.step()

        total_loss += loss.data

    losses.append(total_loss)

print(losses)

[tensor([239.4652]), tensor([239.0371]), tensor([238.6110]), tensor([238.1869]), tensor([237.7647]), tensor([237.3443]), tensor([236.9259]), tensor([236.5092]), tensor([236.0945]), tensor([235.6816]), tensor([235.2704]), tensor([234.8611]), tensor([234.4535]), tensor([234.0477]), tensor([233.6437]), tensor([233.2414]), tensor([232.8409]), tensor([232.4420]), tensor([232.0448]), tensor([231.6494]), tensor([231.2556]), tensor([230.8634]), tensor([230.4729]), tensor([230.0841]), tensor([229.6969]), tensor([229.3112]), tensor([228.9273]), tensor([228.5449]), tensor([228.1640]), tensor([227.7848]), tensor([227.4071]), tensor([227.0310]), tensor([226.6564]), tensor([226.2834]), tensor([225.9119]), tensor([225.5419]), tensor([225.1734]), tensor([224.8063]), tensor([224.4408]), tensor([224.0768]), tensor([223.7142]), tensor([223.3531]), tensor([222.9935]), tensor([222.6353]), tensor([222.2785]), tensor([221.9232]), tensor([221.5692]), tensor([221.2168]), tensor([220.8657]), tensor([220.5160]),

# CBOW网络拓扑
![deque](pic/CBOW.png)

# CBOW构建路程
![deque](pic/CBOW0.png)
![deque](pic/CBOW1.png)
![deque](pic/CBOW2.png)
![deque](pic/CBOW3.png)
![deque](pic/CBOW4.png)
![deque](pic/CBOW5.png)
