In [3]:
# -*- coding: utf-8 -*-
import os
import random
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# generator.py

In [2]:
class Generator(nn.Module):
    """Generator """
    def __init__(self, num_emb, emb_dim, hidden_dim, use_cuda):
        super(Generator, self).__init__()
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.use_cuda = use_cuda
        self.emb = nn.Embedding(num_emb, emb_dim) #num_emb嵌入词典的大小；emb_dim每个嵌入向量的维度；
        # When batch_first is True, then the input and output tensors are provided as (batch, seq, feature).
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True) # 为什么不用GRU
        self.lin = nn.Linear(hidden_dim, num_emb)
        self.softmax = nn.LogSoftmax()
        self.init_params()

    def forward(self, x):
        """
        Args:
            x: (batch_size, seq_len), sequence of tokens generated by generator
        """
        emb = self.emb(x)
        h0, c0 = self.init_hidden(x.size(0))
        output, (h, c) = self.lstm(emb, (h0, c0)) # output:(batch_size,seq_len,hidden_dim)
        pred = self.softmax(self.lin(output.contiguous().view(-1, self.hidden_dim))) #这里不要声明dim吗？
        return pred

    def step(self, x, h, c):
        """
        Args:
            x: (batch_size,  1), sequence of tokens generated by generator
            h: (1, batch_size, hidden_dim), lstm hidden state
            c: (1, batch_size, hidden_dim), lstm cell state
        """
        emb = self.emb(x)
        output, (h, c) = self.lstm(emb, (h, c))
        pred = F.softmax(self.lin(output.view(-1, self.hidden_dim)), dim=1)
        return pred, h, c


    def init_hidden(self, batch_size):
        h = Variable(torch.zeros((1, batch_size, self.hidden_dim)))
        c = Variable(torch.zeros((1, batch_size, self.hidden_dim)))
        if self.use_cuda:
            h, c = h.cuda(), c.cuda()
        return h, c

    def init_params(self):
        for param in self.parameters():
            param.data.uniform_(-0.05, 0.05)

    def sample(self, batch_size, seq_len, x=None):
        res = []
        flag = False # whether sample from zero
        if x is None:
            flag = True
        if flag:
            x = Variable(torch.zeros((batch_size, 1)).long())
        if self.use_cuda:
            x = x.cuda()
        h, c = self.init_hidden(batch_size)
        samples = []
        if flag:
            for i in range(seq_len):
                output, h, c = self.step(x, h, c)
                x = output.multinomial(1)
                samples.append(x)
        else:
            given_len = x.size(1)
            lis = x.chunk(x.size(1), dim=1)
            for i in range(given_len):
                output, h, c = self.step(lis[i], h, c)
                samples.append(lis[i])
            x = output.multinomial(1)
            for i in range(given_len, seq_len):
                samples.append(x)
                output, h, c = self.step(x, h, c)
                x = output.multinomial(1)
        output = torch.cat(samples, dim=1)
        return output


In [3]:
generator = Generator(num_emb = 5000,emb_dim = 128,hidden_dim = 64,use_cuda = 'Ture')

In [7]:
generator.cuda()

Generator(
  (emb): Embedding(5000, 128)
  (lstm): LSTM(128, 64, batch_first=True)
  (lin): Linear(in_features=64, out_features=5000, bias=True)
  (softmax): LogSoftmax()
)

In [12]:
x = torch.LongTensor([[2,50,100],
                       [40,3,1000]]).cuda()
x.shape

torch.Size([2, 3])

In [13]:
'''
emb:(2,3,128)
output:(2,3,64)
pred:(6,5000)
'''
pred = generator(x) # 我的预测结构应该是（6， 128）



In [14]:
pred.shape

torch.Size([6, 5000])

In [17]:
params = list(generator.parameters())

In [18]:
len(params)

7

In [19]:
for name,parameters in generator.named_parameters():
    print(name,':',parameters.size())

emb.weight : torch.Size([5000, 128])
lstm.weight_ih_l0 : torch.Size([256, 128])
lstm.weight_hh_l0 : torch.Size([256, 64])
lstm.bias_ih_l0 : torch.Size([256])
lstm.bias_hh_l0 : torch.Size([256])
lin.weight : torch.Size([5000, 64])
lin.bias : torch.Size([5000])


# discriminator.py

In [1]:
d_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
d_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160]
d_dropout = 0.75
d_emb_dim = 64
d_num_class = 2
VOCAB_SIZE = 5000
# discriminator = Discriminator(num_classes, vocab_size, emb_dim, filter_sizes, num_filters, dropout)

In [3]:
len(d_filter_sizes),len(d_num_filters)

(12, 12)

In [6]:
for (n,f) in zip(d_filter_sizes,d_num_filters):
    print('n = %d , f = %d \n'%(n,f))

n = 1 , f = 100 

n = 2 , f = 200 

n = 3 , f = 200 

n = 4 , f = 200 

n = 5 , f = 200 

n = 6 , f = 100 

n = 7 , f = 100 

n = 8 , f = 100 

n = 9 , f = 100 

n = 10 , f = 100 

n = 15 , f = 160 

n = 20 , f = 160 



In [2]:
class Discriminator(nn.Module):
    """A CNN for text classification

    architecture: Embedding >> Convolution >> Max-pooling >> Softmax
    """

    def __init__(self, num_classes, vocab_size, emb_dim, filter_sizes, num_filters, dropout):
        super(Discriminator, self).__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, n, (f, emb_dim)) for (n, f) in zip(num_filters, filter_sizes)
        ])
        self.highway = nn.Linear(sum(num_filters), sum(num_filters))
        self.dropout = nn.Dropout(p=dropout)
        self.lin = nn.Linear(sum(num_filters), num_classes)
        self.softmax = nn.LogSoftmax()
        self.init_parameters()

    def forward(self, x):
        """
        Args:
            x: (batch_size * seq_len)
        """
        emb = self.emb(x).unsqueeze(1)  # batch_size * 1 * seq_len * emb_dim
        convs = [F.relu(conv(emb)).squeeze(3) for conv in self.convs]  # [batch_size * num_filter * length]
        pools = [F.max_pool1d(conv, conv.size(2)).squeeze(2) for conv in convs] # [batch_size * num_filter]
        pred = torch.cat(pools, 1)  # batch_size * num_filters_sum
        highway = self.highway(pred)
        pred = torch.sigmoid(highway) *  F.relu(highway) + (1. - torch.sigmoid(highway)) * pred
        pred = self.softmax(self.lin(self.dropout(pred)))
        return pred

    def init_parameters(self):
        for param in self.parameters():
            param.data.uniform_(-0.05, 0.05)


In [5]:
D = Discriminator(d_num_class, VOCAB_SIZE, d_emb_dim, d_filter_sizes, d_num_filters, d_dropout)

In [6]:
D

Discriminator(
  (emb): Embedding(5000, 64)
  (convs): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(1, 64), stride=(1, 1))
    (1): Conv2d(1, 200, kernel_size=(2, 64), stride=(1, 1))
    (2): Conv2d(1, 200, kernel_size=(3, 64), stride=(1, 1))
    (3): Conv2d(1, 200, kernel_size=(4, 64), stride=(1, 1))
    (4): Conv2d(1, 200, kernel_size=(5, 64), stride=(1, 1))
    (5): Conv2d(1, 100, kernel_size=(6, 64), stride=(1, 1))
    (6): Conv2d(1, 100, kernel_size=(7, 64), stride=(1, 1))
    (7): Conv2d(1, 100, kernel_size=(8, 64), stride=(1, 1))
    (8): Conv2d(1, 100, kernel_size=(9, 64), stride=(1, 1))
    (9): Conv2d(1, 100, kernel_size=(10, 64), stride=(1, 1))
    (10): Conv2d(1, 160, kernel_size=(15, 64), stride=(1, 1))
    (11): Conv2d(1, 160, kernel_size=(20, 64), stride=(1, 1))
  )
  (highway): Linear(in_features=1720, out_features=1720, bias=True)
  (dropout): Dropout(p=0.75)
  (lin): Linear(in_features=1720, out_features=2, bias=True)
  (softmax): LogSoftmax()
)

In [7]:
a = torch.randn(100,30,40)

In [9]:
a.shape

torch.Size([100, 30, 40])

In [10]:
a.size(2)

40

In [11]:
b = F.max_pool1d(a,40)

In [12]:
b.shape

torch.Size([100, 30, 1])

In [7]:
batch_size = 32
emb_dim = 20
hidden_dim = 20

h_0 = torch.randn(1,batch_size,hidden_dim)
c_0 = torch.randn(1,batch_size,hidden_dim)
x = torch.zeros(batch_size,1).long()
emb = nn.Embedding(50,emb_dim)
y = emb(x)
lstm = nn.LSTM(emb_dim,hidden_dim,batch_first = True)
output,(h,c) = lstm(y,(h_0,c_0))

In [8]:
y.shape

torch.Size([32, 1, 20])

In [10]:
output.shape

torch.Size([32, 1, 20])

In [11]:
output

tensor([[[-1.9810e-01, -1.2066e-01,  1.6467e-01,  1.0540e-01,  6.5125e-02,
          -3.8412e-01, -9.1172e-02,  9.3457e-02,  2.0005e-01, -1.2519e-01,
          -4.4131e-02, -6.6144e-02, -3.0389e-01,  6.8024e-02,  1.2933e-01,
           1.1670e-01,  6.7173e-02,  6.2757e-01,  1.8349e-01, -2.5825e-01]],

        [[ 1.3263e-01,  4.0234e-01,  2.3037e-01, -2.4626e-01, -2.9471e-01,
           4.0388e-01,  1.6501e-01, -2.8089e-01,  9.1824e-02, -1.8406e-02,
          -3.0841e-02, -2.3087e-01, -7.2299e-01, -3.6598e-02,  8.1172e-02,
          -1.2660e-01,  3.3301e-02, -3.2666e-01, -2.0666e-02, -1.4617e-01]],

        [[-3.4644e-01,  2.9565e-01,  3.1386e-01, -7.9993e-02,  9.6691e-02,
          -6.4538e-02,  1.2887e-01,  3.3819e-01, -5.2917e-02,  2.5142e-01,
          -1.2581e-01, -9.9608e-02,  6.6515e-02,  1.7201e-02,  6.2637e-02,
           8.6001e-02,  1.7932e-01, -1.2206e-02,  1.5722e-02, -4.2225e-01]],

        [[-3.9014e-01,  8.3860e-02,  4.9311e-01,  1.3096e-02, -1.7987e-01,
           4.237

In [25]:
z_1 = torch.exp(output)
z = z_1.multinomial(1)
k = z_1.multinomial(2)
z.shape

RuntimeError: invalid argument 2: cannot sample n_sample > prob_dist.size(1) samples without replacement at ..\aten\src\TH/generic/THTensorRandom.cpp:286

In [22]:
z_1

tensor([[[0.8203, 0.8863, 1.1790, 1.1112, 1.0673, 0.6811, 0.9129, 1.0980,
          1.2215, 0.8823, 0.9568, 0.9360, 0.7379, 1.0704, 1.1381, 1.1238,
          1.0695, 1.8731, 1.2014, 0.7724]],

        [[1.1418, 1.4953, 1.2591, 0.7817, 0.7447, 1.4976, 1.1794, 0.7551,
          1.0962, 0.9818, 0.9696, 0.7938, 0.4853, 0.9641, 1.0846, 0.8811,
          1.0339, 0.7213, 0.9795, 0.8640]],

        [[0.7072, 1.3440, 1.3687, 0.9231, 1.1015, 0.9375, 1.1375, 1.4024,
          0.9485, 1.2859, 0.8818, 0.9052, 1.0688, 1.0173, 1.0646, 1.0898,
          1.1964, 0.9879, 1.0158, 0.6556]],

        [[0.6770, 1.0875, 1.6374, 1.0132, 0.8354, 1.5277, 1.3577, 1.1613,
          1.1533, 1.1570, 1.1721, 0.7087, 1.5868, 1.0220, 1.4724, 1.0116,
          0.6066, 1.2742, 1.7185, 1.1850]],

        [[0.7550, 0.8964, 0.8842, 0.8423, 0.8918, 0.7280, 0.6139, 0.7885,
          1.2982, 1.7488, 1.2889, 0.9036, 0.6559, 1.0486, 0.8021, 0.8511,
          1.0579, 1.8554, 1.5396, 0.7036]],

        [[1.3033, 0.9471, 1.3069, 1

In [23]:
z

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])

In [29]:
import argparse
parser = argparse.ArgumentParser(description='Training Parameter')
parser.add_argument('--cuda', action='store', default=None, type=int)
opt = parser.parse_args()
print(opt)

usage: ipykernel_launcher.py [-h] [--cuda CUDA]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\75155\AppData\Roaming\jupyter\runtime\kernel-558ca72b-ac81-431c-ba0d-2640b67bad83.json


SystemExit: 2

# data_iter.py

# rollout.py

# main()第198行之后

In [7]:
x = Variable(torch.tensor([[2,50,100],
                       [40,3,1000]]))

In [8]:
x.shape

torch.Size([2, 3])

In [9]:
y = x.view((-1,))

In [10]:
y.shape

torch.Size([6])