In [22]:
# 全家桶 使用 Pytorch 必备 具体功能且看下文
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [62]:
class CNN(nn.Module):
    def __init__(self, output_dimesion, vocab_size, dropout_rate, emb_dim, max_len, n_filters, init_W=None):
        # number_filters
        super(CNN, self).__init__()

        self.max_len = max_len
        max_features = vocab_size
        vanila_dimension = 200 #倒数第二层的节点数
        projection_dimension = output_dimesion #输出层的节点数
        self.qual_conv_set = {}    

        '''Embedding Layer'''
        if init_W is None:
            # 先尝试使用embedding随机赋值
            self.embedding = nn.Embedding(max_features, emb_dim)

        self.conv1 = nn.Sequential(
            # 卷积层的激活函数
            nn.Conv2d(1, n_filters, kernel_size=(3, emb_dim)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(max_len - 3 + 1, 1))
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(1, n_filters, kernel_size=(4, emb_dim)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(max_len - 4 + 1, 1))
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(1, n_filters, kernel_size=(5, emb_dim)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(max_len - 5 + 1, 1))
        )
        
        '''Dropout Layer'''
        #layer = Dense(vanila_dimension, activation='tanh')(flatten_layer)
        #layer = Dropout(dropout_rate)(layer)
        self.layer = nn.Linear(300, vanila_dimension)
        self.dropout = nn.Dropout(dropout_rate)

        '''Projection Layer & Output Layer'''
        #output_layer = Dense(projection_dimension, activation='tanh')(layer)
        self.output_layer = nn.Linear(vanila_dimension, projection_dimension)

        

    def forward(self, input):
        embeds = self.embedding(input)
        # concatenate the tensors
        x = self.conv_1(embeds)
        y = self.conv_2(embeds)
        z = self.conv_3(embeds)
        flatten = torch.cat((x,view(-1), y.view(-1), z.view(-1)))
        
        out = F.tanh(self.layer(flatten))
        out = self.dropout(out)
        out = F.tanh(self.output_layer(out))  
        
cnn = CNN(50, 8000, 0.5, 50, 150, 100)

In [63]:
cnn

CNN(
  (embedding): Embedding(8000, 50)
  (conv1): Sequential(
    (0): Conv2d (1, 100, kernel_size=(3, 50), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(148, 1), stride=(148, 1), dilation=(1, 1))
  )
  (conv2): Sequential(
    (0): Conv2d (1, 100, kernel_size=(4, 50), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(147, 1), stride=(147, 1), dilation=(1, 1))
  )
  (conv3): Sequential(
    (0): Conv2d (1, 100, kernel_size=(5, 50), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(146, 1), stride=(146, 1), dilation=(1, 1))
  )
  (layer): Linear(in_features=300, out_features=200)
  (dropout): Dropout(p=0.5)
  (output_layer): Linear(in_features=200, out_features=50)
)

In [13]:
from data_manager import Data_Factory
import pprint
data_factory = Data_Factory()

R, D_all = data_factory.load("./data/preprocessed/ml-1m/")

Load preprocessed rating data - ./data/preprocessed/ml-1m//ratings.all
Load preprocessed document data - ./data/preprocessed/ml-1m//document.all


In [59]:
CNN_X = D_all['X_sequence']
print(len(CNN_X))
print(len(CNN_X[3]))
print(CNN_X[3])
print(CNN_X)

3544
95
[2497, 7513, 6630, 4814, 1994, 2754, 3900, 5018, 4346, 7235, 2533, 2610, 2633, 4156, 249, 2161, 1127, 146, 6530, 5018, 337, 6530, 6985, 6530, 4157, 3071, 6530, 3900, 1500, 4316, 7833, 5018, 5150, 7102, 6530, 6476, 6530, 1394, 4450, 6751, 1238, 7824, 6530, 740, 3773, 7062, 5917, 2514, 1171, 3782, 5251, 2992, 2353, 1496, 7819, 6530, 2101, 1496, 7446, 5832, 1052, 4109, 1865, 7355, 7769, 1496, 3590, 2271, 7458, 5529, 6087, 475, 6530, 2063, 1908, 2497, 2754, 3379, 4161, 5526, 6474, 2535, 7934, 3782, 6530, 5150, 807, 1354, 172, 4156, 355, 3417, 249, 2168, 1649]


AttributeError: 'list' object has no attribute 'shape'

In [9]:
len(D_all['X_vocab'])

8000

In [48]:
embeds = nn.Embedding(8000, 5)
test = CNN_X[3]
tensor = torch.LongTensor(list(map(int, test)))
me_embed = embeds(Variable(tensor))
print(me_embed)

Variable containing:
 1.1375  0.6195  0.1585  1.0799  0.1302
-0.5405 -0.9589 -1.3669  1.2314  1.9734
-0.4789  0.5938  0.1744 -0.0176 -0.0497
-0.2310 -1.1388  0.7172 -0.4343  0.7839
 0.5238  0.7899 -0.5901  1.0298  0.3844
-1.4921  1.8542 -1.1308  0.7227 -1.6314
-0.9999  0.4745  0.3701  0.2189  0.4824
 0.0339  1.6608  0.5456 -2.0539  0.0004
 0.0580  0.9189  1.2705  1.6964 -0.6851
-0.4247 -1.4672  0.5220  0.0431 -0.2025
 1.0033 -1.0548  1.1176  0.5650 -1.4660
-0.8414  1.8125  1.8854 -1.6015 -0.6787
-0.8838  0.0412 -0.6423  1.7509 -1.9570
 0.5814 -1.5999  0.6436  1.4211 -1.3188
-0.4954 -0.6092 -1.6808 -1.0020  0.1801
-0.9836 -0.0847 -1.2562 -0.1226 -0.2108
-1.3440 -0.1142 -1.2649  0.2782 -1.4181
-0.0528  0.0718 -0.6514  1.1687 -1.0889
 1.7251  1.5146 -0.6547 -0.2933 -1.5057
 0.0339  1.6608  0.5456 -2.0539  0.0004
-0.7024 -0.6674 -1.9162 -0.1312  1.1091
 1.7251  1.5146 -0.6547 -0.2933 -1.5057
 0.2818  0.5606 -0.3546 -0.6588 -0.7651
 1.7251  1.5146 -0.6547 -0.2933 -1.5057
-0.7891 -1.7500  0.

In [49]:
map(int, test)

<map at 0x7fd13d822ac8>