In [1]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torch.autograd import Variable

torch.cuda.set_device(0)

# 引入attribute向量
def get_data():
    attribute_embedding = np.load('generateData/Attribute_Embedding.npy')
    attribute_matrix = torch.from_numpy(attribute_embedding)
    torch_dataset = Data.TensorDataset(attribute_matrix)
    data_loader = Data.DataLoader(dataset=torch_dataset, batch_size=32)
    return data_loader

In [2]:
class AE(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AE, self).__init__()
        self.input_dim = input_dim
        self.encoding_dim = encoding_dim
        self.encoder = nn.Sequential(
                        nn.Linear(self.input_dim, 32),
                        nn.ReLU(),
                        nn.Linear(32, self.encoding_dim))
        self.decoder = nn.Sequential(
                        nn.Linear(self.encoding_dim, 32),
                        nn.ReLU(),
                        nn.Linear(32, self.input_dim))
    
    def forward(self, input):
        m = self.encoder(input)
        out = self.decoder(m)
        return m, out

In [3]:
def l1_penalty(var):
    return torch.abs(var).sum()


input_dim = 61
encoding_dim = 16
model = AE(input_dim, encoding_dim)
    
def train():  
    save_path = "generateData/AE.pkl"
    lr = 1e-2
    epoches = 20
    weight_decay = 1e-5

    train_data = get_data()
    loss_score = 100
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    if torch.cuda.is_available():
        model.cuda()
    for epoch in range(epoches):
        running_loss = 0.0
        for batch_idx, data in enumerate(train_data):
            inputs = Variable(data[0])
            if torch.cuda.is_available():
                inputs = inputs.cuda()
            optimizer.zero_grad()
            mid_reps, outputs = model(inputs)
            mse_loss = F.mse_loss(outputs, inputs)
            l1_reg = weight_decay*l1_penalty(mid_reps)
            loss = mse_loss + l1_reg
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            if batch_idx%50 == 49:
                print('Train Epoch: %d, Batch %d, loss:%.6f' % (epoch+1, (batch_idx+1)/50, running_loss/100))
                if save_path and running_loss<loss_score:
                    loss_score = running_loss
                    torch.save(model, save_path)
                    print("******Save model successful******")
                running_loss = 0.0
            
            
    print("Finished Training")

In [4]:
train()

Train Epoch: 1, Batch 1, loss:0.061869
******Save model successful******
Train Epoch: 1, Batch 2, loss:0.035895
******Save model successful******
Train Epoch: 1, Batch 3, loss:0.028323
******Save model successful******
Train Epoch: 1, Batch 4, loss:0.022615
******Save model successful******
Train Epoch: 1, Batch 5, loss:0.020680
******Save model successful******
Train Epoch: 1, Batch 6, loss:0.019655
******Save model successful******
Train Epoch: 1, Batch 7, loss:0.018477
******Save model successful******
Train Epoch: 1, Batch 8, loss:0.017275
******Save model successful******
Train Epoch: 1, Batch 9, loss:0.016322
******Save model successful******
Train Epoch: 1, Batch 10, loss:0.014579
******Save model successful******
Train Epoch: 1, Batch 11, loss:0.014161
******Save model successful******
Train Epoch: 1, Batch 12, loss:0.013559
******Save model successful******
Train Epoch: 1, Batch 13, loss:0.013124
******Save model successful******
Train Epoch: 2, Batch 1, loss:0.020446
Train Ep

Train Epoch: 14, Batch 3, loss:0.010191
Train Epoch: 14, Batch 4, loss:0.009053
Train Epoch: 14, Batch 5, loss:0.009353
Train Epoch: 14, Batch 6, loss:0.009318
Train Epoch: 14, Batch 7, loss:0.009658
Train Epoch: 14, Batch 8, loss:0.008739
Train Epoch: 14, Batch 9, loss:0.009214
Train Epoch: 14, Batch 10, loss:0.008282
Train Epoch: 14, Batch 11, loss:0.008011
******Save model successful******
Train Epoch: 14, Batch 12, loss:0.008252
Train Epoch: 14, Batch 13, loss:0.008428
Train Epoch: 15, Batch 1, loss:0.016029
Train Epoch: 15, Batch 2, loss:0.011128
Train Epoch: 15, Batch 3, loss:0.010104
Train Epoch: 15, Batch 4, loss:0.008725
Train Epoch: 15, Batch 5, loss:0.009298
Train Epoch: 15, Batch 6, loss:0.009391
Train Epoch: 15, Batch 7, loss:0.009696
Train Epoch: 15, Batch 8, loss:0.008726
Train Epoch: 15, Batch 9, loss:0.009204
Train Epoch: 15, Batch 10, loss:0.008230
Train Epoch: 15, Batch 11, loss:0.008051
Train Epoch: 15, Batch 12, loss:0.008330
Train Epoch: 15, Batch 13, loss:0.00835

In [5]:
test_loader = get_data()
for batch_idx, data in enumerate(test_loader):
    inputs = Variable(data[0])
    if torch.cuda.is_available():
        inputs = inputs.cuda()
    mid_rep, out = model(inputs)
    highLevel_representation = mid_rep.data.cpu().numpy()
    if batch_idx == 0:
        attribute_embedding_high = highLevel_representation
    else:
        attribute_embedding_high = np.concatenate((attribute_embedding_high, highLevel_representation))
np.save("generateData/Attribute_High_Embedding", attribute_embedding_high)
print("OK!")

OK!


In [7]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from gensim.models.poincare import PoincareModel

pd.set_option('display.max_columns', None)  # 显示时不折叠

# 获取每个shop的category向量，并组成category feature矩阵
def get_category_matrix():
    poincareModel = PoincareModel.load("generateData/Category_Hierarchy_Model")
    df_shop = pd.read_csv("newData/Shop_data_10.csv", encoding="utf-8", low_memory=False)
    for shop in df_shop.iterrows():
        idx = shop[1]['shop_id']
        category = shop[1]['small_category']
        category_embedding = poincareModel.kv.get_vector(category)
        if idx==0:
            category_matrix = np.reshape(category_embedding, (1,16))
        else:
            category_embedding = np.reshape(category_embedding, (1,16))
            category_matrix = np.concatenate((category_matrix, category_embedding))
    return category_matrix

np.save("generateData/Category_Embedding", get_category_matrix())