# **Product Recommendation Model: DNN**

In [None]:
! pip3 install torch torchvision torchaudio
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
torch.set_printoptions(precision=7)

In [None]:
import pandas as pd
import numpy as np
import gc
import os
import matplotlib.pyplot as plt
import datetime
import copy
import random
import pickle
import warnings
warnings.filterwarnings("ignore")

from gensim.models import Word2Vec
from behavior2vec import behavior2vec

In [None]:
def remove_file(file_path):
    """Delete ``file_path`` when it exists and print which branch was taken.

    Args:
        file_path: path of the file to delete.

    Returns:
        None. The outcome is only reported on stdout.
    """
    # Guard clause: nothing to delete, just report it.
    if not os.path.exists(file_path):
        print("Can not delete the file as it doesn't exists")
        return
    os.remove(file_path)
    print('file deleted')

# Convert browsing behaviors of customers into vectors
1. Consider the maximum duration of a working session to be one hour, where a session is the amount of time a customer spends on the website.

2. Collect the behavioral data of all customers over the past year, and organize it so that every row represents all behaviors of one customer within one session (the result is stored in the variable "file" below).

3. Train a Behavior2Vec model to convert the behavioral data into vectors that the model can understand.

In [None]:
# session_data.txt: every row of this file represents the behavior of one customer within one hour
# file = ['view-product_ID1 view-product_ID2\n', 'view-product_ID3 leave-product_ID3\n', ...]
# Read through a context manager so the file handle is closed as soon as the
# lines are loaded (the bare open(...).readlines() left it open).
with open('session_data.txt', 'r') as session_file:
    file = session_file.readlines()

In [None]:
# Train a Behavior2Vec model on the session file and save its full_model to disk.
start = datetime.datetime.now()

file_name = 'session_data.txt'
model_name = "Behavior2Vec_20221017.model"

model = behavior2vec.Behavior2Vec()
model.train(file_name, size=32)

# Remove any previous file with the same name before saving the new model.
remove_file(model_name)
model.full_model.save(model_name)

elapsed = datetime.datetime.now() - start
print('Time: ', elapsed)

In [None]:
# Reload the saved Behavior2Vec model and rebuild the embedding tables from it.
# behavior_embedding = {'view':[x1, x2, ..., x32], ...} -> embedding of customers' behaviors
# action_product_embedding = {'view':{'product ID1':[x1, x2, ..., x32], 'product ID2':[x1, x2, ..., x32]},
#                             'add_cart':{...}} -> embedding of each product of each behavior
behavior_model = behavior2vec.Behavior2Vec()
behavior_model.full_model = Word2Vec.load("Behavior2Vec_20221017.model")
behavior_model.behavior_embeddings = behavior_model._gen_behavior_embedding()
behavior_embedding = behavior_model._gen_avg_behavior_embeddings(behavior_model.behavior_embeddings)
product_embedding = behavior_model._gen_item_embeddings()
action_product_embedding = behavior_model.behavior_embeddings

# Persist the three embedding tables, one pickle file each.
for pickle_path, payload in (
    ('product_embedding.pkl', product_embedding),
    ('action_product_embedding.pkl', action_product_embedding),
    ('behavior_embedding.pkl', behavior_embedding),
):
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
# vector_size = the dimension of action_product_embedding.
# It is the same value (32) as the `size` argument passed to Behavior2Vec
# training earlier in this notebook; keep the two in sync when changing either.
vector_size = 32

In [None]:
# Flatten the nested {action: {product_id: vector}} mapping into two lookups:
#   act_prod_2_index: '<action>-<product_id>' -> row number, ex. 'view-01': 0
#   index_2_act_prod: row number -> embedding, ex.
#       index_2_act_prod[0] = action_product_embedding['view']['01'] = [x1, x2, ..., x32]
action = list(action_product_embedding.keys())
act_prod_2_index = {}

# Size the table from the real number of (action, product) pairs. The previous
# sizing, len(action) * len(action_product_embedding['view']), raised IndexError
# whenever the total pair count exceeded it (some action covering more products
# than 'view') and KeyError when no 'view' action existed. Rows are only ever
# addressed through act_prod_2_index, so an exact size is sufficient.
total_pairs = sum(len(action_product_embedding[act]) for act in action)
index_2_act_prod = np.zeros(shape=(total_pairs, vector_size))

count = 0
for act in action:
    for prod, vector in action_product_embedding[act].items():
        act_prod_2_index[act + '-' + prod] = count
        index_2_act_prod[count] = vector
        count += 1


In [None]:
# Information about Taiwanese products, loaded from tw_prod_active.csv.
# NOTE(review): prod_tw_active is not referenced again in the visible part of
# this notebook — confirm whether a later cell still needs it.
prod_tw_active = pd.read_csv('tw_prod_active.csv')

In [None]:
# Build the product <-> column lookups from the products that have a 'view' embedding.
all_product = np.array(list(action_product_embedding['view'].keys()))

# prod_2_index: ex. 'product ID1': 0; index_2_prod_list is the inverse (column -> product ID).
index_2_prod_list = list(all_product)
prod_2_index = {product_id: column for column, product_id in enumerate(all_product)}

# index_2_prod[:, 0] = the embedding of product ID1; dimension = (vector_size, number of products)
index_2_prod = np.zeros((vector_size, all_product.shape[0]))
for column, product_id in enumerate(all_product):
    # Average all vectors stored for this product into one vector_size-dim column.
    stacked = product_embedding[product_id].reshape(-1, vector_size)
    index_2_prod[:, column] = np.mean(stacked, axis=0)

index_2_prod = torch.from_numpy(index_2_prod).float()
# Scale every column to unit L2 norm so a dot product with it is a cosine similarity.
column_norms = index_2_prod.norm(dim=0)
norm_product_vector = index_2_prod / column_norms

# Create Dataset
1. use behavioral data of two months before the expected advertising time point as model's input.

2. last-viewed product is the model's prediction target. 

In [None]:
# Load the raw behavior logs of the three splits and print their shapes.
train_df, valid_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('train_raw_data.csv', 'valid_raw_data.csv', 'test_raw_data.csv')
)
print(train_df.shape, valid_df.shape, test_df.shape)

In [None]:
# data preprocessing: averaging the vectors that represent all behaviors of a customer within two months
def preprocessing(data):
    """Turn a raw behavior log into one (input embedding, target) row per user.

    Relies on the module-level lookups ``act_prod_2_index``, ``prod_2_index``
    and ``index_2_act_prod``.

    Args:
        data: DataFrame with the columns 'adtech_user_id', 'action' and
            'product_oid'. It is not modified. Presumably the rows are in
            chronological order per user, since the last row of each user is
            used as the target — verify against the upstream export.

    Returns:
        DataFrame with the columns 'adtech_user_id', 'act_prod_index',
        'product_oid', 'x_length', 'y' and 'x_embedding', where 'y' is the
        integer index (from ``prod_2_index``) of the product of the user's last
        event and 'x_embedding' is the mean embedding of all earlier events.
        Users with fewer than two known events, or whose last product has no
        entry in ``prod_2_index``, are dropped.
    """
    # Sentinel index for '<action>-<product>' keys that have no embedding.
    unknown_index = len(act_prod_2_index)

    # assign() returns a new frame, so the helper columns are not written into
    # the caller's DataFrame.
    data = data.assign(act_prod=data['action'] + '-' + data['product_oid'].astype(str))
    data = data.assign(
        act_prod_index=data['act_prod'].map(act_prod_2_index).fillna(unknown_index).astype(int)
    )
    data = data[data['act_prod_index'] != unknown_index]

    # Collect, per user, the event indices and the product ids in row order.
    per_user = data.groupby(['adtech_user_id'])
    data_act_prod = per_user['act_prod_index'].apply(list).reset_index()
    data_product = per_user['product_oid'].apply(list).reset_index()
    data1 = data_act_prod.merge(data_product, how='inner', on='adtech_user_id')

    # At least one input event plus the target event is required.
    data1['x_length'] = data1['act_prod_index'].apply(len)
    data1 = data1[data1['x_length'] > 1].copy()

    data1['y'] = data1['product_oid'].apply(lambda x: x[-1]).astype(str).map(prod_2_index)
    # prod_2_index only covers products that have a 'view' embedding, so the
    # mapping can yield NaN for a last event on any other product. Such rows
    # cannot serve as class labels; drop them and keep 'y' an integer column.
    data1 = data1[data1['y'].notna()].copy()
    data1['y'] = data1['y'].astype(int)

    # Input = mean embedding of every event except the last (the target).
    data1['x_embedding'] = data1['act_prod_index'].apply(
        lambda x: np.mean(index_2_act_prod[x[:-1]], axis=0)
    )
    return data1

In [None]:
# Preprocess the three splits in order and report the resulting shapes and the elapsed time.
start = datetime.datetime.now()
train_df1, valid_df1, test_df1 = (
    preprocessing(raw_df) for raw_df in (train_df, valid_df, test_df)
)
print(train_df1.shape, valid_df1.shape, test_df1.shape)
print(datetime.datetime.now() - start)

In [None]:
def model_input(data1):
    """Expand the 'x_embedding' vectors into one numeric column per dimension.

    Args:
        data1: DataFrame with the columns 'adtech_user_id', 'x_embedding' and
            'y' (as returned by ``preprocessing``). It is not modified.

    Returns:
        A new DataFrame whose columns are, in this order, 'adtech_user_id',
        'x_embedding', 'y', 'x_0', ..., 'x_{vector_size - 1}'. The column
        order matters: ``dnn_dataset`` reads the target and the features by
        position.
    """
    # Explicit copy: the column selection must not alias data1.
    df = data1[['adtech_user_id', 'x_embedding', 'y']].copy()
    df_column = ['x_' + str(i) for i in range(vector_size)]
    # Naming the columns up front also keeps an empty input working.
    expanded = pd.DataFrame(df['x_embedding'].tolist(), index=df.index, columns=df_column)
    # One concat instead of inserting the new columns into a sliced frame.
    return pd.concat([df, expanded], axis=1)

In [None]:
# Build the flat model-input table for each split.
train_data, valid_data, test_data = (
    model_input(split_df) for split_df in (train_df1, valid_df1, test_df1)
)

In [None]:
# Write each split to its own CSV file, without the index column.
for frame, csv_path in (
    (train_data, 'train_dnn_data.csv'),
    (valid_data, 'valid_dnn_data.csv'),
    (test_data, 'test_dnn_data.csv'),
):
    frame.to_csv(csv_path, index=False)

# Deep Neural Network (DNN)
1. build the dataset, dataloader and dnn model.

2. dnn model's output is a 32-dimensional vector which represents the product that the model thinks the customer wants to see next.

3. compute the cosine similarity between model's output and every product vector to find products that match model's prediction.

4. sort by the similarity score from highest to lowest to get the recommended product list for a specific customer.

5. use cross entropy loss and accuracy function defined below to evaluate model's performance. 


In [None]:
def get_device():
    """Return the CUDA device when one is available, otherwise the CPU.

    The chosen device is also printed.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(f'Using device {device}.')
    return device


device = get_device()

In [None]:
# Seed every random number generator used in this notebook with the same value.
seed = 1115
for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    seed_fn(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# cuDNN settings recommended by the PyTorch reproducibility notes.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
class dnn_dataset(Dataset):
    """Dataset that holds all features and targets as tensors on ``device``.

    Columns are read by position: column 2 is the target and every column from
    position 3 onwards is a feature.
    """

    def __init__(self, data, device):
        features = data.iloc[:, 3:].to_numpy()
        targets = data.iloc[:, 2].to_numpy()
        self.x = torch.tensor(features).to(device)
        self.y = torch.tensor(targets).to(device)

    def __len__(self):
        # Number of samples = number of feature rows.
        return self.x.shape[0]

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [None]:
# Reload the model-input tables that were written to CSV above.
train_data, valid_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('train_dnn_data.csv', 'valid_dnn_data.csv', 'test_dnn_data.csv')
)

In [None]:
# data loader
batch_size = 512
# Shuffle the training split so each epoch sees differently composed batches;
# with shuffle=False every epoch replayed the same batches in the stored row
# order. The order stays reproducible because torch.manual_seed is set above.
train_loader = torch.utils.data.DataLoader(dnn_dataset(train_data, device), batch_size=batch_size, shuffle=True)
# Evaluation splits keep their row order so predictions line up with the rows
# of valid_data / test_data.
val_loader = torch.utils.data.DataLoader(dnn_dataset(valid_data, device), batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dnn_dataset(test_data, device), batch_size=batch_size, shuffle=False)

In [None]:
# model's structure
class first_stage(nn.Module):
    """Three-layer fully connected network: start_embedding_dim -> 128 -> 64 -> 32.

    A ReLU follows every layer, including the last one, so the 32-dimensional
    output is non-negative.
    """

    def __init__(self, start_embedding_dim):
        super().__init__()
        self.fc1 = nn.Linear(start_embedding_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)

    def forward(self, behavior_embedding):
        # Inputs are cast to float32 before the first layer.
        hidden = behavior_embedding.float()
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return hidden

In [None]:
# if the product that customer actually sees next is in the top_k of the recommended product list,
# the sample scores 1 (model's prediction is correct), otherwise it scores 0.
def accuracy(top_k, y, cosine_sim):
    """Return a per-sample top-k hit list.

    Args:
        top_k: number of highest-scoring products that count as recommended.
        y: true product indices, one per row of ``cosine_sim`` (tensor or
            sequence of ints).
        cosine_sim: 2-D tensor of scores, one row per sample and one column
            per product.

    Returns:
        A list of Python ints with one entry per element of ``y``: 1 when the
        true index is among the ``top_k`` best-scoring columns of that row,
        0 otherwise.
    """
    # Indices of the top_k best-scoring products for every row.
    recommend = torch.topk(cosine_sim, top_k)[1]
    targets = torch.as_tensor(y, device=recommend.device).reshape(-1, 1)
    # One broadcast comparison for the whole batch replaces the per-sample
    # Python loop, which did a tensor membership test (and a device sync) per row.
    hits = (recommend[:targets.shape[0]] == targets).any(dim=1)
    return hits.int().tolist()

In [None]:
def train(model, loader, index_2_prod, norm_product_vector, loss_func, optimizer, data_size, epoch, top_k):
    """Run one training epoch and print its loss and top-k accuracy.

    Args:
        model: network mapping a batch of inputs to output vectors.
        loader: iterable yielding ``(x, y)`` batches.
        index_2_prod: matrix the model output is multiplied with to obtain one
            score per product (presumably of shape
            (embedding_dim, n_products) — confirm against the caller).
        norm_product_vector: matrix used for the cosine-similarity ranking
            (expected to hold unit-norm product columns).
        loss_func: loss applied to ``(scores, y)``.
        optimizer: optimizer updating ``model``.
        data_size: divisor used to turn the summed loss into ``epoch_loss``.
        epoch: zero-based epoch number; only used in the printed message.
        top_k: cut-off passed to ``accuracy``.

    Returns:
        ``(epoch_loss, batch_loss, acc)``: the summed loss divided by
        ``data_size``, the list of per-batch losses each divided by its batch
        size, and the fraction of samples whose target is in the top ``top_k``.
    """
    total_loss = 0
    batch_loss = []
    acc_list = []

    model.train()
    for x, y in loader:
        out = model(x)
        # Dot product of the predicted vector with every product embedding.
        out_1 = torch.mm(out, index_2_prod)

        loss = loss_func(out_1, y)
        cpu_loss = loss.item()
        batch_loss.append(cpu_loss / y.shape[0])
        total_loss += cpu_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # compute cosine similarity (metric only, so no autograd graph is built)
        with torch.no_grad():
            # The final ReLU of the model can produce an all-zero row; clamp the
            # norm so that row yields zeros instead of NaN similarities.
            out_2 = out / out.norm(dim=1, keepdim=True).clamp_min(1e-12)
            cosine_sim = torch.mm(out_2, norm_product_vector)
        acc_list += accuracy(top_k, y, cosine_sim)
        # The per-step gc.collect() was removed: it forced a full garbage
        # collection on every batch without affecting the results.

    epoch_loss = total_loss / data_size
    acc = np.sum(acc_list) / len(acc_list)
    print(f'train epoch: {epoch + 1}, train loss: {epoch_loss}, train accuracy: {acc}')

    return epoch_loss, batch_loss, acc

In [None]:
@torch.no_grad()
def valid(model, loader, index_2_prod, norm_product_vector, loss_func, data_size, epoch, top_k):
    """Evaluate ``model`` on one pass over ``loader`` without updating it.

    Args:
        model: network mapping a batch of inputs to output vectors.
        loader: iterable yielding ``(x, y)`` batches.
        index_2_prod: matrix the model output is multiplied with to obtain one
            score per product.
        norm_product_vector: matrix used for the cosine-similarity ranking
            (expected to hold unit-norm product columns).
        loss_func: loss applied to ``(scores, y)``.
        data_size: divisor used to turn the summed loss into ``epoch_loss``.
        epoch: zero-based epoch number; only used in the printed message.
        top_k: cut-off passed to ``accuracy``.

    Returns:
        ``(epoch_loss, batch_loss, acc)`` with the same meaning as in ``train``.
    """
    total_loss = 0
    batch_loss = []
    acc_list = []

    model.eval()
    for x, y in loader:
        out = model(x)

        out_1 = torch.mm(out, index_2_prod)
        cpu_loss = loss_func(out_1, y).item()
        total_loss += cpu_loss
        batch_loss.append(cpu_loss / y.shape[0])

        # compute cosine similarity; the clamp keeps an all-zero output row
        # (possible after the model's final ReLU) from turning into NaN.
        out_2 = out / out.norm(dim=1, keepdim=True).clamp_min(1e-12)
        cosine_sim = torch.mm(out_2, norm_product_vector)
        acc_list += accuracy(top_k, y, cosine_sim)

    epoch_loss = total_loss / data_size
    # calculate the accuracy rate
    acc = np.sum(acc_list) / len(acc_list)
    print(f'valid epoch: {epoch + 1}, valid loss: {epoch_loss}, valid accuracy: {acc}')

    return epoch_loss, batch_loss, acc

In [None]:
@torch.no_grad()
def prediction(model, loader, norm_product_vector, top_k):
    """Score every product for every sample of ``loader``.

    Args:
        model: network mapping a batch of inputs to output vectors.
        loader: iterable yielding ``(x, y)`` batches.
        norm_product_vector: matrix used for the cosine-similarity ranking
            (expected to hold unit-norm product columns).
        top_k: cut-off passed to ``accuracy``.

    Returns:
        ``(pred, acc)``: a CPU tensor with one row of similarity scores per
        sample, in loader order, and the fraction of samples whose target is
        in the top ``top_k``.
    """
    pred_chunks = []
    acc_list = []

    model.eval()
    for x, y in loader:
        out = model(x)
        # compute cosine similarity; the clamp keeps an all-zero output row
        # (possible after the model's final ReLU) from turning into NaN.
        out = out / out.norm(dim=1, keepdim=True).clamp_min(1e-12)
        cosine_sim = torch.mm(out, norm_product_vector)
        pred_chunks.append(cosine_sim.cpu())
        acc_list += accuracy(top_k, y, cosine_sim)

    # Concatenate once at the end; growing the tensor inside the loop copied
    # all earlier rows again on every batch.
    pred = torch.cat(pred_chunks, 0) if pred_chunks else torch.tensor([])
    return pred, np.sum(acc_list) / len(acc_list)

In [None]:
# save checkpoint
def save_ckpt(score, epoch, model, optim, model_path):
    """Replace the checkpoint at ``model_path`` with the current training state.

    The stored dict has the keys 'epoch' (``epoch + 1``), 'state_dict',
    'optim_state_dict' and 'score'.
    """
    remove_file(model_path)
    checkpoint = {
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'optim_state_dict': optim.state_dict(),
        'score': score,
    }
    torch.save(checkpoint, model_path)

In [None]:
# Training hyper-parameters and bookkeeping.
epoch = 10                # number of epochs run by the training loop
best_score = 0.0          # best validation accuracy so far (decides checkpointing)
min_loss = 100000000      # best validation loss so far (decides early stopping)
patience = 40             # epochs without validation-loss improvement before stopping
trigger_times = 0
top_k = 10
reload = True             # resume from the checkpoint at model_path when it exists
model_path = 'best_model.pth.tar'
lr = 0.005
performance_dict = {'train_epoch_loss':[], 'valid_epoch_loss':[],
                    'train_epoch_acc':[], 'valid_epoch_acc':[],
                    'train_batch_loss':[], 'valid_batch_loss':[]}

model = first_stage(vector_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# reduction='sum': the training/validation functions divide by the sample count themselves.
loss_func = nn.CrossEntropyLoss(reduction='sum')

if reload:
    if os.path.exists(model_path):
        ckpt = torch.load(model_path, map_location=device)
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        # Continue from the stored best score; with best_score left at 0.0 the
        # first resumed epoch would overwrite the best checkpoint even when it
        # scores worse.
        best_score = ckpt.get('score', best_score)
    else:
        # First run: there is nothing to resume from yet.
        print(f'No checkpoint found at {model_path}; training from scratch.')

In [None]:
# Load a previously pickled performance_dict from disk; it replaces the dict
# initialised in the set-up cell.
with open('performance_dict.pkl', mode='rb') as history_file:
    performance_dict = pickle.load(history_file)

In [None]:
# every epoch: 1 training + 1 validation
start = datetime.datetime.now()

# Move the product matrices to the target device once; they do not change
# between epochs, so repeating the transfer inside the loop was redundant.
index_2_prod_dev = index_2_prod.to(device)
norm_product_vector_dev = norm_product_vector.to(device)

for epoch_num in range(epoch):
    print(epoch_num)
    epoch_train_loss, batch_train_loss, epoch_train_acc = train(
        model, train_loader, index_2_prod_dev, norm_product_vector_dev,
        loss_func, optimizer, train_data.shape[0], epoch_num, top_k)

    epoch_val_loss, batch_val_loss, epoch_val_acc = valid(
        model, val_loader, index_2_prod_dev, norm_product_vector_dev,
        loss_func, valid_data.shape[0], epoch_num, top_k)

    # Checkpoint on validation accuracy ...
    if epoch_val_acc > best_score:
        save_ckpt(epoch_val_acc, epoch_num, model, optimizer, model_path)
        best_score = epoch_val_acc

    performance_dict['train_epoch_loss'].append(epoch_train_loss)
    performance_dict['valid_epoch_loss'].append(epoch_val_loss)
    performance_dict['train_epoch_acc'].append(epoch_train_acc)
    performance_dict['valid_epoch_acc'].append(epoch_val_acc)
    performance_dict['train_batch_loss'] += batch_train_loss
    performance_dict['valid_batch_loss'] += batch_val_loss

    # Early stopping
    # ... while early stopping watches validation loss: stop after `patience`
    # consecutive epochs without a new minimum.
    if epoch_val_loss >= min_loss:
        trigger_times += 1
        if trigger_times >= patience:
            print('Early stopping!')
            break
    else:
        trigger_times = 0
        min_loss = epoch_val_loss

print(datetime.datetime.now() - start)

In [None]:
# plot loss and accuracy rate
def plot(train_data, valid_data, title, label_name, x_label, y_label, fig_name):
    """Draw the train and valid curves in one figure, save it and show it.

    Args:
        train_data: sequence of values for the 'train' curve.
        valid_data: sequence of values for the 'valid' curve.
        title: figure title.
        label_name: suffix of both legend entries.
        x_label: x-axis label.
        y_label: y-axis label.
        fig_name: output file name without the '.png' extension.
    """
    plt.figure(figsize=(15, 7))
    # (values, colour, opacity, legend prefix) for each curve, drawn in this order.
    curves = (
        (train_data, 'blue', 0.3, 'train - '),
        (valid_data, 'orange', 0.7, 'valid - '),
    )
    for values, colour, opacity, prefix in curves:
        plt.plot(range(len(values)), values, 'o-', c=colour, alpha=opacity,
                 label=prefix + label_name, linewidth=3)
    plt.xlabel(x_label, fontsize=15)
    plt.ylabel(y_label, fontsize=20)
    plt.title(title, fontsize=20)
    plt.legend(prop={'size': 15})
    plt.grid()
    plt.savefig(fig_name + '.png')
    plt.show()

In [None]:
# Epoch-level loss curves.
plot(
    train_data=performance_dict['train_epoch_loss'],
    valid_data=performance_dict['valid_epoch_loss'],
    title='train & valid epoch loss',
    label_name='loss',
    x_label='epoch number',
    y_label='loss',
    fig_name='train_valid_epoch_loss',
)
# Epoch-level accuracy curves.
plot(
    train_data=performance_dict['train_epoch_acc'],
    valid_data=performance_dict['valid_epoch_acc'],
    title='train & valid epoch accuracy',
    label_name='accuracy',
    x_label='epoch number',
    y_label='accuracy',
    fig_name='train_valid_epoch_accuracy',
)

In [None]:
# predict: restore the saved checkpoint, then score the test split
ckpt = torch.load(model_path, map_location=device)
for component, state_key in ((model, 'state_dict'), (optimizer, 'optim_state_dict')):
    component.load_state_dict(ckpt[state_key])
pred, test_acc = prediction(model, test_loader, norm_product_vector.to(device), top_k)