## Convolutional Sequence Embedding Recommendation Model (Caser)

#### Acknowledgement

Tang, J. and Wang, K. (2018). Personalized top-n sequential recommendation via convolutional sequence embedding. In ACM International Conference on Web Search and Data Mining.

This notebook is based on the content of the GitHub repository (https://github.com/graytowne/caser_pytorch). Portions of the code presented here are adapted or directly borrowed from the repository, with modifications made for specific purposes.

In [None]:
#import later used packages
import itertools
import os

import numpy as np
import pandas as pd

# custom functions see .py files
# by Caser Paper author
from train_caser import Recommender
from interactions import Interactions
from utils import *

# selfmade
from Rec_split import rec_split
from Kendall_distance import kendall_distance_with_penalty

# Switch off pandas' chained-assignment check (SettingWithCopyWarning) for the whole notebook
pd.set_option('mode.chained_assignment', None)

### Data Loading and Preprocessing

In [2]:
# Read the full ratings file and discard the side-information columns that are not used below
df = pd.read_csv('data/ml_1M_full.csv').drop(columns=['Gender', 'Age', 'Occupation', 'Genre'])

# Split into train / validation / test parts (70% / 15% / remainder) via the custom rec_split helper
train_df, val, test = rec_split(df, 'User', 'Timestamp', train_share=0.7, val_share=0.15)

# Binarise the ratings of the train and validation parts: 4 or 5 -> 1, anything else -> 0.
# The test part keeps its original ratings.
for part in (train_df, val):
    part['Rating'] = part['Rating'].isin([4, 5]).astype('int64')

In [3]:
# Export every split as a tab-separated text file without header row and without index column
for frame, path in ((train_df, 'data/train.txt'),
                    (val, 'data/validation.txt'),
                    (test, 'data/test.txt')):
    frame.to_csv(path, sep='\t', header=False, index=False)

In [None]:
# Locations of the exported train / validation files
train_root, val_root = 'data/train.txt', 'data/validation.txt'
# Arguments handed to Interactions.to_sequence; L is also passed to the Recommender later on
L, T = 5, 3

# Build the training interactions and convert them to their sequence representation
train = Interactions(train_root)
train.to_sequence(L, T)

# The validation interactions reuse the user and item maps of the training set
# NOTE: from here on `val` refers to this Interactions object, no longer to the validation data frame
val = Interactions(val_root, user_map=train.user_map, item_map=train.item_map)

### Hyperparameter Tuning

In [4]:
# --- training arguments -------------------------------------------------------
# list-valued settings are the ones iterated over by the grid search
epochs = [30, 50, 70]  # default 50
learning_rate = [1e-3, 1e-4]
batch_size = 512
l2 = 1e-6
neg_samples = 3
use_cuda = False
seed = 123

# --- model dependent arguments ------------------------------------------------
nhs = [8, 16, 32]
nvs = [2, 4, 8]
d = 50
drop = 0.5
ac_conv = 'relu'
ac_fc = 'relu'

# seed the random number generators once, before any model is created
set_seed(seed, cuda=use_cuda)

In [None]:
# Grid search: fit one model per (epochs, nh, nv, learning rate) combination.
#
# Every combination trains a fresh Recommender via `fit(train, val)`; the dict returned by `fit`
# becomes one row of `results`. The table is written to disk after each combination so that
# finished runs are not lost if the search is interrupted.

# make sure the output folder exists *before* the first (expensive) training run has finished
os.makedirs('results', exist_ok=True)

# one result dict per finished combination; `results` is rebuilt from this list
records = []
results = pd.DataFrame()

# itertools.product visits the combinations in the same order as nested loops
# over epochs > nhs > nvs > learning_rate would
for epoch, nh, nv, lr in itertools.product(epochs, nhs, nvs, learning_rate):
    # train the model for this hyperparameter combination
    model = Recommender(n_iter=epoch,
                        batch_size=batch_size,
                        learning_rate=lr,
                        l2=l2,
                        neg_samples=neg_samples,
                        L=L,
                        dims=d,
                        nh=nh,
                        nv=nv,
                        drop=drop,
                        ac_conv=ac_conv,
                        ac_fc=ac_fc,
                        use_cuda=use_cuda)
    # result of this specific hyperparameter combination
    records.append(model.fit(train, val, verbose=False))

    # checkpoint: rebuild the table from all records collected so far and overwrite the file
    results = pd.DataFrame(records)
    results.to_csv('results/Caser_hyperparameter.csv')

In [6]:
# Reload the stored grid-search table (dropping the index column written by to_csv)
# and order the hyperparameter combinations by MAP, best first
results = (
    pd.read_csv('results/Caser_hyperparameter.csv')
    .drop(columns='Unnamed: 0')
    .sort_values(by=['MAP'], ascending=False)
)

results

Unnamed: 0,loss,MAP,precision@1,precision@5,precision@10,recall@1,recall@5,recall@10,learnig rate,Number of Epochs,nh,nv
8,0.21985,0.174431,0.266225,0.221589,0.199934,0.022577,0.09015,0.158683,0.001,30,16,4
48,0.199758,0.173346,0.252483,0.222781,0.200132,0.021194,0.091621,0.157798,0.001,70,32,2
42,0.205151,0.172895,0.251656,0.21947,0.19745,0.022133,0.090582,0.156288,0.001,70,16,2
26,0.204445,0.172397,0.253642,0.220331,0.197583,0.022119,0.090299,0.155358,0.001,50,16,4
12,0.222707,0.172349,0.25,0.221623,0.197947,0.021052,0.089637,0.156278,0.001,30,32,2
32,0.202228,0.172163,0.240066,0.219768,0.197781,0.020931,0.091147,0.15696,0.001,50,32,4
41,0.326347,0.172091,0.267881,0.226126,0.204818,0.021831,0.087007,0.153871,0.0001,70,8,8
28,0.198875,0.172074,0.258775,0.219139,0.198046,0.022322,0.090946,0.156685,0.001,50,16,8
10,0.214238,0.17206,0.24851,0.220497,0.198692,0.020567,0.090722,0.15706,0.001,30,16,8
14,0.217256,0.172015,0.25149,0.219172,0.197152,0.021837,0.088849,0.155269,0.001,30,32,4


### Model Evaluation

In [7]:
# Hyperparameters of the top-ranked (highest MAP) combination in the grid-search table:
# learning rate, number of epochs, nh and nv
lr_opt, epochs_opt, nh_opt, nv_opt = 0.001, 30, 16, 4

In [8]:
#run model with optimal hyperparameters
model_opt = Recommender(n_iter=epochs_opt,
                        batch_size=batch_size,
                        learning_rate=lr_opt,
                        l2=l2,
                        neg_samples=neg_samples,
                        L = L,
                        dims = d,
                        nh = nh_opt,
                        nv = nv_opt,
                        drop = drop,
                        ac_conv = ac_conv,
                        ac_fc = ac_fc,
                        use_cuda=use_cuda)

model_opt.fit(train, val, verbose=False)

total training instances: 655098
Epoch 1 [109.3 s]	loss=0.8230 [0.0 s]
Epoch 2 [107.8 s]	loss=0.6174 [0.0 s]
Epoch 3 [111.1 s]	loss=0.5261 [0.0 s]
Epoch 4 [112.1 s]	loss=0.4647 [0.0 s]
Epoch 5 [112.2 s]	loss=0.4219 [0.0 s]
Epoch 6 [108.8 s]	loss=0.3902 [0.0 s]
Epoch 7 [107.7 s]	loss=0.3665 [0.0 s]
Epoch 8 [108.2 s]	loss=0.3471 [0.0 s]
Epoch 9 [106.5 s]	loss=0.3316 [0.0 s]
Epoch 10 [107.3 s]	loss=0.3177 [0.0 s]
Epoch 11 [109.6 s]	loss=0.3053 [0.0 s]
Epoch 12 [108.9 s]	loss=0.2949 [0.0 s]
Epoch 13 [110.6 s]	loss=0.2861 [0.0 s]
Epoch 14 [110.8 s]	loss=0.2770 [0.0 s]
Epoch 15 [109.6 s]	loss=0.2698 [0.0 s]
Epoch 16 [108.7 s]	loss=0.2641 [0.0 s]
Epoch 17 [109.8 s]	loss=0.2583 [0.0 s]
Epoch 18 [109.8 s]	loss=0.2524 [0.0 s]
Epoch 19 [111.0 s]	loss=0.2493 [0.0 s]
Epoch 20 [109.7 s]	loss=0.2451 [0.0 s]
Epoch 21 [108.5 s]	loss=0.2408 [0.0 s]
Epoch 22 [107.4 s]	loss=0.2373 [0.0 s]
Epoch 23 [105.8 s]	loss=0.2341 [0.0 s]
Epoch 24 [100.7 s]	loss=0.2322 [0.0 s]
Epoch 25 [104.2 s]	loss=0.2300 [0.0 s]
E

{'loss': 0.2212129334686324,
 'MAP': 0.17204287337960014,
 'precision@1': 0.25579470198675497,
 'precision@5': 0.22248344370860926,
 'precision@10': 0.1974834437086093,
 'recall@1': 0.021888589108570035,
 'recall@5': 0.0908098008717107,
 'recall@10': 0.1570444634268174,
 'learnig rate': 0.001,
 'Number of Epochs': 30,
 'nh': 16,
 'nv': 4}

In [37]:
# Invert train.item_map so that the model's item indices can be translated back
# to the integer movie ids used in the rating data frames
movie_mapping = dict((idx, int(raw_id)) for raw_id, idx in train.item_map.items())

In [48]:
# Evaluate the tuned model on the held-out test ratings.
#
# For every user of the training data the model scores are ranked once; the top-k slices
# for all k are taken from that single ranking. Per k the cell reports
#   * Average Weighted Hit Rate: rating-dependent reward/penalty of the recommended movies
#     that the user rated in the test set, divided by k and averaged over users
#   * Average User Satisfaction: share of users with at least one recommended movie rated
#     >= 4 (asats) or == 5 (asats_2)
# and, independent of k, the mean Kendall distance with penalty p = 0.05 and p = 0.2.
#
# Results: awhrs, asats, asats_2 (one row per k), kendal (one row per p) and one
# 'results/Recommendation_distribution@{k}.csv' file per k.

# Extracting unique users from the training data
users = train_df.User.unique()

# cut-offs of the top-k recommendation lists
k_values = [1, 5, 10, 20, 50]

# contribution of a recommended movie to the Weighted Hit Rate, keyed by its test rating
whr_weights = {1: -5, 2: -2, 3: 2, 4: 6, 5: 12}

# the recommendation files are written into this folder
os.makedirs('results', exist_ok=True)

# per-k accumulators, each holding one entry per user (in the order of `users`)
whrs_by_k = {k: [] for k in k_values}
sat_by_k = {k: [] for k in k_values}
sat_2_by_k = {k: [] for k in k_values}
recs_by_k = {k: [] for k in k_values}

# Lists to store the Kendall distance of every user
kendal_sum = []
kendal_sum_2 = []

for user in users:
    # Scores and ranking are computed once per user and shared by all k
    # (assumes model_opt.predict is deterministic for a given user - TODO confirm).
    # NOTE(review): `user - 1` assumes that the model's user index equals the raw user id minus
    # one; train.user_map holds the actual mapping - verify that both agree.
    predictions_user = model_opt.predict(user - 1)
    ranking = np.argsort(predictions_user)

    # Test ratings of the current user
    ratings = test[test['User'] == user]
    # movie -> rating lookup; only movies with exactly one test rating of this user count as a hit
    rated_once = ratings.drop_duplicates(subset='Movie', keep=False)
    rating_by_movie = dict(zip(rated_once['Movie'], rated_once['Rating']))

    for k in k_values:
        # argsort is ascending, so the k best-scored items are the last k entries
        recommendations = ranking[-k:]

        whr = 0  # Weighted Hit Rate of the user
        sat = 0  # 1 if at least one recommended movie was rated 4 or 5
        sat_2 = 0  # 1 if at least one recommended movie was rated 5

        for rec in recommendations:
            # translate the model's item index back to the movie id
            rating = rating_by_movie.get(movie_mapping[rec])
            if rating is None:
                continue  # not (uniquely) rated by the user in the test set

            # ratings outside 1..5 contribute nothing
            whr += whr_weights.get(rating, 0)
            if rating == 4 or rating == 5:
                sat = 1
            if rating == 5:
                sat_2 = 1

        whrs_by_k[k].append(whr / k)
        sat_by_k[k].append(sat)
        sat_2_by_k[k].append(sat_2)
        recs_by_k[k].append(recommendations)

    # Kendall distance uses the whole sequence of predictions and is therefore independent of k
    predictions_user_df = pd.DataFrame({'Prediction': pd.Series(predictions_user).values,
                                        'Movie_unmapped': pd.Series(predictions_user).index})
    predictions_user_df['Movie'] = predictions_user_df['Movie_unmapped'].map(movie_mapping)

    kendal_u = kendall_distance_with_penalty(predictions_user_df, ratings, 'Movie', 'Movie',
                                             'Prediction', 'Rating', p=0.05)
    kendal_u_2 = kendall_distance_with_penalty(predictions_user_df, ratings, 'Movie', 'Movie',
                                               'Prediction', 'Rating', p=0.2)

    kendal_sum.append(kendal_u)
    kendal_sum_2.append(kendal_u_2)

# Saving the recommendations of all users to one CSV file per k
# NOTE(review): despite the column names, 'Element' holds the position of the user in `users`
# and 'Occurrence Count' the array of recommended item indices; names kept for compatibility.
for k in k_values:
    recommendations_allu = recs_by_k[k]
    recommendations_k = pd.DataFrame({'Element': pd.Series(recommendations_allu).index,
                                      'Occurrence Count': pd.Series(recommendations_allu).values})
    recommendations_k.to_csv(f'results/Recommendation_distribution@{k}.csv')

# Averages over all users, one row per k
awhrs = pd.DataFrame({'Average Weigthed Hit Rate': [np.mean(whrs_by_k[k]) for k in k_values],
                      'k': k_values})
asats = pd.DataFrame({'Average User Satisfaction': [np.mean(sat_by_k[k]) for k in k_values],
                      'k': k_values})
asats_2 = pd.DataFrame({'Average User Satisfaction': [np.mean(sat_2_by_k[k]) for k in k_values],
                        'k': k_values})

# Calculating average Kendall Distances
kendal = pd.DataFrame({'Kendall Distance': np.mean(kendal_sum), 'p': 0.05}, index=[0])
kendal_2 = pd.DataFrame({'Kendall Distance': np.mean(kendal_sum_2), 'p': 0.2}, index=[0])
# Concatenating both Kendall Distance DataFrames
kendal = pd.concat([kendal, kendal_2], ignore_index=True)

In [50]:
# Persist all evaluation tables as CSV files
for frame, target in ((awhrs, 'results/Caser_awhrs.csv'),
                      (asats, 'results/Caser_asats.csv'),
                      (asats_2, 'results/Caser_asats2.csv'),
                      (kendal, 'results/Caser_Kendall.csv')):
    frame.to_csv(target)