### Factorization Machines using LightFM

In [74]:
import json
import pandas as pd
import numpy as np
from tqdm import tqdm

import pickle
import scipy.sparse as sp

import import_ipynb
import data_acquisition
import feature_engineering

from lightfm.data import Dataset
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
from lightfm.evaluation import auc_score
from scipy.sparse.coo import coo_matrix
from matplotlib import pyplot as plt


#### 1. Reading Data

In [100]:
# fetch data objects from saved pickle files
ratings_train = pickle.load(open("data/ratings_train_5_years.pkl", "rb"))
ratings_test = pickle.load(open("data/ratings_test_5_years.pkl", "rb"))
ratings_valid = pickle.load(open("data/ratings_validation_5_years.pkl", "rb"))

business_df = pickle.load(open("data/business_feature_set.pkl", "rb"))
user_df = pickle.load(open("data/user_feature_set.pkl", "rb"))

recommendation_df = pickle.load(open("data/ratings_recommendation_list.pkl", "rb"))

# formatting dataframe
ratings_train = ratings_train[['user_id', 'business_id', 'rating']]
ratings_test = ratings_test[['user_id', 'business_id', 'rating']]
user_df = user_df.fillna(0)
business_df = business_df.fillna(0)

In [104]:
print("Users Count ", user_df.shape)
print("Business Count ", business_df.shape)
print("Ratings Train Count ", ratings_train.shape)
print("Ratings Test Count ", ratings_test.shape)
print("Ratings Validation Count ", ratings_valid.shape)
print("Ratings Recommendation Pairs ", recommendation_df.shape)


Users Count  (284023, 33)
Business Count  (23793, 31)
Ratings Train Count  (267777, 3)
Ratings Test Count  (31710, 3)
Ratings Validation Count  (63420, 29)
Ratings Recommendation Pairs  (1745448, 2)


#### 2. Preprocessing Data

In [4]:
def convert_df_to_tupleList(key_col, df):
    result = []
    for i,row in df.iterrows():
        key_val = row[key_col]
        feature_dict = {}
        for col in df.columns.values:
            if col!=key_col:
                feature_dict[col] = row[col]
        result.append((key_val, feature_dict))
    return result

def convert_ratings_to_tupleList(df, user_id, business_id, weight_id):
    result = []
    for i,row in df.iterrows():
        row_tuple = (row[user_id], row[business_id], row[weight_id])
        result.append(row_tuple)
    return result

In [5]:
users_map = None
business_map = None
def convert_to_fm_format(interaction_features, user_features, business_features):
    global users_map, business_map
    global users_map, business_map
    dataset = Dataset(user_identity_features=False, item_identity_features=False)
    
    # interaction matrix
    user_features_list = list(user_features[0][1].keys())
    business_features_list = list(business_features[0][1].keys())
    
    uid_list = (x[0] for x in user_features)
    bid_list = (x[0] for x in business_features)
    
    dataset.fit(uid_list, bid_list, user_features=user_features_list, item_features=business_features_list)
    interactions_mat, interactions_weights = dataset.build_interactions(interaction_features)
    num_users, num_items = dataset.interactions_shape()

    # business features matrix
    business_features_mat = dataset.build_item_features(business_features)

    # user features matrix
    user_features_mat = dataset.build_user_features(user_features)
    
    users_map = dataset.mapping()[0]
    business_map = dataset.mapping()[2]
    
    return interactions_mat, interactions_weights, business_features_mat, user_features_mat


def get_rating_user_business_mat(ratings, user_df, business_df):
    uid = set(ratings['user_id'].values)
    bid = set(ratings['business_id'].values)
    
    user_df = user_df[user_df['user_id'].apply(lambda x: x in uid)].reset_index(drop = True)
    business_df = business_df[business_df['business_id'].apply(lambda x: x in bid)].reset_index(drop = True)
    
    user_features = convert_df_to_tupleList('user_id', user_df)
    business_features = convert_df_to_tupleList('business_id', business_df)
    interaction_features = convert_ratings_to_tupleList(ratings, 'user_id', 'business_id', 'rating')
    
    return convert_to_fm_format(interaction_features, user_features, business_features)

In [6]:
print("Train Data Formatted:")
train_interactions_mat, train_interactions_weights, train_business_features_mat, train_user_features_mat = get_rating_user_business_mat(ratings_train, user_df, business_df)
print("Interaction matrix shape: ", train_interactions_mat.get_shape())
print("Business matrix shape: ", train_business_features_mat.get_shape())
print("User matrix shape: ", train_user_features_mat.get_shape())

print("\n\nTest Data Formatted:")
test_interactions_mat, test_interactions_weights, test_business_features_mat, test_user_features_mat = get_rating_user_business_mat(ratings_test, user_df, business_df)
print("Interaction matrix shape: ", test_interactions_mat.get_shape())
print("Business matrix shape: ", test_business_features_mat.get_shape())
print("User matrix shape: ", test_user_features_mat.get_shape())

print("\n\nValidation Data Formatted:")
valid_interactions_mat, valid_interactions_weights, valid_business_features_mat, valid_user_features_mat = get_rating_user_business_mat(ratings_valid, user_df, business_df)
print("Interaction matrix shape: ", valid_interactions_mat.get_shape())
print("Business matrix shape: ", valid_business_features_mat.get_shape())
print("User matrix shape: ", valid_user_features_mat.get_shape())

Train Data Formatted:
Interaction matrix shape:  (31710, 3675)
Business matrix shape:  (3675, 30)
User matrix shape:  (31710, 32)


Test Data Formatted:
Interaction matrix shape:  (31710, 3216)
Business matrix shape:  (3216, 30)
User matrix shape:  (31710, 32)


Validation Data Formatted:
Interaction matrix shape:  (31710, 3481)
Business matrix shape:  (3481, 30)
User matrix shape:  (31710, 32)


#### 3. Model Training

In [7]:
def train_model(interactions_mat, user_features_mat, business_features_mat, 
                interactions_weights, learning_rate_p = 0.05, epochs_p = 30):
    
    print("Learning Rate ",learning_rate_p)
    print("Epochs ",epochs_p)

    model = LightFM(loss='warp', learning_rate=learning_rate_p)
    model.fit(interactions_mat, user_features = user_features_mat, item_features = business_features_mat,
              sample_weight = interactions_weights, epochs = epochs_p)
    
    return model

In [17]:
def train_model_without_features(interactions_mat, 
                interactions_weights, learning_rate_p = 0.05, epochs_p = 30):
    
    print("Learning Rate ",learning_rate_p)
    print("Epochs ",epochs_p)

    model = LightFM(loss='warp', learning_rate=learning_rate_p)
    model.fit(interactions_mat,
              sample_weight = interactions_weights, epochs = epochs_p)
    
    return model

#### 4. Model Evaluation

In [8]:
def eval_metrics(model, interactions_mat, business_features_mat, user_features_mat, k_value=10):
    
    precision = np.nanmean(precision_at_k(model, interactions_mat, item_features = business_features_mat,
                                          user_features = user_features_mat, k = k_value))
    
    recall = np.nanmean(recall_at_k(model, interactions_mat, item_features = business_features_mat,
                                    user_features = user_features_mat, k = k_value))
    
    auc = np.nanmean(auc_score(model, interactions_mat, item_features = business_features_mat, 
                               user_features = user_features_mat))
    
    return precision, recall, auc

In [81]:
def eval_metrics_without_features(model, interactions_mat, k_value=10):
    
    precision = np.nanmean(precision_at_k(model, interactions_mat, k = k_value))
    
    recall = np.nanmean(recall_at_k(model, interactions_mat, k = k_value))
    
    auc = np.nanmean(auc_score(model, interactions_mat))
    
    return precision, recall, auc

#### 5. Hyperparameter Tuning
##### 5.1 FM with Features

1. Tuning for hyperparameter - **learning_rate**

In [9]:
train_lr = []
valid_lr = []
print("Tuning Using validation Set: \n")
learning_rates_list = [0.01, 0.03, 0.05, 0.07, 0.09, 0.11, 0.13, 0.15, 0.17, 0.19]

for learning_rate in learning_rates_list:
    model = train_model(train_interactions_mat, train_user_features_mat, train_business_features_mat, 
                        train_interactions_weights, learning_rate)
    
    train_p, train_r, train_auc =  eval_metrics(model, train_interactions_mat, 
                                                train_business_features_mat, train_user_features_mat)
    
    # Validation Accuracy
    valid_p, valid_r, valid_auc = eval_metrics(model, valid_interactions_mat, 
                                               valid_business_features_mat, valid_user_features_mat)

    tr = {"learning_rate": learning_rate, "precision": train_p, "recall":train_r, "auc":train_auc}
    te = {"learning_rate": learning_rate, "precision": valid_p, "recall":valid_r, "auc":valid_auc}
    
    print(tr)
    print(te, "\n")
    
    train_lr.append(tr)
    valid_lr.append(te)

Tuning Using validation Set: 

Learning Rate  0.01
Epochs  30
{'learning_rate': 0.01, 'precision': 0.022358878, 'recall': 0.039431366912638904, 'auc': 0.79679453}
{'learning_rate': 0.01, 'precision': 0.0079280995, 'recall': 0.039640491958372756, 'auc': 0.78773135} 

Learning Rate  0.03
Epochs  30
{'learning_rate': 0.03, 'precision': 0.021592557, 'recall': 0.03774732765814362, 'auc': 0.79967374}
{'learning_rate': 0.03, 'precision': 0.007704194, 'recall': 0.03852097130242826, 'auc': 0.7904477} 

Learning Rate  0.05
Epochs  30
{'learning_rate': 0.05, 'precision': 0.022560704, 'recall': 0.03873765249800165, 'auc': 0.8009239}
{'learning_rate': 0.05, 'precision': 0.007839798, 'recall': 0.039198990854619996, 'auc': 0.79167354} 

Learning Rate  0.07
Epochs  30
{'learning_rate': 0.07, 'precision': 0.022563858, 'recall': 0.03874046439351566, 'auc': 0.8013522}
{'learning_rate': 0.07, 'precision': 0.007839798, 'recall': 0.039198990854619996, 'auc': 0.792071} 

Learning Rate  0.09
Epochs  30
{'lear

In [85]:
best_learning_rate = 0.13
print("Best Learning parameter found: ", best_learning_rate)
print("\n",train_lr, "\n\n")
print(valid_lr)

Best Learning parameter found:  0.13

 [{'learning_rate': 0.01, 'precision': 0.022358878, 'recall': 0.039431366912638904, 'auc': 0.79679453}, {'learning_rate': 0.03, 'precision': 0.021592557, 'recall': 0.03774732765814362, 'auc': 0.79967374}, {'learning_rate': 0.05, 'precision': 0.022560704, 'recall': 0.03873765249800165, 'auc': 0.8009239}, {'learning_rate': 0.07, 'precision': 0.022563858, 'recall': 0.03874046439351566, 'auc': 0.8013522}, {'learning_rate': 0.09, 'precision': 0.022567015, 'recall': 0.03876020839386515, 'auc': 0.80158055}, {'learning_rate': 0.11, 'precision': 0.02257332, 'recall': 0.03876019387227914, 'auc': 0.8017475}, {'learning_rate': 0.13, 'precision': 0.02262693, 'recall': 0.03891535609250064, 'auc': 0.80189353}, {'learning_rate': 0.15, 'precision': 0.022614319, 'recall': 0.038917837325383675, 'auc': 0.80191}, {'learning_rate': 0.17, 'precision': 0.022598548, 'recall': 0.0387737822725478, 'auc': 0.8021215}, {'learning_rate': 0.19, 'precision': 0.02262693, 'recall': 

In [None]:
# plotted the Precision, Recall, AUC
train_pre = [a['precision'] for a in train_lr]
train_rec = [a['recall'] for a in train_lr]
train_auc = [a['auc'] for a in train_lr]

valid_pre = [a['precision'] for a in valid_lr]
valid_rec = [a['recall'] for a in valid_lr]
valid_auc = [a['auc'] for a in valid_lr]


plt.subplots_adjust(0.1, 0.1, 2, 1.4)

plt.subplot(1, 2, 1)
plt.plot(learning_rates_list, train_pre, marker='o')
plt.plot(learning_rates_list, train_rec, marker='o')
plt.plot(learning_rates_list, train_auc, marker='o')

plt.legend(['Precision', 'Recall', 'AUC'])
plt.title('Performace on Train Dataset')
plt.xlabel('Learning rate')
plt.ylabel('Performance')

plt.subplot(1, 2, 2)
plt.plot(learning_rates_list, valid_pre, marker='o')
plt.plot(learning_rates_list, valid_rec, marker='o')
plt.plot(learning_rates_list, valid_auc, marker='o')

plt.legend(['Precision', 'Recall', 'AUC'])
plt.title('Performace on Validation Dataset')
plt.xlabel('Learning rate')
plt.ylabel('Performance')

plt.show()

2. Tuning for hyperparameter - **epochs**

In [25]:
train_epoch = []
valid_epoch = []
print("Tuning Using validation Set: \n")
epochs_list = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

for epochs in epochs_list:
    model = train_model(train_interactions_mat, train_user_features_mat, train_business_features_mat, 
                        train_interactions_weights, learning_rate_p = best_learning_rate, epochs_p = epochs)
    
    train_p, train_r, train_auc =  eval_metrics(model, train_interactions_mat, 
                                                train_business_features_mat, train_user_features_mat)
    
    # Validation Accuracy
    valid_p, valid_r, valid_auc = eval_metrics(model, valid_interactions_mat, 
                                               valid_business_features_mat, valid_user_features_mat)

    tr = {"epochs": learning_rate, "precision": train_p, "recall":train_r, "auc":train_auc}
    te = {"epochs": learning_rate, "precision": valid_p, "recall":valid_r, "auc":valid_auc}
    
    print(tr)
    print(te, "\n")
    
    train_epoch.append(tr)
    valid_epoch.append(te)

Tuning Using validation Set: 

Learning Rate  0.13
Epochs  5
{'epochs': 0.01, 'precision': 0.022576476, 'recall': 0.0387795901018924, 'auc': 0.80122393}
{'epochs': 0.01, 'precision': 0.007849258, 'recall': 0.03924629454430779, 'auc': 0.7919521} 

Learning Rate  0.13
Epochs  10
{'epochs': 0.01, 'precision': 0.022601703, 'recall': 0.03880110230133411, 'auc': 0.8014045}
{'epochs': 0.01, 'precision': 0.007846106, 'recall': 0.03923052664774519, 'auc': 0.7921605} 

Learning Rate  0.13
Epochs  15
{'epochs': 0.01, 'precision': 0.022567013, 'recall': 0.03874807020905783, 'auc': 0.80167556}
{'epochs': 0.01, 'precision': 0.007849259, 'recall': 0.03924629454430779, 'auc': 0.7923665} 

Learning Rate  0.13
Epochs  20
{'epochs': 0.01, 'precision': 0.022636391, 'recall': 0.038879461422243154, 'auc': 0.801809}
{'epochs': 0.01, 'precision': 0.007849258, 'recall': 0.03924629454430779, 'auc': 0.79247844} 

Learning Rate  0.13
Epochs  25
{'epochs': 0.01, 'precision': 0.022576476, 'recall': 0.03875955886595

In [15]:
best_epochs = 25
print("Best Number of Epochs found: ", best_epochs)
print("\n",train_epoch, "\n\n")
print(valid_epoch)

Best Number of Epochs found:  25

 [{'epochs': 0.19, 'precision': 0.022529172, 'recall': 0.03869798832411684, 'auc': 0.80075896}, {'epochs': 0.19, 'precision': 0.022585934, 'recall': 0.03880133541404633, 'auc': 0.8013791}, {'epochs': 0.19, 'precision': 0.022598548, 'recall': 0.03878807377307144, 'auc': 0.80161536}, {'epochs': 0.19, 'precision': 0.022548094, 'recall': 0.03872432909468106, 'auc': 0.8016837}, {'epochs': 0.19, 'precision': 0.0226427, 'recall': 0.03889457816829866, 'auc': 0.80186665}, {'epochs': 0.19, 'precision': 0.022557553, 'recall': 0.038749140216728206, 'auc': 0.8017779}, {'epochs': 0.19, 'precision': 0.022636393, 'recall': 0.038876458578325085, 'auc': 0.801918}, {'epochs': 0.19, 'precision': 0.022652162, 'recall': 0.03887154730048204, 'auc': 0.8020284}, {'epochs': 0.19, 'precision': 0.02258278, 'recall': 0.03876605866752485, 'auc': 0.80199736}, {'epochs': 0.19, 'precision': 0.0226427, 'recall': 0.03886791303383353, 'auc': 0.80201495}] 


[{'epochs': 0.19, 'precision':

In [None]:
# plotted the Precision, Recall, AUC
train_pre = [a['precision'] for a in train_epoch]
train_rec = [a['recall'] for a in train_epoch]
train_auc = [a['auc'] for a in train_epoch]

valid_pre = [a['precision'] for a in valid_epoch]
valid_rec = [a['recall'] for a in valid_epoch]
valid_auc = [a['auc'] for a in valid_epoch]


plt.subplots_adjust(0.1, 0.1, 2, 1.4)

plt.subplot(1, 2, 1)
plt.plot(epochs_list, train_pre, marker='o')
plt.plot(epochs_list, train_rec, marker='o')
plt.plot(epochs_list, train_auc, marker='o')

plt.legend(['Precision', 'Recall', 'AUC'])
plt.title('Performace on Train Dataset')
plt.xlabel('Epochs')
plt.ylabel('Performance')

plt.subplot(1, 2, 2)
plt.plot(epochs_list, valid_pre, marker='o')
plt.plot(epochs_list, valid_rec, marker='o')
plt.plot(epochs_list, valid_auc, marker='o')

plt.legend(['Precision', 'Recall', 'AUC'])
plt.title('Performace on Validation Dataset')
plt.xlabel('Epochs')
plt.ylabel('Performance')

plt.show()


##### 5.2 FM without Features

1. Tuning for hyperparameter - **learning_rate**

In [79]:
# train data
train_interactions_mat_wf = train_interactions_mat.copy()

# test data
test_interactions_mat_wf = test_interactions_mat.copy()
r = test_interactions_mat_wf.get_shape()[0]
c = train_interactions_mat_wf.get_shape()[1] - test_interactions_mat_wf.get_shape()[1]
additional_business_mat = coo_matrix(np.zeros([r, c]), dtype = np.float32)
test_interactions_mat_wf = coo_matrix(np.hstack((test_interactions_mat_wf.toarray(), additional_business_mat.toarray())))
print("shape of test_interactions_mat_wf: ", test_interactions_mat_wf.get_shape())


# validation data
valid_interactions_mat_wf = valid_interactions_mat.copy()
r = valid_interactions_mat_wf.get_shape()[0]
c = train_interactions_mat_wf.get_shape()[1] - valid_interactions_mat_wf.get_shape()[1]

additional_business_mat = coo_matrix(np.zeros([r, c]), dtype = np.float32)
valid_interactions_mat_wf = coo_matrix(np.hstack((valid_interactions_mat_wf.toarray(), additional_business_mat.toarray())))
print("shape of valid_interactions_mat_wf: ", valid_interactions_mat_wf.get_shape())


shape of test_interactions_mat_wf:  (31710, 3675)
shape of valid_interactions_mat_wf:  (31710, 3675)


In [83]:
train_lr_wf = []
valid_lr_wf = []
print("Tuning Using validation Set: \n")
learning_rates_list = [0.01, 0.03, 0.05, 0.07, 0.09, 0.11, 0.13, 0.15, 0.17, 0.19]

for learning_rate in learning_rates_list:
    model = train_model_without_features(train_interactions_mat, train_interactions_weights, learning_rate)
    
    train_p, train_r, train_auc =  eval_metrics_without_features(model, train_interactions_mat)
    
    # Validation Accuracy
    valid_p, valid_r, valid_auc = eval_metrics_without_features(model, valid_interactions_mat_wf)

    tr = {"learning_rate": learning_rate, "precision": train_p, "recall":train_r, "auc":train_auc}
    te = {"learning_rate": learning_rate, "precision": valid_p, "recall":valid_r, "auc":valid_auc}
    
    print(tr)
    print(te, "\n")
    
    train_lr_wf.append(tr)
    valid_lr_wf.append(te)

Tuning Using validation Set: 

Learning Rate  0.01
Epochs  30
{'learning_rate': 0.01, 'precision': 0.045701675, 'recall': 0.0728546834368458, 'auc': 0.8359411}
{'learning_rate': 0.01, 'precision': 0.0004320404, 'recall': 0.0021917376222011986, 'auc': 0.5080379} 

Learning Rate  0.03
Epochs  30
{'learning_rate': 0.03, 'precision': 0.08600126, 'recall': 0.16646684097459635, 'auc': 0.92328733}
{'learning_rate': 0.03, 'precision': 0.00033112583, 'recall': 0.0016556291390728477, 'auc': 0.509432} 

Learning Rate  0.05
Epochs  30
{'learning_rate': 0.05, 'precision': 0.101324506, 'recall': 0.20811995673824893, 'auc': 0.9523789}
{'learning_rate': 0.05, 'precision': 0.00035635446, 'recall': 0.0018133081046988332, 'auc': 0.5084141} 

Learning Rate  0.07
Epochs  30
{'learning_rate': 0.07, 'precision': 0.09849259, 'recall': 0.21049341475332878, 'auc': 0.95966893}
{'learning_rate': 0.07, 'precision': 0.00040681177, 'recall': 0.0020498265531378114, 'auc': 0.5027879} 

Learning Rate  0.09
Epochs  30
{

In [86]:
best_learning_rate_wf = 0.05
print("Best Learning parameter found: ", best_learning_rate_wf)
print("\n",train_lr_wf, "\n\n")
print(valid_lr_wf)

Best Learning parameter found:  0.05

 [{'learning_rate': 0.01, 'precision': 0.045701675, 'recall': 0.0728546834368458, 'auc': 0.8359411}, {'learning_rate': 0.03, 'precision': 0.08600126, 'recall': 0.16646684097459635, 'auc': 0.92328733}, {'learning_rate': 0.05, 'precision': 0.101324506, 'recall': 0.20811995673824893, 'auc': 0.9523789}, {'learning_rate': 0.07, 'precision': 0.09849259, 'recall': 0.21049341475332878, 'auc': 0.95966893}, {'learning_rate': 0.09, 'precision': 0.09735099, 'recall': 0.20868999032717486, 'auc': 0.9626061}, {'learning_rate': 0.11, 'precision': 0.090286985, 'recall': 0.18942986828908948, 'auc': 0.96036196}, {'learning_rate': 0.13, 'precision': 0.07552507, 'recall': 0.15110098977927147, 'auc': 0.94846445}, {'learning_rate': 0.15, 'precision': 0.038234, 'recall': 0.0681520614801283, 'auc': 0.89857805}, {'learning_rate': 0.17, 'precision': 0.010753705, 'recall': 0.0119740083037716, 'auc': 0.8484811}, {'learning_rate': 0.19, 'precision': 0.0058751185, 'recall': 0.00

2. Tuning for hyperparameter - **epochs**

In [88]:
train_epoch_wf = []
valid_epoch_wf = []
print("Tuning Using validation Set: \n")
epochs_list = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

for epochs in epochs_list:
    model = train_model_without_features(train_interactions_mat, train_interactions_weights, 
                        learning_rate_p = best_learning_rate_wf, epochs_p = epochs)
    
    train_p, train_r, train_auc =  eval_metrics_without_features(model, train_interactions_mat)
    
    # Validation Accuracy
    valid_p, valid_r, valid_auc = eval_metrics_without_features(model, valid_interactions_mat_wf)

    tr = {"epochs": learning_rate, "precision": train_p, "recall":train_r, "auc":train_auc}
    te = {"epochs": learning_rate, "precision": valid_p, "recall":valid_r, "auc":valid_auc}
    
    print("Train set: ",tr)
    print("Validation set: ",te, "\n")
    
    train_epoch_wf.append(tr)
    valid_epoch_wf.append(te)

Tuning Using validation Set: 

Learning Rate  0.05
Epochs  5
Train set:  {'epochs': 0.19, 'precision': 0.06198676, 'recall': 0.10656149125154254, 'auc': 0.88581157}
Validation set:  {'epochs': 0.19, 'precision': 0.00039735102, 'recall': 0.001986754966887417, 'auc': 0.50656205} 

Learning Rate  0.05
Epochs  10
Train set:  {'epochs': 0.19, 'precision': 0.0822895, 'recall': 0.15514996270066458, 'auc': 0.92136055}
Validation set:  {'epochs': 0.19, 'precision': 0.00036896882, 'recall': 0.0018448438978240303, 'auc': 0.5108431} 

Learning Rate  0.05
Epochs  15
Train set:  {'epochs': 0.19, 'precision': 0.08941344, 'recall': 0.17479655774777142, 'auc': 0.9347146}
Validation set:  {'epochs': 0.19, 'precision': 0.00040365817, 'recall': 0.0020813623462630085, 'auc': 0.50747967} 

Learning Rate  0.05
Epochs  20
Train set:  {'epochs': 0.19, 'precision': 0.09528225, 'recall': 0.19071709201997988, 'auc': 0.94148415}
Validation set:  {'epochs': 0.19, 'precision': 0.00042257961, 'recall': 0.002128666035

In [89]:
best_epochs_wf = 35
print("Best Number of Epochs found: ", best_epochs_wf)
print("\n",train_epoch_wf,"\n\n")
print(valid_epoch_wf)

Best Number of Epochs found:  35

 [{'epochs': 0.19, 'precision': 0.06198676, 'recall': 0.10656149125154254, 'auc': 0.88581157}, {'epochs': 0.19, 'precision': 0.0822895, 'recall': 0.15514996270066458, 'auc': 0.92136055}, {'epochs': 0.19, 'precision': 0.08941344, 'recall': 0.17479655774777142, 'auc': 0.9347146}, {'epochs': 0.19, 'precision': 0.09528225, 'recall': 0.19071709201997988, 'auc': 0.94148415}, {'epochs': 0.19, 'precision': 0.097392, 'recall': 0.1994588740857212, 'auc': 0.9482284}, {'epochs': 0.19, 'precision': 0.09867235, 'recall': 0.20335510494643347, 'auc': 0.95178956}, {'epochs': 0.19, 'precision': 0.10089879, 'recall': 0.213685624476355, 'auc': 0.95380366}, {'epochs': 0.19, 'precision': 0.100879855, 'recall': 0.2135428913483959, 'auc': 0.9566728}, {'epochs': 0.19, 'precision': 0.103658155, 'recall': 0.22198699946079833, 'auc': 0.9586674}, {'epochs': 0.19, 'precision': 0.10398613, 'recall': 0.2220210317559777, 'auc': 0.96095896}] 


[{'epochs': 0.19, 'precision': 0.00039735

#### 6. Model Training - using learnt best hyperparameters

##### 6.1 FM with Features


In [90]:
model_final = train_model(train_interactions_mat, train_user_features_mat, train_business_features_mat, 
                          train_interactions_weights, learning_rate_p = best_learning_rate, epochs_p = best_epochs)

Learning Rate  0.13
Epochs  25


##### 6.2 FM without Features


In [96]:
model_final_wf = train_model_without_features(train_interactions_mat, train_interactions_weights, 
                        learning_rate_p = best_learning_rate_wf, epochs_p = best_epochs_wf)
    

Learning Rate  0.05
Epochs  35


#### 7. Model Evaluation - on Train and Test Data

##### 7.1 FM with Features


In [94]:
# test precision
test_p, test_r, test_auc = eval_metrics(model_final, test_interactions_mat, 
                                        test_business_features_mat, test_user_features_mat)

# training precision
train_p, train_r, train_auc =  eval_metrics(model_final, train_interactions_mat, 
                                            train_business_features_mat, train_user_features_mat)

print('\nPrecision: train %.3f, test %.3f' % (train_p, test_p))
print('Recall: train %.3f, test %.3f' % (train_r, test_r))
print('AUC: train %.2f, test %.3f' % (train_auc, test_auc))


Precision: train 0.023, test 0.004
Recall: train 0.039, test 0.036
AUC: train 0.80, test 0.749


##### 7.2 FM without Features


In [99]:
# test precision
wf_test_p, wf_test_r, wf_test_auc = eval_metrics_without_features(model_final_wf, test_interactions_mat_wf)

# training precision
wf_train_p, wf_train_r, wf_train_auc =  eval_metrics_without_features(model_final_wf, train_interactions_mat_wf)

print('\nPrecision: train %.3f, test %.3f' % (wf_train_p, wf_test_p))
print('Recall: train %.3f, test %.3f' % (wf_train_r, wf_test_r))
print('AUC: train %.2f, test %.3f' % (wf_train_auc, wf_test_auc))


Precision: train 0.101, test 0.000
Recall: train 0.211, test 0.003
AUC: train 0.95, test 0.506


#### 8. Generating Recommendations

In [111]:
def get_recommendation_score(model, user_business_df):    
    recommendation_score = []
    print("Number of Pairs for recommendations: ", user_business_df.shape[0])
    user_business_df = user_business_df.drop_duplicates()
    print("Number of unique Pairs for recommendations: ", user_business_df.shape[0], "\n\n")
    user_business_df = user_business_df.reset_index(drop=True)

    user_business_df['rating'] = 0

    for i, row in user_business_df.iterrows():
        if i%1000==0:
            print("Processed ",i, " user-business recommendation pair")  
        uid = row['user_id']
        bid = row['business_id']
        ub_df = user_business_df[i:i+1]

        dummy_int_mat, dummy_int_weights, business_features_mat, user_features_mat = get_rating_user_business_mat(ub_df, user_df, business_df)

        u_mapped = users_map[uid]
        b_mapped = business_map[bid]
        predictions = model.predict(u_mapped,
                            [b_mapped],
                            user_features=user_features_mat,
                            item_features=business_features_mat)
        recommendation_score.append(predictions[0])
    
    user_business_df['recommendation_score'] = recommendation_score
    return user_business_df

In [112]:
def find_top_k(x, k):
    aa= x.sort_values(by=['recommendation_score'], ascending = False).head(k)
    return list(aa['business_id'])

def find_top_k_recommendation(user_business_df, k):
    df = user_business_df.groupby('user_id').apply(lambda x: find_top_k(x, k)).reset_index(drop = False)
    df.columns = ['user_id', 'recommendations']
    return df

In [None]:
k = 10
user_business_df = recommendation_df
df_re_score = get_recommendation_score(model_final, user_business_df)

top_k_recommendations_df = find_top_k_recommendation(df_re_score, k)
top_k_recommendations_df.to_csv("top_k_recommendations.csv", index = False)

Number of Pairs for recommendations:  1745448
Number of unique Pairs for recommendations:  1745448 


Processed  0  user-business recommendation pair
Processed  1000  user-business recommendation pair
Processed  2000  user-business recommendation pair
Processed  3000  user-business recommendation pair
Processed  4000  user-business recommendation pair
Processed  5000  user-business recommendation pair
Processed  6000  user-business recommendation pair
Processed  7000  user-business recommendation pair
Processed  8000  user-business recommendation pair
Processed  9000  user-business recommendation pair
Processed  10000  user-business recommendation pair
Processed  11000  user-business recommendation pair
Processed  12000  user-business recommendation pair
Processed  13000  user-business recommendation pair
Processed  14000  user-business recommendation pair
Processed  15000  user-business recommendation pair
Processed  16000  user-business recommendation pair
Processed  17000  user-busi

Processed  155000  user-business recommendation pair
Processed  156000  user-business recommendation pair
Processed  157000  user-business recommendation pair
Processed  158000  user-business recommendation pair
Processed  159000  user-business recommendation pair
Processed  160000  user-business recommendation pair
Processed  161000  user-business recommendation pair
Processed  162000  user-business recommendation pair
Processed  163000  user-business recommendation pair
Processed  164000  user-business recommendation pair
Processed  165000  user-business recommendation pair
Processed  166000  user-business recommendation pair
Processed  167000  user-business recommendation pair
Processed  168000  user-business recommendation pair
Processed  169000  user-business recommendation pair
Processed  170000  user-business recommendation pair
Processed  171000  user-business recommendation pair
Processed  172000  user-business recommendation pair
Processed  173000  user-business recommendatio

Processed  310000  user-business recommendation pair
Processed  311000  user-business recommendation pair
Processed  312000  user-business recommendation pair
Processed  313000  user-business recommendation pair
Processed  314000  user-business recommendation pair
Processed  315000  user-business recommendation pair
Processed  316000  user-business recommendation pair
Processed  317000  user-business recommendation pair
Processed  318000  user-business recommendation pair
Processed  319000  user-business recommendation pair
Processed  320000  user-business recommendation pair
Processed  321000  user-business recommendation pair
Processed  322000  user-business recommendation pair
Processed  323000  user-business recommendation pair
Processed  324000  user-business recommendation pair
Processed  325000  user-business recommendation pair
Processed  326000  user-business recommendation pair
Processed  327000  user-business recommendation pair
Processed  328000  user-business recommendatio

In [209]:
top_k_recommendations_df

Unnamed: 0,user_id,recommendations
0,33d2OBr5yMWMMW_ibYbqIA,[QnTuluWsuNb3aYCl-J9HVQ]
1,SORwHZxyWwR8iv_ZrMICEg,[FUhJLCocwgZEiVn1Wg1KSg]
2,ZL_zmesRw89J8PrsdJg6Uw,[0OdZXIKQypu6vplpxFilsA]
3,iboD-HEmzhLnqAbLYN-Qhw,[FMo1PJTUV5OpyiZlnTM1Rg]
4,n45NIRpIDhu3iurWXzAVjg,[99kGGQoig4YaRi-52VtqMA]
