# Group Recommendation
This notebook runs all the steps needed to train and evaluate a group recommendation model based on 4-way graph-regularized Matrix Factorization and Markov Chain rank aggregation.

In [1]:
import numpy as np
import pickle

import DB.database as DB
import src.preprocessing as pre
import src.Solver as Solver
import src.MF as MF
import src.gr_recommender as gr
import src.mc as mc

## Step 1: Preprocess Data
### Set Dimensions and load data

In [2]:
# Sizes handed to the DB loaders below.
N = 100000  # Number of users
M = 100000  # Number of businesses
db = DB.Database()
# NOTE(review): the context manager is entered by hand and no matching
# __exit__/close call is visible in this notebook, so the handle presumably
# stays open until the kernel exits -- confirm, and release it explicitly once
# the loaded entities no longer need it.
db.__enter__()
friends, relations, business_attributes = DB.get_data(N, M)
# NOTE(review): `categroy_names` looks like a typo for `category_names`. It is
# not referenced again in the visible cells, but verify nothing else relies on
# the misspelled name before renaming it.
users, businesses, reviews, categroy_names, cities = DB.get_entities(db, N, M)

Got users
Got businesses
Got reviews


### Delete users with too few reviews and friends

In [3]:
# Threshold forwarded to the cold-start filter.
min_entries = 10

# `friends` and `business_attributes` are both inputs and outputs of this call,
# so re-running only this cell feeds the already-filtered versions back in.
# Restart and run from the data-loading cell to get reproducible shapes.
(
    relations_cut,
    user_idx,
    bus_idx,
    friends,
    business_attributes,
) = pre.cold_start_preprocessing(
    relations,
    friends,
    business_attributes,
    min_entries=min_entries,
)

Shape before: (100000, 100000)
Shape after: (808, 763)


### Split Data: 80% Training, 10% Validation, 10% Test

In [4]:
# Build the train/validation/test split over the filtered rating matrix.
r_data = Solver.Dataset(relations_cut)
# No copy is made here: unless `data` is a property that returns a copy,
# `rel_data` and `r_data.data` are the same object, so the two assignments
# below also change `r_data.data`.
rel_data = r_data.data
# Blank out the held-out entries so that only training ratings remain.
rel_data[r_data.test_ind] = 0
rel_data[r_data.val_ind] = 0

Original size:(808, 763)
Nonzero entries:15863
Train:12690, Val:1587, Test:1586


### Center Training Data and subtract means from Validation Data

In [5]:
# `rel_data` is replaced by the first value returned from Solver.center and the
# third return value is discarded. Because the input name is overwritten,
# re-running only this cell would centre already-centred data.
rel_data, means, _ = Solver.center(rel_data)
# Apply the same `means` to the held-out validation ratings so they are on the
# scale the model is trained on.
validation_val = Solver.center_val(r_data.val_values, r_data.val_ind, means)

### Get Graph regularization Matrices

In [6]:
# Business connectivity matrix, later passed to the factorization as
# `business_conn`.
# NOTE(review): the meaning of the positional arguments `0` and `2` is not
# visible from this notebook -- name them (or pass by keyword) once confirmed.
# Also confirm that passing the original `M` is intended now that `rel_data`
# has been cut down by the cold-start step.
business_conn = DB.get_buss_conn_mat(M, rel_data, 0, 2)

## Step 2: Train Factorization Model
### Set Parameters

In [7]:
# Hyperparameters forwarded to the factorization routine in the next cell.
k = 256         # dimension of the latent space
reg_lambda = 2  # weight of the L2 regularization term
gamma = 2       # weight of the graph regularization term
max_steps = 100 # upper bound on the number of training iterations
patience = 3    # number of allowed training steps with increasing validation loss
log_every = 1   # loss logging interval; the training cell also passes it as eval_every

### Train Model

In [8]:
# Fit the factor matrices. The same interval (`log_every`) drives both logging
# and validation evaluation.
(
    U,
    V,
    W,
    val_losses,
    train_loss,
    conv,
) = MF.three_latent_factor_connected_graph_alternating_optimization(
    friends,
    business_attributes,
    rel_data,
    business_conn,
    k=k,
    val_idx=r_data.val_ind,
    val_values=validation_val,
    reg_lambda=reg_lambda,
    gamma=gamma,
    max_steps=max_steps,
    log_every=log_every,
    patience=patience,
    eval_every=log_every,
)

Iteration 0, training_loss: 9457279.296873, review error: 3244079.220109, validation loss: 408723.436467
Iteration 1, training_loss: 16665.308081, review error: 10811.635747, validation loss: 1594.005273
Iteration 2, training_loss: 8693.077969, review error: 7250.690887, validation loss: 1527.894274
Iteration 3, training_loss: 6673.218393, review error: 5559.216565, validation loss: 1513.164473
Iteration 4, training_loss: 6247.043885, review error: 5179.157731, validation loss: 1514.699040
Iteration 5, training_loss: 6114.201171, review error: 5059.048715, validation loss: 1517.097890
Converged after 3 iterations


### Save Results and check test error

In [9]:
# Persist the learned factors together with the centring means so predictions
# can be reproduced without retraining. The file is written relative to the
# current working directory and overwritten on every run.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open('factorized_matrices.pickle', 'wb') as f:
    pickle.dump((U, V, W, means), f)

In [10]:
# Score the held-out test ratings. Unlike the validation values, the test
# values are passed uncentred together with `means`.
# NOTE(review): presumably RMSE adds the means back to the predictions -- confirm.
test_err = Solver.RMSE(U, V, r_data.test_ind, r_data.test_values, means)
# The recorded output shows the value wrapped in brackets, i.e. an
# array-like with one element rather than a plain float.
print("Root Mean Squared Error on the test set: ", test_err)

Root Mean Squared Error on the test set:  [0.96686274]


## Step 3: Group Recommendation
### Choose a group of friends and add filter criteria

In [11]:

g = 4  # group size

# Pick the group members; the two commented lines are alternative selection
# strategies offered by `gr` (swap one in to change how the group is formed).
# NOTE(review): no random seed is set anywhere in the visible cells, so if the
# selection is random the group will differ between runs -- confirm and seed.
user_ids = gr.get_friended_users(g, rel_data, friends)
#user_ids = gr.get_random_users(g, rel_data)
#user_ids = gr.get_nearest_users(g, U)

### Collaborative Filtering

In [14]:
# City used for filtering below; uncomment the literal to override it by hand.
city = gr.get_most_rated_city(user_ids, rel_data, businesses, bus_idx)
#city = 'Las Vegas'

# Predicted scores for the group, limited to the 1..5 rating range.
scores = gr.collaborative_filtering(user_ids, U, V, means)
clipped_scores = gr.clip(scores, 1, 5)

# Second argument: the group's columns of the centred matrix with the means
# added back.
filled_scores = gr.fill_real_ratings(
    clipped_scores,
    rel_data[:, user_ids].todense() + means[user_ids],
)

# Keep only 'Restaurants' in the chosen city.
filtered_scores, original_inds = gr.filter_cities_and_categories(
    filled_scores,
    businesses,
    city,
    ['Restaurants'],
    bus_idx,
)

# The second argument is half the number of remaining rows.
filtered_scores, idx_transition = gr.filter_misery(
    filtered_scores,
    filtered_scores.shape[0] // 2,
    original_inds,
)

### Markov Chain Simulation

In [15]:
# Uses the `mc` module imported in the first cell. While iterating on the
# module's source, enable `%autoreload 2` in the configuration cell instead of
# reloading it here, so the notebook's imports stay in one place.
ratings, rankings = gr.order(filtered_scores, idx_transition)

# NOTE(review): the first two positional arguments (2, 1000) are unnamed here;
# pass them by keyword once their parameter names are confirmed. The
# simulation is presumably stochastic and no seed is set in the visible cells,
# so the resulting order may vary between runs.
order = mc.simulate_markov_chains(
    2,
    1000,
    rankings,
    n=20,
    processes=20,
    min_samples=200,
    max_items_per_step=2,
    verbose=True,
)

[ 25 205]
1. Simulation ended; Found: [37, 348]; Total of 2 items
[188]
2. Simulation ended; Found: [318]; Total of 3 items
[58]
3. Simulation ended; Found: [112]; Total of 4 items
[11]
4. Simulation ended; Found: [14]; Total of 5 items
[6]
5. Simulation ended; Found: [6]; Total of 6 items
[212]
6. Simulation ended; Found: [361]; Total of 7 items
[211]
7. Simulation ended; Found: [359]; Total of 8 items
[210]
8. Simulation ended; Found: [358]; Total of 9 items
[209]
9. Simulation ended; Found: [355]; Total of 10 items
[208]
10. Simulation ended; Found: [354]; Total of 11 items
[207]
11. Simulation ended; Found: [352]; Total of 12 items
[206]
12. Simulation ended; Found: [351]; Total of 13 items
[204]
13. Simulation ended; Found: [347]; Total of 14 items
[203]
14. Simulation ended; Found: [345]; Total of 15 items
[202]
15. Simulation ended; Found: [341]; Total of 16 items
[201]
16. Simulation ended; Found: [340]; Total of 17 items
[200]
17. Simulation ended; Found: [339]; Total of 18 it

### Evaluate Results

In [16]:
# Resolve each recommended index to its display name, keeping the
# recommendation order.
# NOTE(review): this assumes the entries of `order` index directly into
# `businesses`; confirm no translation through `bus_idx` is needed.
names = [businesses[o].name for o in order]

# Header spelling fixed ("Recommended"). The bullets are only printed when
# there is at least one name, so an empty result no longer shows a lone "-".
print("Recommended Restaurants:")
if names:
    print("\n".join("- " + name for name in names))

Recommeded Restaurants: 
- Grand Lux Cafe
- I Love Sushi
- The Orleans Hotel & Casino
- LAVO Italian Restaurant & Lounge
- Bouchon at the Venezia Tower
- The Buffet
- Elara by Hilton Grand Vacations
- Ping Pang Pong
- The Hotel At Mandalay Bay
- Egg Works
- Lucille's Smokehouse Bar-B-Que
- The White Chocolate Grill
- Pho So 1
- Postino East
- Daily Dose
- Phoenix Public Market Cafe
- Palms Place
- Sweets Raku
- RM Seafood
- MTO Café
