In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 4.4 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1619447 sha256=3e7e28ee8b786e7941fc87e1dbca024c568a76b11b718cef4ac1150808435148
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1


In [None]:
import pandas as pd
import numpy as np

from keras import Sequential
from keras.layers import InputLayer, Flatten, Embedding, Dense, Dropout, Reshape
from keras.models import Model

from collections import defaultdict
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split

from tensorflow.keras.optimizers import Adam


from google.colab import drive 



In [None]:
from surprise import SVD
from surprise import KNNBasic

from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import KFold
from surprise.model_selection import GridSearchCV

## Precision and recall at K

In [None]:
def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Compute precision@k and recall@k separately for every user.

    Args:
        predictions: iterable of 5-element records unpacked as
            ``(uid, iid, true_rating, estimated_rating, details)``.
        k: how many of each user's highest-estimated items are inspected.
        threshold: a rating (true or estimated) counts as relevant /
            recommended when it is ``>= threshold``.

    Returns:
        ``(precisions, recalls)``: two dicts keyed by user id. A metric
        that is undefined for a user (nothing recommended / nothing
        relevant) is stored as 0.
    """
    # Bucket (estimated, true) rating pairs by user id.
    ratings_by_user = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        ratings_by_user[uid].append((est, true_r))

    precisions, recalls = {}, {}
    for uid, pairs in ratings_by_user.items():
        # Highest estimates first; the leading k entries are the candidates
        # that would be shown to the user.
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:k]

        # Relevant items are counted over ALL of the user's ratings ...
        n_rel = sum(actual >= threshold for _, actual in ranked)
        # ... while recommended items are counted inside the top k only.
        n_rec_k = sum(guess >= threshold for guess, _ in top_k)
        n_rel_and_rec_k = sum(
            (actual >= threshold) and (guess >= threshold)
            for guess, actual in top_k
        )

        # Precision@k: share of recommended items that are relevant.
        if n_rec_k != 0:
            precisions[uid] = n_rel_and_rec_k / n_rec_k
        else:
            precisions[uid] = 0

        # Recall@k: share of relevant items that were recommended.
        if n_rel != 0:
            recalls[uid] = n_rel_and_rec_k / n_rel
        else:
            recalls[uid] = 0

    return precisions, recalls

## Load Data

In [None]:
# Mount Google Drive (google.colab) at /content/gdrive so the ratings CSV can be read from it.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Folder on the mounted Drive that holds the ratings export. The path is
# anchored at the '/content/gdrive' mount point so this cell does not depend
# on the kernel's current working directory.
path = '/content/gdrive/My Drive/MBA_USP/TCC/ifood'
df_ifood_ratings = pd.read_csv(path + '/user_rating_calc.csv', encoding="utf-8-sig", sep=";")

# Keep only the three columns the recommender needs, renamed to
# itemID / userID / rating. Later cells select and pivot by these names.
ratings_dict = {'itemID': df_ifood_ratings['dishId'],
                'userID': df_ifood_ratings['userId'],
                'rating': df_ifood_ratings['finalRating']}

df = pd.DataFrame(ratings_dict)

In [None]:
# A reader is still needed, but only the rating_scale param is required.
# NOTE(review): rating_scale=(0, 10) is assumed to cover the range of 'finalRating' — confirm against the CSV.
reader = Reader(rating_scale=(0, 10))
# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

In [None]:
def get_best_parameters(selected_algo, selected_param_grid, cv=5, measure='rmse', dataset=None):
  """Grid-search an algorithm and return the best configuration found.

  Prints the best cross-validated score for ``measure`` and the parameter
  combination that produced it.

  Args:
    selected_algo: algorithm class handed to ``GridSearchCV`` (e.g. ``SVD``).
    selected_param_grid: dict mapping parameter names to the values to try.
    cv: forwarded unchanged to ``GridSearchCV`` as its ``cv`` argument
      (defaults to 5, the value previously hard-coded).
    measure: accuracy measure used to rank the combinations; it is also the
      key used to read ``best_score`` / ``best_params`` / ``best_estimator``.
    dataset: dataset to search on. When ``None`` the notebook-level ``data``
      object is used, which keeps existing two-argument calls working.

  Returns:
    ``gs.best_estimator[measure]``. This function never calls ``fit`` on the
    returned object itself; callers are expected to fit it before predicting.
  """
  if dataset is None:
    # Resolved at call time so the notebook-level dataset is picked up.
    dataset = data

  gs = GridSearchCV(selected_algo, selected_param_grid, measures=[measure], cv=cv)
  gs.fit(dataset)

  # Best cross-validated score for the chosen measure.
  print(gs.best_score[measure])

  # Parameter combination that achieved that score.
  print(gs.best_params[measure])

  return gs.best_estimator[measure]

def print_precision_recall(selected_model, k=10, threshold=3, n_splits=5, dataset=None, random_state=None):
  """Cross-validate a model and print its mean precision@k and recall@k.

  For every fold the model is re-fitted on the train split and evaluated on
  the test split; the per-user metrics are averaged within the fold, and the
  fold averages are then averaged again for the final printout.

  Args:
    selected_model: object exposing ``fit(trainset)`` and ``test(testset)``.
    k: cut-off forwarded to ``precision_recall_at_k`` (default 10).
    threshold: relevance threshold forwarded to ``precision_recall_at_k``.
      The default of 3 is the value this notebook has always used here; it
      differs from that function's own default of 3.5.
    n_splits: number of folds passed to ``KFold`` (default 5).
    dataset: dataset to split. When ``None`` the notebook-level ``data``
      object is used, so existing one-argument calls keep working.
    random_state: forwarded to ``KFold``; pass an int to get the same
      splits on every run. ``None`` (default) keeps the previous behaviour.

  Returns:
    None. The two averages are only printed.
  """
  if dataset is None:
    # Resolved at call time so the notebook-level dataset is picked up.
    dataset = data

  precision_list = []
  recall_list = []
  kf = KFold(n_splits=n_splits, random_state=random_state)
  for trainset, testset in kf.split(dataset):
    selected_model.fit(trainset)
    predictions = selected_model.test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=k, threshold=threshold)
    # Average over the users present in this fold's test split.
    precision_list.append(sum(precisions.values()) / len(precisions))
    recall_list.append(sum(recalls.values()) / len(recalls))
  print('Precision: ', np.mean(precision_list))
  print('Recall: ', np.mean(recall_list))

In [None]:
# User x item rating matrix: one row per userID, one column per itemID,
# NaN wherever the user has not rated the item.
df_user_rating_matrix = data.df.pivot(index='userID', columns='itemID', values='rating')

# Count rated vs. unrated cells to measure how sparse the matrix is.
filled = df_user_rating_matrix.notna().sum().sum()
not_filled = df_user_rating_matrix.isna().sum().sum()
print('Não preenchidos:', not_filled)
print('Preenchidos:', filled)
print('Percentual preenchido: ', (filled / df_user_rating_matrix.size) * 100, '%')
df_user_rating_matrix.head()


Não preenchidos: 231541
Preenchidos: 21446
Percentual preenchido:  8.477115424903255 %


itemID,000e66a0-1e19-4385-a748-7826478aa152,001cf782-5866-4d67-8871-a28adb58d9aa,003314f9-4399-4032-822f-19c746ca837a,0044680b-2628-4447-a492-eec271ea62cb,006c98af-2c7a-42cf-b64f-3c7e709146b7,00737b3a-b130-47e7-8ef6-469acb2ec1e4,00742dda-ea0a-4c46-b040-121e617b9fe3,0086f944-d4ec-4618-94f3-6060647fc566,00899de3-d612-4eea-b4d3-120faacddbed,008b6ce3-6fb4-4a9b-8319-cecf8ac42964,008ebd0b-ea7f-469e-9139-c0054d2aa439,00a1ec35-ef18-4b9b-876a-1947710f7925,00ace3a2-cb73-4010-9f02-2a086f085a3a,00c3d7b8-8138-4dfe-b84f-bb2c5f3b2185,00c3fac8-6b07-44d9-932d-9aef93c89f76,00df48bf-7b34-41b3-9c39-fbcb4864ec0c,00f1ce47-45ef-4470-aec4-83fe7035c1f5,00fcd33a-2e7b-48cb-ab25-45fa19860056,00fff2a0-4cf3-4fc4-b423-d0cc5cca97c5,011ce886-7d4d-404e-91cc-da466970b1b0,012b2455-8b31-48b1-91b1-265097dac65c,015a103e-46a9-444f-83ba-52741607dd28,015caaa7-c9f3-449b-9e3d-771617067a52,016442ba-1d87-42df-a0c8-6a2c461629f1,01668891-a543-4376-aca4-4d8039d9439d,0175576d-b3ab-4c2b-a73b-bf344bc62f78,0175df6a-3910-47cc-bcb6-ca3f6d2f83f9,017caea1-29d7-463e-8a8e-8d6b74840238,0198603e-5985-487b-a056-ae9d28a16a0f,0198a146-e5c5-403d-9370-dcef99c63160,01af5ff6-7347-4a82-b9ea-3d4956f90cac,01bb639b-4ed3-4fdb-b66f-3560929d72c1,01c83937-1c9e-431b-8db0-5a272d106663,01d3f391-50cb-48b8-8a61-0395a67487cf,01e3ba11-08b4-44ee-80c0-1a802f365d5d,01ea21f4-65b6-443d-8e43-4c4a9cbfd507,01fbac0a-dd9d-475d-89bc-7a0a81ba9d34,0202171c-9db6-4943-98d5-7c8776264803,0216ae4e-5d15-46f4-9c81-0ae2627c572c,02274f59-32b8-4723-aa83-56dd17a1577b,...,fd4f2cc5-9b92-432b-9761-b0dfb0b5ffd8,fd756d12-0047-4b57-9bae-7bb90e5493eb,fd7673fb-6ab5-4259-bf29-25bab3926ea1,fd7d1b3e-abba-4a9a-bc7f-0716055f496a,fd81aed9-11f8-4446-a917-b221bad33349,fd850904-7535-4159-a54f-2bcc6da9c142,fda20bc4-eba7-4f7d-a7ba-e895d619a263,fda56b4f-47ce-4fd3-9a08-4edf4145025c,fdcc3379-dff4-4e82-afa4-e7fdd9afc9ff,fdd3be1d-0499-4aac-846a-223b1ebc4ae0,fde9bc88-3418-424e-9d75-9278367e0430,fdf471b8-70f1-4563-99dc-f724e5c3a541,fdfc9e66-2634-4da6-88eb-ca4f9ab45cd6,fdfdde0f-7950-4046-99b7-d36e
f41a7d01,fe0292c1-0811-46a1-bf91-f38b4350a174,fe2cb066-8b62-4c4c-bbf7-48b5c29704e2,fe2e7cb8-0ca1-46e9-844c-38bcaf24fae9,fe2e8770-925e-47e2-8789-278e5d3e4f4f,fe371604-8481-425a-a57f-1cb8968776ba,fe3faae4-f838-4580-909c-b2a32fb3903a,fe427133-d32a-45ed-b1e0-54994f4bcfd3,fe49166c-413c-4b0f-a796-826042a952b7,fe77ed40-f470-4a4e-9e30-512eb8f4f54b,fe8a3ecd-9584-4336-9543-41ee1d4e831e,fec64999-8397-4b61-9e9f-bef3495220cd,fec87a64-8753-4db4-9c75-c86ac427c4be,feceaa21-e727-428e-9c01-6f018d46403a,fef77daf-d5fd-4ffe-88cb-00c0233450fd,ff28bfea-32a0-4db1-8420-b44c3612ffa0,ff31d303-a09d-412b-99fd-5a8e661301b4,ff58aa16-93fb-41a8-ae29-9562b6323f5a,ff6770b4-6d86-40c9-9e6b-1d82cc6f5201,ff714041-921a-4ee9-a5d8-075e0e3364ee,ff74d1d8-d84c-4d75-994b-853f6ef4d68d,ff97b20e-759e-4ff5-99ff-31c50598648f,ff98bb6e-6c47-489e-8d60-3ccf64f309f6,ff9e5685-ea33-4887-851e-5e28e6d2304f,ffc8ab18-4e0f-462c-8a6a-419c4065a665,ffde7d73-fd06-4a9b-8837-d8f293ff8e6d,ffe9617a-cbb3-47e1-9966-524647a60f60
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,,,,,,,,,,,,,,,,3.0,,3.0,,,,,,,,,,,,,,,,,,,,3.0,,,...,,,,,,3.0,,,,,,,,,,,,,,,3.0,,,,3.0,,,,,,,,3.0,,,,,3.0,,
2,,,,,,,,,,,,,,,,,2.0,3.0,3.0,,,,,,3.0,,,,,,,,,3.0,,,,,,,...,,,,,,,,,,2.0,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,
4,3.0,,2.0,,,,2.0,,,,5.0,,,,,,,3.0,,,,,,,,,,,,,,,5.0,,,2.0,,,,,...,,,,,,,,,,,,,,,,,,3.0,,,3.0,,,,,,,,,,,5.0,,,,,,,5.0,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,,5.0,,...,,,,,,,3.0,,,,,,,5.0,,,,,5.0,,,,,,,5.0,,,5.0,,,,,,,,,,,


## SVD

In [None]:

# Grid of SVD hyper-parameters to try: 3 values of n_epochs x 3 values of lr_all.
param_grid = {'n_epochs': [5, 10, 50], 'lr_all': [0.001, 0.002, 0.005] }

# Run the grid search; prints the best RMSE and the winning combination.
svd_algo = get_best_parameters(SVD, param_grid)


0.9263521882957468
{'n_epochs': 10, 'lr_all': 0.001}


In [None]:
# Cross-validated precision/recall for the SVD configuration selected above.
print_precision_recall(svd_algo)

Precision:  0.8349692257855523
Recall:  0.21063108084919163


## KNN

In [None]:
# Only min_k is searched for KNNBasic; every other parameter keeps its default.
knn_param_grid = {'min_k': [1, 5, 10, 50] }
# Run the grid search; prints the best RMSE and the winning combination.
knn_algo = get_best_parameters(KNNBasic, knn_param_grid)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [None]:
# Cross-validated precision/recall for the KNN configuration selected above.
print_precision_recall(knn_algo)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Precision:  0.8228636216391317
Recall:  0.22984046722319737
