## Collaborative filtering Recommendation system 

In [1]:
# Utilities
import math, random, warnings
from time import time
from datetime import datetime
from collections import defaultdict
from IPython.core.interactiveshell import InteractiveShell
from tabulate import tabulate
from IPython.display import display
# Mathematical calculation
import numpy as np
from scipy.sparse.linalg import svds
from sklearn import model_selection
from sklearn.metrics.pairwise import cosine_similarity

# Data handling
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

## Fake Rates

In [2]:
fake_rates = pd.read_csv("https://raw.githubusercontent.com/nayera540/recommendation/main/FakeRates.csv")
fake_rates.tail()

Unnamed: 0,userId,productId,rating
32972,A204K1OFE8OB9Y,664283be8d673868671baf82,1
32973,A19GSC3QMK694Q,664283be8d673868671baf83,3
32974,A19GSC3QMK694Q,664283be8d673868671baf84,2
32975,A1NNMOD9H36Q8E,664283be8d673868671baf85,4
32976,A1NNMOD9H36Q8E,664283be8d673868671baf86,5


## Rates from DB

In [3]:
import pymongo
import pandas as pd

In [4]:
client = pymongo.MongoClient("mongodb+srv://Reem:xsHvzkOTrpUukM42@cluster0.5lyger6.mongodb.net/project")
dbs = databases = client.list_database_names()
print(dbs)

['project', 'admin', 'local']


In [5]:
db = client.get_database("project")
collections = db.list_collection_names()
print(collections)

['users', 'reviews', 'Products', 'recommendations']


In [6]:
collection = db["reviews"]
cursor = collection.find({})
data = list(cursor)
ratings = pd.DataFrame(data)
ratings = ratings.drop(['review', 'createdAt', '_id', '__v'], axis=1)
ratings.rename(columns={'product': 'productId'}, inplace=True)
ratings.rename(columns={'user': 'userId'}, inplace=True)
ratings

Unnamed: 0,rating,productId,userId
0,4,6642838d8d673868671b72d8,6644b71ce892910dd8334d6f
1,5,6642838d8d673868671b72e1,6644b8e6e892910dd8334d8c
2,4,6642838d8d673868671b72e5,6644b8e6e892910dd8334d8c
3,5,6642838d8d673868671b72fa,6644b8e6e892910dd8334d8c
4,4,664283928d673868671b7a25,6644b8e6e892910dd8334d8c
5,4,6642839b8d673868671b84e6,6644b8e6e892910dd8334d8c
6,2,6642839e8d673868671b8a0d,6644b8e6e892910dd8334d8c
7,2,664283a08d673868671b8a71,6644b8e6e892910dd8334d8c
8,3,664283a08d673868671b8c11,6644b8e6e892910dd8334d8c
9,4,664283a08d673868671b8c1c,6644b8e6e892910dd8334d8c


In [7]:
# type(ratings['userId'].iloc(0))
ratings['userId'] = ratings['userId'].astype(str)

In [8]:
reviewer_counts = ratings.groupby('userId').size().reset_index(name='num_ratings').sort_values(by='num_ratings', ascending=False)
reviewer_counts

Unnamed: 0,userId,num_ratings
2,6644b89be892910dd8334d89,11
3,6644b8e6e892910dd8334d8c,11
0,6644b71ce892910dd8334d6f,10
1,6644b877e892910dd8334d87,1
4,664508d5aa8768000a4fd830,1


In [9]:
min_no_rates = 10
reviewers_with_enough_rates = reviewer_counts[reviewer_counts['num_ratings']>=min_no_rates]
reviewers_with_enough_rates

Unnamed: 0,userId,num_ratings
2,6644b89be892910dd8334d89,11
3,6644b8e6e892910dd8334d8c,11
0,6644b71ce892910dd8334d6f,10


In [10]:
reviewers_id = np.array(reviewers_with_enough_rates['userId'])
reviewers_id

array(['6644b89be892910dd8334d89', '6644b8e6e892910dd8334d8c',
       '6644b71ce892910dd8334d6f'], dtype=object)

In [11]:
ratings = ratings[ratings['userId'].isin(reviewers_id)]
ratings

Unnamed: 0,rating,productId,userId
0,4,6642838d8d673868671b72d8,6644b71ce892910dd8334d6f
1,5,6642838d8d673868671b72e1,6644b8e6e892910dd8334d8c
2,4,6642838d8d673868671b72e5,6644b8e6e892910dd8334d8c
3,5,6642838d8d673868671b72fa,6644b8e6e892910dd8334d8c
4,4,664283928d673868671b7a25,6644b8e6e892910dd8334d8c
5,4,6642839b8d673868671b84e6,6644b8e6e892910dd8334d8c
6,2,6642839e8d673868671b8a0d,6644b8e6e892910dd8334d8c
7,2,664283a08d673868671b8a71,6644b8e6e892910dd8334d8c
8,3,664283a08d673868671b8c11,6644b8e6e892910dd8334d8c
9,4,664283a08d673868671b8c1c,6644b8e6e892910dd8334d8c


## Concat Rates

In [30]:
ratings = pd.concat([fake_rates,ratings ], axis=0, ignore_index=True)
ratings.tail(35)

Unnamed: 0,userId,productId,rating
65951,A19GSC3QMK694Q,664283be8d673868671baf84,2
65952,A1NNMOD9H36Q8E,664283be8d673868671baf85,4
65953,A1NNMOD9H36Q8E,664283be8d673868671baf86,5
65954,6644b71ce892910dd8334d6f,6642838d8d673868671b72d8,4
65955,6644b8e6e892910dd8334d8c,6642838d8d673868671b72e1,5
65956,6644b8e6e892910dd8334d8c,6642838d8d673868671b72e5,4
65957,6644b8e6e892910dd8334d8c,6642838d8d673868671b72fa,5
65958,6644b8e6e892910dd8334d8c,664283928d673868671b7a25,4
65959,6644b8e6e892910dd8334d8c,6642839b8d673868671b84e6,4
65960,6644b8e6e892910dd8334d8c,6642839e8d673868671b8a0d,2


## Sparse Matrix

In [13]:
# Create the User-Item sparse matrix
user_item = ratings.pivot(index='userId', columns='productId', values='rating').fillna(0)
print('Shape of User-Item sparse matrix:', user_item.shape)
user_item.head()

Shape of User-Item sparse matrix: (2212, 15573)


productId,6642838d8d673868671b72d2,6642838d8d673868671b72d8,6642838d8d673868671b72e1,6642838d8d673868671b72e5,6642838d8d673868671b72fa,6642838d8d673868671b73f5,6642838d8d673868671b744e,6642838d8d673868671b7451,6642838d8d673868671b7458,6642838d8d673868671b7461,...,664283be8d673868671baf7d,664283be8d673868671baf7e,664283be8d673868671baf7f,664283be8d673868671baf80,664283be8d673868671baf81,664283be8d673868671baf82,664283be8d673868671baf83,664283be8d673868671baf84,664283be8d673868671baf85,664283be8d673868671baf86
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6644b71ce892910dd8334d6f,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6644b89be892910dd8334d89,3.0,0.0,0.0,0.0,0.0,4.0,5.0,2.0,3.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6644b8e6e892910dd8334d8c,0.0,0.0,5.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100UD67AHFODS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100WO06OQR8BQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Create the Item-User sparse matrix
item_user = user_item.T
item_user.head()

userId,6644b71ce892910dd8334d6f,6644b89be892910dd8334d89,6644b8e6e892910dd8334d8c,A100UD67AHFODS,A100WO06OQR8BQ,A105S56ODHGJEK,A105TOJ6LTVMBG,A109XLG7SJQAIA,A10AFVU66A79Y1,A10CRW7XRJBJ2G,...,AZFF4CX9MQ4AE,AZMY6E8B52L2T,AZNUHQSHZHSUE,AZPOUCM043IY8,AZQGJ5CEAJGXB,AZV2U6GU5QA6C,AZXQ0WME7X6UT,AZYJE40XW6MFG,AZZ5ASC403N74,AZZYW4YOE1B6E
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6642838d8d673868671b72d2,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72d8,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72e1,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72e5,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72fa,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Cosine Similarity

In [15]:
# Calculate the user-user similarity
user_similarity = cosine_similarity(user_item)
np.fill_diagonal(user_similarity, 0)
user_similarity_df = pd.DataFrame(user_similarity,index=user_item.index, columns=user_item.index)
user_similarity_df.head()

userId,6644b71ce892910dd8334d6f,6644b89be892910dd8334d89,6644b8e6e892910dd8334d8c,A100UD67AHFODS,A100WO06OQR8BQ,A105S56ODHGJEK,A105TOJ6LTVMBG,A109XLG7SJQAIA,A10AFVU66A79Y1,A10CRW7XRJBJ2G,...,AZFF4CX9MQ4AE,AZMY6E8B52L2T,AZNUHQSHZHSUE,AZPOUCM043IY8,AZQGJ5CEAJGXB,AZV2U6GU5QA6C,AZXQ0WME7X6UT,AZYJE40XW6MFG,AZZ5ASC403N74,AZZYW4YOE1B6E
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6644b71ce892910dd8334d6f,0.0,0.084832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6644b89be892910dd8334d89,0.084832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6644b8e6e892910dd8334d8c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100UD67AHFODS,0.0,0.0,0.0,0.0,0.0,0.0,0.082058,0.0,0.0,0.0,...,0.0,0.097017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100WO06OQR8BQ,0.0,0.0,0.0,0.0,0.0,0.0,0.124534,0.0,0.0,0.09244,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Calculate the item-item similarity
item_similarity = cosine_similarity(item_user)
np.fill_diagonal(item_similarity, 0)
item_similarity_df = pd.DataFrame(item_similarity, index=item_user.index, columns=item_user.index)
np.set_printoptions(threshold=np.inf)
np.seterr(over='ignore')
item_similarity_df.head()

productId,6642838d8d673868671b72d2,6642838d8d673868671b72d8,6642838d8d673868671b72e1,6642838d8d673868671b72e5,6642838d8d673868671b72fa,6642838d8d673868671b73f5,6642838d8d673868671b744e,6642838d8d673868671b7451,6642838d8d673868671b7458,6642838d8d673868671b7461,...,664283be8d673868671baf7d,664283be8d673868671baf7e,664283be8d673868671baf7f,664283be8d673868671baf80,664283be8d673868671baf81,664283be8d673868671baf82,664283be8d673868671baf83,664283be8d673868671baf84,664283be8d673868671baf85,664283be8d673868671baf86
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6642838d8d673868671b72d2,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72d8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72e1,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72e5,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6642838d8d673868671b72fa,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Get Most Close (product - user)

In [17]:
# Method to find top N neighbors
def find_n_neighbors(df,n):
    order = np.argsort(df.values, axis=1)[:, :n]
    df = df.apply(axis=1, func=lambda x: pd.Series(x.sort_values(ascending=False).iloc[:n].index,
                                                   index=['top{}'.format(i) for i in range(1, n+1)]))
    return df

In [18]:
# Find 10 neighbors of each user
user_10_neighbors = find_n_neighbors(user_similarity_df, 10)
user_10_neighbors.head()

Unnamed: 0_level_0,top1,top2,top3,top4,top5,top6,top7,top8,top9,top10
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
6644b71ce892910dd8334d6f,6644b89be892910dd8334d89,A3JNBO7H2SPL44,A3J3ZHGDUDPCFL,A3J8A5L5AF5TX9,A3JAH5WW61N2EK,A3JJ222HEKM10R,A3JL3YQYI7OR5O,A3JLOIXFM75QNV,A3JQ58CZBV3FOZ,A3KF79AODCE7YE
6644b89be892910dd8334d89,6644b71ce892910dd8334d6f,A3JQ58CZBV3FOZ,A3J8A5L5AF5TX9,A3JAH5WW61N2EK,A3JJ222HEKM10R,A3JL3YQYI7OR5O,A3JLOIXFM75QNV,A3JNBO7H2SPL44,A3JU16JTNLVK1J,A3KHCO3MJLKLVA
6644b8e6e892910dd8334d8c,6644b71ce892910dd8334d6f,A3JQ58CZBV3FOZ,A3J8A5L5AF5TX9,A3JAH5WW61N2EK,A3JJ222HEKM10R,A3JL3YQYI7OR5O,A3JLOIXFM75QNV,A3JNBO7H2SPL44,A3JU16JTNLVK1J,A3KHCO3MJLKLVA
A100UD67AHFODS,A2OOLI2WFY4L2,AZMY6E8B52L2T,AT53ZTTO707MB,A2XX2A4OJCDNLZ,A105TOJ6LTVMBG,A1KD8NJPZ01R37,A1AFS9M75F17IZ,ADAXXCMSLC0U9,A2J7FHZFKOKGZ6,A2W0GY64CJSV5D
A100WO06OQR8BQ,A105TOJ6LTVMBG,A3L1VJMHFWONCB,A1ZXMMQPYC3Z9I,AZBXKUH4AIW3X,A1RPJHUVVSI98A,A313DADVI76HKM,AG7EF0SVBQOUX,AAK6SOEJY30YG,A2LXX47A0KMJVX,A2X6J6AFLLYVXH


In [19]:
# Find 10 neighbors of each item0
item_10_neighbors = find_n_neighbors(item_similarity_df, 10)
item_10_neighbors.head()

Unnamed: 0_level_0,top1,top2,top3,top4,top5,top6,top7,top8,top9,top10
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
6642838d8d673868671b72d2,664283928d673868671b7a58,664283b08d673868671b9e37,6642838d8d673868671b73f5,6642838d8d673868671b744e,6642838d8d673868671b7451,6642838d8d673868671b7458,6642838d8d673868671b7461,664283928d673868671b77ec,664283928d673868671b79d6,664283928d673868671b79d5
6642838d8d673868671b72d8,664283928d673868671b77f5,664283928d673868671b77f6,664283b08d673868671b9df1,664283928d673868671b78b6,664283928d673868671b7843,664283b48d673868671ba49c,664283b48d673868671ba340,664283928d673868671b77fe,664283928d673868671b79d5,664283ac8d673868671b9b44
6642838d8d673868671b72e1,6642839e8d673868671b8a0d,6642839b8d673868671b84e6,6642838d8d673868671b72e5,6642838d8d673868671b72fa,664283a08d673868671b8c11,664283928d673868671b7a25,664283928d673868671b79e0,664283a08d673868671b8c1c,664283a08d673868671b8cc3,664283a08d673868671b8a71
6642838d8d673868671b72e5,6642839e8d673868671b8a0d,6642838d8d673868671b72e1,6642839b8d673868671b84e6,6642838d8d673868671b72fa,664283a08d673868671b8c11,664283928d673868671b7a25,664283928d673868671b79e0,664283a08d673868671b8c1c,664283a08d673868671b8cc3,664283a08d673868671b8a71
6642838d8d673868671b72fa,6642839e8d673868671b8a0d,6642838d8d673868671b72e1,6642838d8d673868671b72e5,6642839b8d673868671b84e6,664283a08d673868671b8c11,664283928d673868671b7a25,664283928d673868671b79e0,664283a08d673868671b8c1c,664283a08d673868671b8cc3,664283a08d673868671b8a71


## Predict User Rating For all Products

In [20]:
# Method to predict the rating
def predict(ratings, similarity, type='user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)  
        mean_user_rating = np.array(mean_user_rating)
        #We use np.newaxis so that mean_user_rating has same format as ratings
        ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred

In [21]:
#predict user rating for all products
user_prediction = predict(user_item, user_similarity, type='user')
user_prediction = pd.DataFrame(user_prediction, index=user_item.index, columns=user_item.columns)
user_prediction.head()

  pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T


productId,6642838d8d673868671b72d2,6642838d8d673868671b72d8,6642838d8d673868671b72e1,6642838d8d673868671b72e5,6642838d8d673868671b72fa,6642838d8d673868671b73f5,6642838d8d673868671b744e,6642838d8d673868671b7451,6642838d8d673868671b7458,6642838d8d673868671b7461,...,664283be8d673868671baf7d,664283be8d673868671baf7e,664283be8d673868671baf7f,664283be8d673868671baf80,664283be8d673868671baf81,664283be8d673868671baf82,664283be8d673868671baf83,664283be8d673868671baf84,664283be8d673868671baf85,664283be8d673868671baf86
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6644b71ce892910dd8334d6f,2.999807,-0.000193,-0.000193,-0.000193,-0.000193,3.999807,4.999807,1.999807,2.999807,2.999807,...,-0.000193,-0.000193,-0.000193,-0.000193,-0.000193,-0.000193,-0.000193,-0.000193,-0.000193,-0.000193
6644b89be892910dd8334d89,0.000193,4.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,...,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193,0.000193
6644b8e6e892910dd8334d8c,,,,,,,,,,,...,,,,,,,,,,
A100UD67AHFODS,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,...,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373,0.004373
A100WO06OQR8BQ,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,...,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244,0.000244


In [22]:
item_prediction = predict(user_item, item_similarity, type='item')
item_prediction.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,15563,15564,15565,15566,15567,15568,15569,15570,15571,15572
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6644b71ce892910dd8334d6f,0.244898,3.581395,0.0,0.0,0.0,0.244898,0.244898,0.244898,0.244898,0.244898,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6644b89be892910dd8334d89,3.591837,0.27907,0.0,0.0,0.0,3.489796,3.387755,3.693878,3.591837,3.591837,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6644b8e6e892910dd8334d8c,0.0,0.0,3.8,3.9,3.8,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100UD67AHFODS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100WO06OQR8BQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Recommend

In [23]:
# Method to Recommend the items with the highest predicted ratings
def recommend_items(userId, orig_df, preds_df, top_n):
    # Get and sort the user's ratings
    sorted_user_ratings = orig_df.loc[userId].sort_values(ascending=False) #sorted_user_ratings
    
    sorted_user_predictions = preds_df.loc[userId].sort_values(ascending=False) #sorted_user_predictions
    
    # Prepare recommendations
    recommedations = pd.concat([sorted_user_ratings, sorted_user_predictions], axis=1)
    
    recommedations.index.name = 'Recommended Items'
    
    recommedations.columns = ['user_ratings', 'user_predictions']
    
    # Take the products which user has NOT rated
    recommedations = recommedations.loc[recommedations.user_ratings == 0] 
    
    recommedations = recommedations.sort_values('user_predictions', ascending=False)
    
    #print(recommedations[:top_n])
    return recommedations.head(top_n)

In [24]:
def get_result(userID):
    ## NO. recommendations
    no_products = 5
    selected_columns = ['productId', 'name','user_ratings', 'user_predictions']
    column_mapping = {'productId': 'RProductID', 'name': 'RProductName'}
    
    recom_UBCF = recommend_items(userID, user_item, user_prediction, no_products)

    recom_IBCF = recommend_items(userID, user_item, item_prediction, no_products)


    result = pd.concat([recom_UBCF, recom_IBCF], axis=0)

    
    result =  pd.DataFrame(result.index)
    result.dropna(inplace=True)
    result.drop_duplicates(subset=['Recommended Items'], inplace=True)
    result = result.transpose()

    result['userId'] = userID
    result = result.set_index('userId')

    # Rename columns
    num_columns = len(result.columns)
    new_columns = ['recommend_{}'.format(i) for i in range(num_columns)]
    result.columns = new_columns

    return result

In [26]:
## Get unique users only from database
subset_df = ratings.iloc[32977:]
feature_values = subset_df['userId']
users_Ids = feature_values.unique()
users_Ids = pd.DataFrame(users_Ids,columns=['userId'])
users_Ids

Unnamed: 0,userId
0,6644b71ce892910dd8334d6f
1,6644b8e6e892910dd8334d8c
2,6644b89be892910dd8334d89


## Apply on all users

In [29]:
from bson import ObjectId
result_df = pd.DataFrame()

# Loop through each value in the column
for user in users_Ids['userId']:
    # Call your function and get the result DataFrame
    result = get_result(user)
    # Concatenate the result DataFrame to the result_df
    result_df = pd.concat([result_df, result])
result_df = result_df.astype(str)
result_df.reset_index(inplace=True)
result_df['userId'] = result_df['userId'].apply(ObjectId)
result_df

Unnamed: 0,userId,recommend_0,recommend_1,recommend_2,recommend_3,recommend_4,recommend_5,recommend_6,recommend_7,recommend_8,recommend_9
0,6644b71ce892910dd8334d6f,6642838d8d673868671b744e,664283928d673868671b79d6,664283b08d673868671b9e37,6642838d8d673868671b73f5,664283928d673868671b77ec,664283ac8d673868671b9b44,664283ac8d673868671b9b43,6642838d8d673868671b72d2,664283ac8d673868671b9b46,664283ac8d673868671b9b42
1,6644b8e6e892910dd8334d8c,664283ac8d673868671b9b45,664283ac8d673868671b9b47,664283ac8d673868671b9b46,6642838d8d673868671b72d2,664283ac8d673868671b9b44,,,,,
2,6644b89be892910dd8334d89,664283928d673868671b77f6,6642838d8d673868671b72d8,664283b48d673868671ba49c,664283b08d673868671b9df1,664283928d673868671b77fe,664283ac8d673868671b9b47,664283ac8d673868671b9b3b,664283ac8d673868671b9b3c,664283ac8d673868671b9b3d,664283ac8d673868671b9b3e


## Save to DB

In [28]:
CF_recom = db["CF_recom"]

# Drop the collection if it exists
if CF_recom.name in db.list_collection_names():
    CF_recom.drop()

data = result_df.to_dict(orient='records')

# Insert data into MongoDB
CF_recom.insert_many(data)
print("DataFrame successfully saved to MongoDB collection.")

DataFrame successfully saved to MongoDB collection.
