In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import math

import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, KNNBasic, Reader

<IPython.core.display.Javascript object>

# Data

In [3]:
USER_IDS = [1, 1, 2, 1, 2]
ITEM_IDS = [1, 2, 1, 3, 2]
RATINGS = [1, 2, 3, 4, 5]


df = pd.DataFrame({"user_id": USER_IDS, "item_id": ITEM_IDS, "rating": RATINGS})
df

Unnamed: 0,user_id,item_id,rating
0,1,1,1
1,1,2,2
2,2,1,3
3,1,3,4
4,2,2,5


<IPython.core.display.Javascript object>

# Surprise

In [4]:
reader = Reader()
data = Dataset.load_from_df(df, reader)


<IPython.core.display.Javascript object>

In [5]:
sim_options = {"name": "cosine", "user_based": False}

algo = KNNBasic(sim_options=sim_options)

<IPython.core.display.Javascript object>

In [6]:
trainset = data.build_full_trainset()
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1a94d790310>

<IPython.core.display.Javascript object>

In [7]:
pred = algo.predict(uid=1, iid=1)
pred

Prediction(uid=1, iid=1, r_ui=None, est=2.3335251799991403, details={'actual_k': 3, 'was_impossible': False})

<IPython.core.display.Javascript object>

# Item based

In [25]:
item_user_matrix = df.pivot_table(index="item_id", columns="user_id", values="rating")
item_user_matrix

user_id,1,2
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.0,3.0
2,2.0,5.0
3,4.0,


<IPython.core.display.Javascript object>

In [26]:
def get_cosine_similarity(df: pd.DataFrame) -> pd.DataFrame:
    df.fillna(0, inplace=True)

    # create an empty dataframe to store the similarity score
    similarity_df = pd.DataFrame(index=df.index, columns=df.index)

    # compute the cosine similarity
    for i in range(0, len(similarity_df.columns)):
        for j in range(0, len(similarity_df.columns)):
            similarity_df.iloc[i, j] = np.dot(df.iloc[i], df.iloc[j]) / (
                np.sqrt(np.dot(df.iloc[i], df.iloc[i]))
                * np.sqrt(np.dot(df.iloc[j], df.iloc[j]))
            )

    return similarity_df

<IPython.core.display.Javascript object>

In [56]:
similarity_df = get_cosine_similarity(item_user_matrix)
similarity_df

item_id,1,2,3
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.0,0.998274,0.316228
2,0.998274,1.0,0.371391
3,0.316228,0.371391,1.0


<IPython.core.display.Javascript object>

In [55]:
np.round(cosine_similarity(item_user_matrix.fillna(0)), 6)

array([[1.      , 0.998274, 0.316228],
       [0.998274, 1.      , 0.371391],
       [0.316228, 0.371391, 1.      ]])

<IPython.core.display.Javascript object>

In [81]:
def predict_rating(iid: int, uid: int) -> float:
    
    global item_user_matrix
    
    similarity_matrix = np.round(cosine_similarity(item_user_matrix.fillna(0)), 6)
    
    similarity_df_index = similarity_matrix.shape[0] + 1
    similarity_df = pd.DataFrame(similarity_matrix, index=range(1, similarity_df_index), columns=range(1, similarity_df_index))
    
    # Get the similar items to the given item
    similar_items = similarity_df[iid]

    # Get the ratings of the similar items by the given user
    ratings = item_user_matrix[uid]

    # Get the indices of the non-NA ratings
    idx = ratings.notna()

    # Calculate the predicted rating
    pred = similar_items[idx].dot(ratings[idx]) / similar_items[idx].sum()

    return pred




<IPython.core.display.Javascript object>

In [80]:
IID = 2
UID = 1
print(
    predict_rating(iid=IID, uid=UID)
)  # prints the predicted rating for item 1 by user 1

1.892182228289653


<IPython.core.display.Javascript object>

In [77]:
similarity_df = get_cosine_similarity(item_user_matrix)
display(similarity_df)

weights = similarity_df.loc[IID, :]  # cosine similarity of items with item IID
display(weights)

np.round(np.dot(item_user_matrix.loc[:, UID], weights), 5) / np.round(weights.sum(), 5)

item_id,1,2,3
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.0,0.998274,0.316228
2,0.998274,1.0,0.371391
3,0.316228,0.371391,1.0


item_id
1    0.998274
2         1.0
3    0.371391
Name: 2, dtype: object

1.8921790797874807

<IPython.core.display.Javascript object>

# User based

In [14]:
user_item_matrix = df.pivot_table(index="user_id", columns="item_id", values="rating")
user_item_matrix

item_id,1,2,3
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.0,2.0,4.0
2,3.0,5.0,


<IPython.core.display.Javascript object>

In [79]:
def get_prediction(uid: int, iid: int) -> float:
    global user_item_matrix

    similarity_matrix = np.round(cosine_similarity(user_item_matrix.fillna(0)), 6)

    similarity_df_index = similarity_matrix.shape[0] + 1
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=range(1, similarity_df_index),
        columns=range(1, similarity_df_index),
    )

    # Get similar users
    similar_users = similarity_df[uid]

    # Get ratings from similar users
    ratings = user_item_matrix[iid]

    # Get the indices of the non-NA ratings
    idx = ratings.notna()

    # Calculate the predicted rating
    pred = similar_users[idx].dot(ratings[idx]) / similar_users[idx].sum()

    return pred


get_prediction(uid=1, iid=1)

1.6545694521339538

<IPython.core.display.Javascript object>