In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [112]:
import math

import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, KNNBasic, KNNWithMeans, Reader

<IPython.core.display.Javascript object>

# Data

In [133]:
USER_IDS = [1, 1, 2, 2]
ITEM_IDS = [1, 2, 1, 2]
RATINGS = [1, 2, 3, 5]


df = pd.DataFrame({"user_id": USER_IDS, "item_id": ITEM_IDS, "rating": RATINGS})
df

Unnamed: 0,user_id,item_id,rating
0,1,1,1
1,1,2,2
2,2,1,3
3,2,2,5


<IPython.core.display.Javascript object>

# Surprise

In [134]:
reader = Reader()
data = Dataset.load_from_df(df, reader)


<IPython.core.display.Javascript object>

In [151]:
sim_options = {"name": "cosine", "user_based": True}

algo = KNNBasic(k=3, sim_options=sim_options)

<IPython.core.display.Javascript object>

In [152]:
trainset = data.build_full_trainset()
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1a94d811ca0>

<IPython.core.display.Javascript object>

In [153]:
pred = algo.predict(uid=1, iid=1)
pred

Prediction(uid=1, iid=1, r_ui=None, est=1.998525070537733, details={'actual_k': 2, 'was_impossible': False})

<IPython.core.display.Javascript object>

# Item based

In [138]:
item_user_matrix = df.pivot_table(index="item_id", columns="user_id", values="rating")
item_user_matrix

user_id,1,2
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,3
2,2,5


<IPython.core.display.Javascript object>

In [139]:
def get_cosine_similarity(df: pd.DataFrame) -> pd.DataFrame:
    df.fillna(0, inplace=True)

    # create an empty dataframe to store the similarity score
    similarity_df = pd.DataFrame(index=df.index, columns=df.index)

    # compute the cosine similarity
    for i in range(0, len(similarity_df.columns)):
        for j in range(0, len(similarity_df.columns)):
            similarity_df.iloc[i, j] = np.dot(df.iloc[i], df.iloc[j]) / (
                np.sqrt(np.dot(df.iloc[i], df.iloc[i]))
                * np.sqrt(np.dot(df.iloc[j], df.iloc[j]))
            )

    return similarity_df

<IPython.core.display.Javascript object>

In [140]:
similarity_df = get_cosine_similarity(
    item_user_matrix,
)
similarity_df

item_id,1,2
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.0,0.998274
2,0.998274,1.0


<IPython.core.display.Javascript object>

In [141]:
np.round(cosine_similarity(item_user_matrix.fillna(0)), 10)

array([[1.        , 0.99827437],
       [0.99827437, 1.        ]])

<IPython.core.display.Javascript object>

In [142]:
def predict_rating(iid: int, uid: int) -> float:
    global item_user_matrix

    similarity_matrix = np.round(cosine_similarity(item_user_matrix.fillna(0)), 10)

    similarity_df_index = similarity_matrix.shape[0] + 1
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=range(1, similarity_df_index),
        columns=range(1, similarity_df_index),
    )

    # Get the similar items to the given item
    similar_items = similarity_df[iid]

    # Get the ratings of the similar items by the given user
    ratings = item_user_matrix[uid]

    # Get the indices of the non-NA ratings
    idx = ratings.notna()

    # Calculate the predicted rating
    pred = similar_items[idx].dot(ratings[idx]) / similar_items[idx].sum()

    return pred

<IPython.core.display.Javascript object>

In [143]:
IID = 1
UID = 1
print(
    predict_rating(iid=IID, uid=UID)
)  # prints the predicted rating for item 1 by user 1

1.4995682207550818


<IPython.core.display.Javascript object>

In [144]:
(0.99827437 * 2 + 0.31622777 * 4) / (0.99827437 + 0.31622777)

2.4811369420821174

<IPython.core.display.Javascript object>

In [145]:
similarity_df = get_cosine_similarity(item_user_matrix)
display(similarity_df)

weights = similarity_df.loc[IID, :]  # cosine similarity of items with item IID
display(weights)

np.round(np.dot(item_user_matrix.loc[:, UID], weights), 5) / np.round(weights.sum(), 5)

item_id,1,2
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.0,0.998274
2,0.998274,1.0


item_id
1         1.0
2    0.998274
Name: 1, dtype: object

1.499572129892357

<IPython.core.display.Javascript object>

# User based

In [146]:
user_item_matrix = df.pivot_table(index="user_id", columns="item_id", values="rating")
user_item_matrix

item_id,1,2
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,2
2,3,5


<IPython.core.display.Javascript object>

In [154]:
def get_prediction(uid: int, iid: int) -> float:
    global user_item_matrix

    similarity_matrix = np.round(cosine_similarity(user_item_matrix.fillna(0)), 6)

    similarity_df_index = similarity_matrix.shape[0] + 1
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=range(1, similarity_df_index),
        columns=range(1, similarity_df_index),
    )

    # Get similar users
    similar_users = similarity_df[uid]

    # Get ratings from similar users
    ratings = user_item_matrix[iid]

    # Get the indices of the non-NA ratings
    idx = ratings.notna()

    # Calculate the predicted rating
    pred = similar_users[idx].dot(ratings[idx]) / similar_users[idx].sum()

    return pred


UID = 1
IID = 1

get_prediction(uid=UID, iid=IID)

1.9985248270702747

<IPython.core.display.Javascript object>