In [101]:
import pandas as pd

# Movie metadata table exported by the app; recommend() reads its
# 'movieId' and 'order' columns when the user has not rated anything yet.
metadata = pd.read_csv(filepath_or_buffer="app/exports/metadata.csv")

In [102]:
import pickle

# Similarity lookups exported by the app; recommend() queries both with
# `.get(movie_id, 0)`.
# NOTE(security): pickle.load can execute arbitrary code while unpickling,
# so these files must come from a trusted source.
with open('app/exports/collaborative_similarity.pkl', mode='rb') as collab_file:
    collaborative_similarity = pickle.load(collab_file)

with open('app/exports/content_similarity.pkl', mode='rb') as content_file:
    content_similarity = pickle.load(content_file)


In [103]:
# Ratings used for offline evaluation: the metrics cell samples rows from
# this object and applies get_precision_recall to each row
# (presumably one row per user and one column per movie -- TODO confirm).
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open('datasets/test_ratings.pkl', mode='rb') as ratings_file:
    test_ratings = pickle.load(ratings_file)

In [104]:
def recommend(movies=None, hide_rated=True, n=20, page=1):
    """Recommend movies by blending collaborative and content similarity.

    Every rated movie contributes its two similarity vectors (looked up in the
    module-level ``collaborative_similarity`` and ``content_similarity``),
    weighted 0.8 / 0.2 and scaled by ``rating - 2.5``. Ratings above 2.5
    therefore raise the score of similar movies and ratings below it lower
    it. The contributions are summed into one score per candidate movie.

    Parameters
    ----------
    movies : pandas.DataFrame or None
        Rated movies: the index holds the movie ids and the first column the
        rating. ``None`` or an empty frame means nothing has been rated.
    hide_rated : bool
        When true, movies present in ``movies`` are removed from the result.
    n : int
        Number of recommendations per page.
    page : int
        1-based page number.

    Returns
    -------
    pandas.DataFrame
        With ratings: columns ``movieId`` and ``score``, highest score first
        (empty if none of the rated movies is known to either similarity
        source). Without ratings: the ``movieId`` and ``order`` columns of the
        module-level ``metadata``, sorted by ``order`` descending. In both
        cases only rows ``(page-1)*n`` to ``page*n`` are returned.
    """
    # weights for collaborative and content based similarity
    CONTENT_W = 0.2
    COLLAB_W = 0.8
    # ratings are centred on this value before weighting the similarities
    NEUTRAL_RATING = 2.5

    # if no movies are rated, return top rated movies
    if movies is None or movies.shape[0] == 0:
        movie_similarities = (
            metadata[['movieId', 'order']]
            .sort_values(by='order', ascending=False)
            .reset_index(drop=True)
        )

    # else calculate similarity scores
    else:
        # collect one weighted similarity vector per rated movie and
        # concatenate once, instead of growing a frame inside the loop
        weighted_scores = []
        for movie_id, rating in movies.iterrows():
            # score with item based collaborative similarity
            collaborative_score = collaborative_similarity.get(movie_id, 0)

            # score with content based similarity
            content_score = content_similarity.get(movie_id, 0)

            blended = collaborative_score * COLLAB_W + content_score * CONTENT_W

            # A movie missing from both sources blends to the scalar 0, which
            # pd.concat cannot accept; it carries no information, so skip it.
            if not isinstance(blended, (pd.Series, pd.DataFrame)):
                continue

            # `rating` is the row of `movies`; its first cell is the rating
            weighted_scores.append(
                blended * (float(rating.iloc[0]) - NEUTRAL_RATING))

        # sum similarity scores for each movie
        if weighted_scores:
            movie_similarities = pd.concat(weighted_scores, axis=1).sum(axis=1)
        else:
            movie_similarities = pd.Series(dtype=float)

        # hide already rated movies if requested; rated ids that are not
        # among the candidates are ignored instead of raising KeyError
        if hide_rated:
            movie_similarities = movie_similarities.drop(
                movies.index, errors='ignore')

        # sort by score and expose the result as movieId / score columns,
        # regardless of how the similarity index happens to be named
        movie_similarities = (
            movie_similarities
            .sort_values(ascending=False)
            .rename_axis('movieId')
            .reset_index(name='score')
        )

    # make subset of movies based on number of recommendation (n) and page number (page)
    return movie_similarities.iloc[(page - 1) * n:page * n]

In [108]:
# Suppress only FutureWarning noise (the category pandas uses for its
# deprecation notices) instead of every warning, so genuine problems such as
# numeric RuntimeWarnings in the metric computation still surface.
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

def get_precision_recall(row):
    """Count recommendation hits and misses for one row of ratings.

    The non-null entries of ``row`` are the rated movies (index = movie id).
    Among the 100 highest ratings, those above 2.5 count as "liked"; half of
    the liked movies are held out (fixed seed) and the remaining ratings are
    passed to ``recommend`` with ``n=20``.

    Returns a ``pd.Series([tp, fp, fn])``: held-out movies that were
    recommended, recommended movies that were not held out, and held-out
    movies that were not recommended. If ``recommend`` returns ``None`` the
    Series holds three ``None`` values.
    """
    rated = row.dropna()

    # candidate pool: at most the 100 best ratings, restricted to ratings > 2.5
    best_rated = rated.sort_values(ascending=False).iloc[:100]
    liked = best_rated[best_rated > 2.5]

    # hold out 50% of the liked movies; everything else is the model input
    actual = liked.sample(frac=0.5, random_state=42)
    known = rated.drop(actual.index)

    predicted = recommend(pd.DataFrame(known), n=20)
    if predicted is None:
        return pd.Series([None, None, None])

    predicted_ids = predicted['movieId'].tolist()
    held_out_ids = actual.index.tolist()

    hits = len(set(held_out_ids).intersection(predicted_ids))
    false_positives = len(predicted_ids) - hits
    false_negatives = len(held_out_ids) - hits

    return pd.Series([hits, false_positives, false_negatives])

# Evaluate on a fixed random sample of 10 rows of test_ratings; each row of
# `result` holds the (tp, fp, fn) counts returned by get_precision_recall.
result = test_ratings.sample(10, random_state=29).apply(get_precision_recall, axis=1)

# pool the counts over all sampled rows before computing the ratios
tp, fp, fn = result.sum()

# A zero denominator (nothing recommended, nothing held out, or no hits at
# all) would otherwise yield NaN/inf silently; report 0.0 in those cases.
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1: {f1}")


Precision: 0.405
Recall: 0.1643002028397566
F1: 0.23376623376623376
