In [None]:
!pip install scikit-surprise
!pip install "numpy<2.0"


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2461563 sha256=d8b066b3acb22ae80c808503b5ec4bc59b327b3b00738af985a781b572d4d249
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succes

In [None]:
# Force a clean reinstall of NumPy 1.24.4, bypassing pip's download cache.
# NOTE(review): the recorded output shows this replaces numpy 2.0.2 and that pip
# reports conflicts with packages requiring numpy>=1.25 (treescope, pymc, jax) -
# confirm none of those are needed later in this runtime.
# NOTE(review): presumably the runtime has to be restarted for the new NumPy to
# be the one imported - confirm.
!pip install numpy==1.24.4 --force-reinstall --no-cache-dir


Collecting numpy==1.24.4
  Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m164.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.4 which is incompatible.
pymc 5.22.0 requires numpy>=1.25.0, but you have numpy 1.24.4 which is incompatible.
jax 0.5.2 requires numpy>=1.25, but you have numpy 1.24.4 which is incompatible.
tensorflow 2.18.0 requires nump

In [None]:
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Load the book catalogue: semicolon-separated, latin-1 encoded; rows that
# cannot be parsed are skipped. The columns are then renamed positionally.
books = pd.read_csv(
    '/BX_Books.csv',
    sep=';',
    encoding='latin-1',
    on_bad_lines='skip',
    low_memory=False,
).set_axis(
    [
        'ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication',
        'Publisher', 'Image-URL-S', 'Image-URL-M', 'Image-URL-L',
    ],
    axis=1,
)

# Load the ratings with the same parsing options and rename positionally.
ratings = pd.read_csv(
    '/BX-Book-Ratings.csv',
    sep=';',
    encoding='latin-1',
    on_bad_lines='skip',
).set_axis(['User-ID', 'ISBN', 'Book-Rating'], axis=1)

# Keep only rows whose rating is strictly greater than zero.
ratings = ratings.loc[ratings['Book-Rating'].gt(0)]

# Attach the title to every rating via ISBN (inner join), then discard any
# row that still has a missing value.
data = ratings.merge(books[['ISBN', 'Book-Title']], on='ISBN').dropna()


In [None]:
# Tell Surprise the ratings live on a 1-10 scale and build a dataset whose
# item identifier is the book title (not the ISBN).
reader = Reader(rating_scale=(1, 10))
svd_data = Dataset.load_from_df(data[['User-ID', 'Book-Title', 'Book-Rating']], reader)
# Use every available rating for training; no hold-out split is made here.
trainset = svd_data.build_full_trainset()

# random_state fixes the model's random initialisation so that re-running the
# notebook from a fresh kernel reproduces the same recommendations.
algo = SVD(n_factors=50, n_epochs=20, lr_all=0.005, reg_all=0.02, random_state=42)
algo.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7ec8d5e382d0>

In [None]:
def get_top_n_books(algo, data, user_id, n=20):
    """Rank the books a user has not rated yet by predicted rating.

    Args:
        algo: Fitted model exposing ``test(testset)``; each returned prediction
            must provide ``iid`` (the item id) and ``est`` (the estimate).
        data: DataFrame with at least ``'User-ID'`` and ``'Book-Title'``.
        user_id: Value compared against ``data['User-ID']``.
        n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``'Book-Title'`` and ``'predicted_rating'``
        (rounded to two decimals), best prediction first. When there is
        nothing left to recommend the frame is empty but keeps both columns.
    """
    all_books = data['Book-Title'].unique()
    read_books = data.loc[data['User-ID'] == user_id, 'Book-Title'].unique()
    unread_books = np.setdiff1d(all_books, read_books)

    # The third element is the "true" rating slot required by ``test``; it is
    # a placeholder and is not read back from the predictions below.
    testset = [(user_id, book, 4.) for book in unread_books]
    predictions = algo.test(testset)

    # Building the frame column-wise keeps both columns even with zero
    # predictions, so the sort below cannot fail on a missing column.
    scored = pd.DataFrame({
        'Book-Title': [pred.iid for pred in predictions],
        'predicted_rating': [round(pred.est, 2) for pred in predictions],
        '_raw_estimate': [pred.est for pred in predictions],
    })

    # Rank on the unrounded estimate: rounding first would create artificial
    # ties and make the cut-off at ``n`` arbitrary. A stable sort keeps the
    # result deterministic for genuinely equal estimates.
    ranked = scored.sort_values('_raw_estimate', ascending=False, kind='stable')
    return ranked.head(n).drop(columns='_raw_estimate')


In [None]:
def diversify_recommendations(recommendations, books, n=10, diversity_weight=0.7):
    """Greedily re-rank recommendations so that authors are not repeated.

    The first row of ``recommendations`` is always kept. Each following pick
    maximises
    ``(1 - diversity_weight) * predicted_rating + diversity_weight * novelty``
    where ``novelty`` is 1 when no already-selected book shares the
    candidate's author and 0 otherwise.

    Args:
        recommendations: DataFrame with ``'Book-Title'`` and
            ``'predicted_rating'`` columns, ordered best first.
        books: DataFrame with ``'Book-Title'`` and ``'Book-Author'`` columns.
            Titles may repeat; the first row with a known author is used.
        n: Maximum number of rows to return.
        diversity_weight: Weight of the novelty term in the combined score.

    Returns:
        The selected rows of ``recommendations`` in selection order. Empty
        when ``recommendations`` is empty or ``n`` is not positive.
    """
    if n <= 0 or len(recommendations) == 0:
        return recommendations.iloc[:0]

    # One author per title. Without de-duplication a title lookup can return
    # several rows, and the similarity matrix would no longer line up
    # position-for-position with ``recommendations``.
    title_to_author = (
        books.dropna(subset=['Book-Author'])
        .drop_duplicates(subset='Book-Title')
        .set_index('Book-Title')['Book-Author']
    )
    # ``map`` yields exactly one author per recommendation row; titles that
    # are unknown to ``books`` fall into a shared 'Unknown' bucket.
    rec_authors = (
        recommendations['Book-Title']
        .map(title_to_author)
        .fillna('Unknown')
        .astype(str)
        .to_numpy(dtype=object)
    )

    # Cosine similarity between one-hot author vectors is 1 for the same
    # author and 0 otherwise, so an equality table gives the same matrix.
    similarity_matrix = (rec_authors[:, None] == rec_authors[None, :]).astype(float)
    ratings = recommendations['predicted_rating'].to_numpy(dtype=float)

    selected = [0]
    remaining = list(range(1, len(recommendations)))

    while len(selected) < n and remaining:
        # Similarity of each candidate to its closest already-selected book.
        max_sim = similarity_matrix[np.ix_(remaining, selected)].max(axis=1)
        diversity_score = 1 - max_sim
        combined_score = (
            (1 - diversity_weight) * ratings[remaining]
            + diversity_weight * diversity_score
        )
        next_index = remaining[int(np.argmax(combined_score))]
        selected.append(next_index)
        remaining.remove(next_index)

    return recommendations.iloc[selected[:n]]


In [None]:
def explain_recommendations(user_id, recommendations, data, books):
    """Attach a short textual reason to every recommended book.

    Args:
        user_id: Value compared against ``data['User-ID']``.
        recommendations: DataFrame with a ``'Book-Title'`` column. It is not
            modified.
        data: DataFrame with ``'User-ID'`` and ``'Book-Title'`` columns.
        books: DataFrame with ``'Book-Title'`` and ``'Book-Author'`` columns.

    Returns:
        A copy of ``recommendations`` with an added ``'explanation'`` column.
    """
    user_titles = data.loc[data['User-ID'] == user_id, 'Book-Title']
    # The (up to) three authors appearing most often among the user's books.
    fav_authors = (
        books.loc[books['Book-Title'].isin(user_titles), 'Book-Author']
        .value_counts()
        .head(3)
        .index.tolist()
    )

    # First catalogue entry per title, built once instead of scanning
    # ``books`` for every recommendation.
    title_to_author = (
        books.drop_duplicates(subset='Book-Title')
        .set_index('Book-Title')['Book-Author']
    )

    # Without any known favourite author the old wording produced
    # "fans of similar readers"; use a sentence that reads correctly.
    if fav_authors:
        fallback = f"Popular with fans of {fav_authors[0]}"
    else:
        fallback = "Popular with similar readers"

    explanations = []
    for title in recommendations['Book-Title']:
        # ``get`` returns None for titles missing from ``books``; those rows
        # receive the fallback text instead of raising.
        book_author = title_to_author.get(title)
        if book_author in fav_authors:
            explanations.append(f"You like books by {book_author}")
        else:
            explanations.append(fallback)

    # ``assign`` returns a new frame, so the caller's object (possibly a slice
    # of another frame) is never written to.
    return recommendations.assign(explanation=explanations)


In [None]:
def main(user_id=276725, n_candidates=50, n_final=15):
    """Print diversified, explained book recommendations for one user.

    Args:
        user_id: User to recommend for.
        n_candidates: Number of top-rated candidates handed to the
            diversification step.
        n_final: Number of recommendations printed.
    """
    basic_recs = get_top_n_books(algo, data, user_id, n=n_candidates)
    diverse_recs = diversify_recommendations(basic_recs, books, n=n_final)
    final_recs = explain_recommendations(user_id, diverse_recs, data, books)
    # Plain-text table without the index column.
    print(final_recs[['Book-Title', 'predicted_rating', 'explanation']].to_string(index=False))

main()


                                                                    Book-Title  predicted_rating                          explanation
                         Harry Potter and the Chamber of Secrets Postcard Book              9.38 Popular with fans of similar readers
                                  My Sister's Keeper : A Novel (Picoult, Jodi)              9.28 Popular with fans of similar readers
                                                  Dilbert: A Book of Postcards              9.21 Popular with fans of similar readers
                                                                 Lonesome Dove              9.11 Popular with fans of similar readers
                                                  Weirdos From Another Planet!              8.98 Popular with fans of similar readers
                                                         84 Charing Cross Road              8.98 Popular with fans of similar readers
                                                              

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommendations['explanation'] = explanations
