In [None]:
# import libraries (you may add additional imports but you may not have to)
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt


In [None]:
# get data files
# Both exports are parsed with the same settings: ';' as the field separator
# and latin-1 as the text encoding. The first path holds the ratings, the
# second the book metadata.
ratings_df, books_df = (
    pd.read_csv(csv_path, sep=';', encoding='latin-1')
    for csv_path in ('BX-Book-Ratings.csv', 'BX-Books.csv')
)


In [None]:

# Keep only ratings made by active users on frequently rated books.
#
# Both activity counts are taken from the same, not-yet-filtered ratings and the
# two filters are applied together. Counting books only after the user filter
# would count just the ratings left by heavy users, which discards far more
# books than the "fewer than 100 ratings" rule intends.
#
# NOTE: this cell overwrites `ratings_df`; re-run the loading cell before
# re-running it, otherwise the counts are computed on already-filtered data.
MIN_USER_RATINGS = 200  # users with fewer ratings than this are removed
MIN_BOOK_RATINGS = 100  # books with fewer ratings than this are removed

user_rating_counts = ratings_df['User-ID'].value_counts()
book_rating_counts = ratings_df['ISBN'].value_counts()

users_to_keep = user_rating_counts[user_rating_counts >= MIN_USER_RATINGS].index
books_to_keep = book_rating_counts[book_rating_counts >= MIN_BOOK_RATINGS].index

ratings_df = ratings_df[
    ratings_df['User-ID'].isin(users_to_keep)
    & ratings_df['ISBN'].isin(books_to_keep)
]


In [None]:
# add your code here - consider creating a new cell for each section of code

In [None]:
# Build the user x book matrix: one row per User-ID, one column per ISBN,
# each cell holding that user's Book-Rating for that book. Pairs with no
# rating are filled with 0.
rating_matrix = (
    ratings_df
    .pivot_table(values='Book-Rating', index='User-ID', columns='ISBN')
    .fillna(0)
)



In [None]:
# Fit the KNN model
# Brute-force search with cosine distance. Six neighbours are requested per
# query; get_recommends discards the first one returned.
model_knn = NearestNeighbors(
    n_neighbors=6,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
)
# The matrix is transposed so that every row handed to the model is one book.
model_knn.fit(rating_matrix.transpose())


In [None]:
def get_recommends(book_title):
    """Return the books whose rating vectors are nearest to ``book_title``.

    Relies on the notebook-level ``books_df`` (ISBN / Book-Title metadata),
    ``rating_matrix`` (users x ISBN) and the fitted ``model_knn``.

    Parameters
    ----------
    book_title : str
        Exact title as it appears in ``books_df['Book-Title']``.

    Returns
    -------
    list
        ``[book_title, [[neighbour_title, distance], ...]]`` with neighbours in
        the order returned by ``model_knn.kneighbors`` and the queried book
        itself left out.

    Raises
    ------
    IndexError
        If no row of ``books_df`` carries ``book_title``.
    KeyError
        If the title exists but none of its ISBNs is a column of
        ``rating_matrix``.
    """
    # A title can be listed under several ISBNs (different editions). Only an
    # ISBN that is actually a column of the rating matrix can be queried, so
    # pick the first such edition instead of blindly taking the first match.
    title_isbns = books_df.loc[books_df['Book-Title'] == book_title, 'ISBN']
    if title_isbns.empty:
        raise IndexError(f"No book titled {book_title!r} found in books_df")

    rated_isbns = title_isbns[title_isbns.isin(rating_matrix.columns)]
    if rated_isbns.empty:
        raise KeyError(
            f"No edition of {book_title!r} is present in the rating matrix"
        )
    book_isbn = rated_isbns.iloc[0]

    # Column position of the book; the model was fitted on one row per column.
    book_index = rating_matrix.columns.get_loc(book_isbn)

    # kneighbors expects a 2-D array: a single query row.
    query_vector = rating_matrix.iloc[:, book_index].values.reshape(1, -1)
    distances, indices = model_knn.kneighbors(query_vector)
    neighbour_indices = indices.flatten()
    neighbour_distances = distances.flatten()

    # Drop the queried book by identity rather than by position: when another
    # book has an identical rating vector the query is not guaranteed to come
    # back in the first slot. The slice keeps the result at most one shorter
    # than the neighbour list even if the query was not returned at all.
    neighbours = [
        (index, distance)
        for index, distance in zip(neighbour_indices, neighbour_distances)
        if index != book_index
    ][: len(neighbour_indices) - 1]

    recommended_books = []
    for index, distance in neighbours:
        neighbour_isbn = rating_matrix.columns[index]
        titles = books_df.loc[books_df['ISBN'] == neighbour_isbn, 'Book-Title']
        # Fall back to the ISBN when the metadata table has no row for it, so
        # a rated book without metadata does not abort the whole request.
        neighbour_title = titles.iloc[0] if not titles.empty else neighbour_isbn
        recommended_books.append([neighbour_title, distance])

    return [book_title, recommended_books]


In [None]:
# Example query: look up the neighbours of one title and print the raw
# [title, [[neighbour_title, distance], ...]] structure that comes back.
recommended_books = get_recommends(
    "The Queen of the Damned (Vampire Chronicles (Paperback))"
)
print(recommended_books)


In [None]:
# Plot the distribution of ratings
ratings = ratings_df['Book-Rating']

# Use one unit-wide bin centred on each rating value. A fixed `bins=10` over a
# scale with more than ten distinct integer values merges the two highest
# ratings into one bar, because the last histogram bin is closed on both ends.
# Assumes ratings are integers - TODO confirm against the dataset.
if ratings.empty:
    rating_bins = 10  # nothing to derive edges from; keep the previous default
else:
    rating_bins = np.arange(ratings.min(), ratings.max() + 2) - 0.5

fig, ax = plt.subplots()
ax.hist(ratings, bins=rating_bins, edgecolor='black')
ax.set_title('Distribution of Book Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Frequency')
plt.show()
