In [1]:
# Import thư viện
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics.pairwise import cosine_similarity

# Load model & data
model = pickle.load(open('artifacts/model.pkl','rb'))
books_tfidf = pickle.load(open('artifacts/books_tfidf.pkl','rb'))
book_names = pickle.load(open('artifacts/book_names.pkl','rb'))
final_rating = pickle.load(open('artifacts/final_rating.pkl','rb'))
book_pivot = pickle.load(open('artifacts/book_pivot.pkl','rb'))
cosine_sim = pickle.load(open('artifacts/cosine_sim.pkl','rb'))
books = pickle.load(open('artifacts/books.pkl','rb'))
book_clusters = pickle.load(open('artifacts/book_clusters.pkl','rb'))


In [2]:
# Evaluate Item-based CF (KNN)

# Chọn ngẫu nhiên 100 sách
test_books_idx = np.random.choice(len(book_pivot), size=100, replace=False)

true_ratings = []
pred_ratings = []

for idx in test_books_idx:
    # Vector sách
    book_vector = book_pivot.iloc[idx,:].values.reshape(1,-1)
    distance, suggestion = model.kneighbors(book_vector, n_neighbors=6)
    
    # Dự đoán = rating trung bình của neighbors
    neighbors_idx = suggestion[0][1:]  # bỏ chính nó
    pred_rating = book_pivot.iloc[neighbors_idx,:].mean().mean()
    
    # Rating thật = trung bình rating sách đó
    true_rating = book_pivot.iloc[idx,:].mean()
    
    pred_ratings.append(pred_rating)
    true_ratings.append(true_rating)

# Tính RMSE và MAE
rmse = np.sqrt(mean_squared_error(true_ratings, pred_ratings))
mae = mean_absolute_error(true_ratings, pred_ratings)

print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")


RMSE: 0.1557
MAE: 0.1290


In [3]:
# Evaluate Content-based TF-IDF

# Lấy 100 sách test
test_books = np.random.choice(books_tfidf['title'].unique(), size=100, replace=False)

correct = 0
total = len(test_books)

def recommend_content(book_title, top_n=5):
    idx = books_tfidf[books_tfidf['title'] == book_title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    book_indices = [i[0] for i in sim_scores]
    return books_tfidf['title'].iloc[book_indices]

# Kiểm tra Precision@5
for book in test_books:
    try:
        recommended = recommend_content(book, top_n=5)
        if book in recommended.values:
            correct += 1
    except:
        pass  # có thể 1 số sách lỗi index

precision_at_5 = correct / total
print(f"Precision@5: {precision_at_5:.4f}")


Precision@5: 0.7900
