In [None]:
import torch  
# NOTE(review): this expression only constructs a device object; the result is
# not assigned to anything, so it is discarded (or merely echoed as the cell's
# output). It does not by itself choose the device used by later cells.
# Presumably the intent was to force CPU execution - confirm, and if so pass
# the device explicitly where the model is created.
torch.device("cpu")

In [None]:
# import necessary libraries
import os
from faker import Faker
import random
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Seed every random source this notebook draws from so that
# "Restart Kernel -> Run All" regenerates the same synthetic data.
SEED = 42
random.seed(SEED)     # stdlib generator: review counts and ratings
np.random.seed(SEED)  # NumPy's legacy global generator
# Faker keeps its own shared generator that the two calls above do not touch;
# without this line every job, word, sentence and paragraph differs per run.
Faker.seed(SEED)

In [None]:
def generate_users(num_users=50, seed=None):
    """Build a DataFrame of synthetic users with a profile, reviews and ratings.

    Each user gets an id of the form ``u000``, a profile string
    ``"<job> who enjoys <word>"``, between 2 and 4 Faker sentences as
    reviews, and one integer rating from 1 to 5 per review.

    Args:
        num_users: Number of user rows to generate.
        seed: Optional seed. When given, the Faker instance and the
            review-count/rating draws use private generators seeded with it,
            so repeated calls with the same arguments produce the same data
            and the global ``random`` state is left untouched. When ``None``
            (the default) the global ``random`` module and Faker's shared
            generator are used.

    Returns:
        pandas.DataFrame with the columns ``user_id``, ``profile``,
        ``reviews`` (list of str) and ``ratings`` (list of int). The columns
        are present even when ``num_users`` is 0.
    """
    faker = Faker()
    if seed is None:
        # The module itself exposes randint(), so it can stand in for a
        # Random instance and still honours any earlier random.seed() call.
        rng = random
    else:
        faker.seed_instance(seed)
        rng = random.Random(seed)

    users = []
    for i in range(num_users):
        job = faker.job()
        interest = faker.word()

        # The review count is drawn before the sentences are generated and
        # the ratings after them, so the order of random draws is stable.
        reviews = [faker.sentence(nb_words=10) for _ in range(rng.randint(2, 4))]
        ratings = [rng.randint(1, 5) for _ in reviews]

        users.append({
            "user_id": f"u{i:03d}",
            "profile": f"{job} who enjoys {interest}",
            "reviews": reviews,
            "ratings": ratings,
        })

    # Naming the columns keeps the schema intact for an empty result, so
    # downstream lookups such as users["user_id"] do not raise KeyError.
    return pd.DataFrame(users, columns=["user_id", "profile", "reviews", "ratings"])


In [None]:
def generate_books(num_books=100, seed=None):
    """Build a DataFrame of synthetic books with a title and description.

    Each book gets an id of the form ``b000``, a title made from a Faker
    sentence with its trailing period(s) stripped, and a Faker paragraph as
    its description.

    Args:
        num_books: Number of book rows to generate.
        seed: Optional seed. When given, the Faker instance created here is
            seeded with it, so repeated calls with the same arguments produce
            the same data. When ``None`` (the default) Faker's shared
            generator is used.

    Returns:
        pandas.DataFrame with the columns ``book_id``, ``title`` and
        ``description``. The columns are present even when ``num_books`` is 0.
    """
    faker = Faker()
    if seed is not None:
        faker.seed_instance(seed)

    # Dict values are evaluated top to bottom, so each book draws its title
    # before its description.
    books = [
        {
            "book_id": f"b{i:03d}",
            "title": faker.sentence(nb_words=4).rstrip("."),
            "description": faker.paragraph(nb_sentences=3),
        }
        for i in range(num_books)
    ]

    # Naming the columns keeps the schema intact for an empty result, so
    # downstream lookups such as books["title"] do not raise KeyError.
    return pd.DataFrame(books, columns=["book_id", "title", "description"])


In [None]:
def recommend_books(user_df, book_df, model, top_k=3):
    """Recommend the ``top_k`` most similar books for every user.

    A book is represented by the embedding of ``"<title> <description>"`` and
    a user by the embedding of the profile followed by all reviews joined
    with spaces. Books are ranked per user by cosine similarity, highest
    first; ties go to the book that appears earlier in ``book_df``.

    Args:
        user_df: DataFrame with ``user_id``, ``profile`` and ``reviews``
            (list of str) columns.
        book_df: DataFrame with ``book_id``, ``title`` and ``description``
            columns.
        model: Object whose ``encode(list_of_str, convert_to_numpy=True)``
            returns one embedding row per input string (for example a
            ``SentenceTransformer``).
        top_k: Maximum number of books per user. Values larger than the
            number of books return every book; 0 returns empty lists.

    Returns:
        dict mapping each user id to a list of book ids, best match first.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    # Check emptiness before touching any column: an empty frame may have no
    # columns at all, and encoding zero texts cannot be compared anyway.
    if len(user_df) == 0:
        return {}

    user_ids = user_df["user_id"].tolist()
    if top_k == 0 or len(book_df) == 0:
        return {user_id: [] for user_id in user_ids}

    book_ids = book_df["book_id"].to_numpy()
    book_texts = (book_df["title"] + " " + book_df["description"]).tolist()
    user_texts = [
        profile + " " + " ".join(reviews)
        for profile, reviews in zip(user_df["profile"], user_df["reviews"])
    ]

    # Encode all users in one call instead of one model call per user.
    book_embeddings = np.asarray(model.encode(book_texts, convert_to_numpy=True))
    user_embeddings = np.asarray(model.encode(user_texts, convert_to_numpy=True))

    # Rows are users, columns are books.
    similarities = cosine_similarity(user_embeddings, book_embeddings)

    # Sorting the negated scores with a stable sort gives descending order
    # with deterministic tie-breaking; a leading slice avoids the
    # `[-0:]` pitfall that would return every book when top_k is 0.
    ranked = np.argsort(-similarities, axis=1, kind="stable")[:, :top_k]

    return {
        user_id: book_ids[row].tolist()
        for user_id, row in zip(user_ids, ranked)
    }


In [11]:
def main():
    """Run the demo end to end and print recommendations for the first users.

    Generates 50 synthetic users and 100 synthetic books, loads the
    ``all-MiniLM-L6-v2`` sentence-embedding model, computes recommendations,
    and prints them for the first five users: first as bare id lists, then
    with the user profile and each book's title and a 70-character excerpt
    of its description.
    """
    print("Starting book recommendation system.\n")

    # Step 1: synthetic users and books
    print("Generating synthetic data.")
    users = generate_users(50)
    books = generate_books(100)

    # Step 2: embedding model
    print("\nLoading embedding model...")
    model = SentenceTransformer("all-MiniLM-L6-v2")

    # Step 3: similarity-based recommendations
    print("\nGenerating recommendations...")
    recommendations = recommend_books(users, books, model)

    # Both report sections show the same first five users.
    preview = list(recommendations.items())[:5]

    print("\n=== SIMPLE RECOMMENDATIONS ===")
    for uid, picks in preview:
        print(f"{uid} → Recommended: {picks}")

    print("\n=== DETAILED RECOMMENDATIONS ===")
    for uid, picks in preview:
        profile_text = users.loc[users["user_id"] == uid, "profile"].values[0]
        print(f"\nUSER {uid}: {profile_text}")

        for rank, bid in enumerate(picks, start=1):
            row = books.loc[books["book_id"] == bid].iloc[0]
            excerpt = row["description"][:70]
            print(f"   {rank}. {row['title']} ({bid})")
            print(f"      \"{excerpt}...\"")

# Run the demo only when this code executes as the top-level module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()

Starting book recommendation system.

Generating synthetic data.

Loading embedding model...

Generating recommendations...

=== SIMPLE RECOMMENDATIONS ===
u000 → Recommended: ['b088', 'b064', 'b046']
u001 → Recommended: ['b065', 'b081', 'b074']
u002 → Recommended: ['b056', 'b060', 'b088']
u003 → Recommended: ['b013', 'b068', 'b083']
u004 → Recommended: ['b064', 'b088', 'b068']

=== DETAILED RECOMMENDATIONS ===

USER u000: Pharmacist, community who enjoys thing
   1. Natural note society north (b088)
      "Hair figure training. Executive central although life important speak...."
   2. Threat among (b064)
      "Important will imagine loss worker. Seat nature professor guess finish..."
   3. Clearly out back (b046)
      "Medical program last represent city you natural. Occur international i..."

USER u001: Journalist, broadcasting who enjoys defense
   1. Necessary democratic hard (b065)
      "Speech defense off stand well him police. Character artist discover ac..."
   2. Good choi