In [7]:
%pip install numpy

In [8]:
import numpy as np
from scipy.spatial.distance import cosine
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
# Toy catalogue: item id -> short free-text description of the book.
item_descriptions = {
    'Book1': 'A fantasy novel about a young wizard',
    'Book2': 'A science fiction story set in a dystopian future',
    'Book3': 'A romantic comedy about two best friends',
    'Book4': 'A thriller novel with a twisted plot',
    'Book5': 'A historical fiction book set in World War II',
}
# Item ids in catalogue order (a dict iterates in insertion order).
items = list(item_descriptions)

# Known ratings: user -> {item id -> numeric rating}. Items a user has not
# rated are simply absent from that user's inner dict.
ratings = {
    'Alice': {
        'Book1': 5,
        'Book2': 3,
        'Book3': 4,
    },
    'Bob': {
        'Book2': 4,
        'Book4': 5,
    },
    'Charlie': {
        'Book3': 3,
        'Book4': 4,
        'Book5': 5,
    },
}
# User names, in the order they appear in `ratings`.
users = list(ratings)


In [10]:
# Build one TF-IDF vector per item description. The corpus is assembled by
# walking `items` (rather than `item_descriptions.values()`) so that row i of
# `item_vectors` always describes items[i]: similarity lookups are done by
# position in `items`, and relying on the dict's own iteration order would
# silently misalign rows if the two orderings ever diverged.
vectorizer = TfidfVectorizer()
item_vectors = vectorizer.fit_transform(
    [item_descriptions[item_id] for item_id in items]
)

# Pairwise cosine similarity between items; entry [i, j] compares items[i]
# with items[j].
item_sim_matrix = cosine_similarity(item_vectors)

def content_based_filtering(user, item, weight_type='binary'):
    """Predict ``user``'s rating for ``item`` from the items they already rated.

    The prediction is a weighted average of the user's known ratings. Each
    rated item (other than ``item`` itself) contributes with weight
    ``weight * similarity``, where ``similarity`` is read from the
    module-level ``item_sim_matrix`` at the positions of the two items in the
    module-level ``items`` list.

    Parameters
    ----------
    user : str
        Key into the module-level ``ratings`` dict.
    item : str
        Target item; must be present in ``items``.
    weight_type : {'binary', 'rating', 'confidence'}, default 'binary'
        * ``'binary'``     -- every rated item gets weight 1.
        * ``'rating'``     -- weight is the absolute deviation of the rating
          from the user's mean rating, so ratings equal to the mean are ignored.
        * ``'confidence'`` -- placeholder for a future confidence function
          (rating age, source, ...); currently weight 1.

    Returns
    -------
    number
        The weighted-average rating, or 0 when no rated item carries any
        positive ``weight * similarity`` (i.e. there is no evidence).

    Raises
    ------
    ValueError
        If ``weight_type`` is not one of the supported values, or if ``item``
        is not in ``items``.
    KeyError
        If ``user`` has no entry in ``ratings``.
    """
    # Fail loudly on a typo instead of hitting an unbound (or stale) `weight`
    # inside the loop.
    supported_weight_types = ('binary', 'rating', 'confidence')
    if weight_type not in supported_weight_types:
        raise ValueError(
            f"Unknown weight_type {weight_type!r}; "
            f"expected one of {supported_weight_types}"
        )

    user_ratings = ratings[user]
    target_idx = items.index(item)

    # The mean is loop-invariant, so compute it once. It is only needed for
    # 'rating' weights, and only when the user has ratings (avoids 0-division).
    user_mean_rating = None
    if weight_type == 'rating' and user_ratings:
        user_mean_rating = sum(user_ratings.values()) / len(user_ratings)

    score = 0
    total_weight = 0
    for other_idx, other_item in enumerate(items):
        # Only items the user rated, excluding the target itself, are evidence.
        if other_item == item or other_item not in user_ratings:
            continue

        rating = user_ratings[other_item]
        sim = item_sim_matrix[target_idx, other_idx]

        if weight_type == 'rating':
            weight = abs(rating - user_mean_rating)
        else:
            # 'binary', and 'confidence' until a real confidence function exists.
            weight = 1

        score += weight * sim * rating
        total_weight += weight * sim

    # Normalise only when there is evidence; otherwise the accumulated 0 is
    # returned unchanged.
    if total_weight > 0:
        score /= total_weight

    return score

In [11]:
# Smoke check: predict Alice's score for Book4, which is not among her
# ratings, using rating-deviation weights.
user, item = 'Alice', 'Book4'
score = content_based_filtering(user=user, item=item, weight_type='rating')
print(f"Predicted score for {user} and {item}: {score}")

Predicted score for Alice and Book4: 5.0
