In [1]:
import pickle
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a vectorizer pickle that comes from a trusted source.
with open('tfidf_vectorizer.pkl', 'rb') as f:
    vectorizer = pickle.load(f)

# The JSON files are decoded explicitly as UTF-8 so that loading does not
# depend on the platform's default text encoding (which is locale-specific
# and would break or corrupt non-ASCII characters on some systems).
with open('original_text.json', 'r', encoding='utf-8') as f:
    original_texts = json.load(f)

with open('model_similarity.json', 'r', encoding='utf-8') as f:
    model = json.load(f)

In [4]:
# Pull the two lookup sequences out of the loaded model; they are indexed
# by position when building similarity results.
movie_ids, movie_titles = model['movie_ids'], model['movie_titles']

In [5]:
# Cache of the vectorized corpus, stored as (vectorizer, texts, matrix).
# Keeping references to the source objects lets the cache be invalidated by
# identity whenever the notebook reloads the vectorizer or the texts.
_ORIGINAL_MATRIX_CACHE = None


def _get_original_matrix():
    """Return `vectorizer.transform(original_texts)`, computed once per loaded vectorizer/corpus pair."""
    global _ORIGINAL_MATRIX_CACHE
    cached = _ORIGINAL_MATRIX_CACHE
    if (
        cached is None
        or cached[0] is not vectorizer
        or cached[1] is not original_texts
    ):
        cached = (vectorizer, original_texts, vectorizer.transform(original_texts))
        _ORIGINAL_MATRIX_CACHE = cached
    return cached[2]


def calculate_similarity_for_new_movie(new_movie, top_n=6):
    """Rank the known movies by cosine similarity to a new movie.

    The new movie's 'genres', 'keywords' and 'overview' fields are joined
    into one text, vectorized with the module-level `vectorizer`, and
    compared against the vectorized `original_texts`.

    Args:
        new_movie: Mapping that may contain 'genres', 'keywords' and
            'overview'. Missing keys and None values are treated as
            empty strings.
        top_n: Maximum number of results to return (default 6, the
            previously hard-coded value). Values <= 0 yield an empty list.

    Returns:
        A list of at most `top_n` dicts with keys 'id', 'title' and
        'similarity' (a float), ordered from most to least similar.

    Note:
        The vectorized corpus is cached between calls. The cache is
        refreshed when `vectorizer` or `original_texts` is rebound to a new
        object, but not when `original_texts` is mutated in place.
    """
    if top_n <= 0:
        # Guard explicitly: a slice such as [-0:] would return every movie.
        return []

    def _field(name):
        # A key that is present but null must not become the token "None".
        value = new_movie.get(name, '')
        return '' if value is None else value

    # Same layout as before (including the leading space): genres, keywords, overview.
    feature_text = f" {_field('genres')} {_field('keywords')} {_field('overview')}"

    new_vector = vectorizer.transform([feature_text])
    similarities = cosine_similarity(new_vector, _get_original_matrix())[0]

    # Ascending argsort reversed gives best-first; then keep the first top_n.
    similar_indices = np.argsort(similarities)[::-1][:top_n]

    return [
        {
            'id': movie_ids[idx],
            'title': movie_titles[idx],
            'similarity': float(similarities[idx]),
        }
        for idx in similar_indices
    ]

In [6]:
# Example input: a movie that is not part of the loaded catalogue.
new_movie = dict([
    ('id', 99999),
    ('title', 'The Batman 2'),
    ('genres', 'Action Crime Drama'),
    ('overview', 'Bruce Wayne faces a new villain...'),
    ('keywords', 'dc comics superhero batman'),
])

# Rank the catalogue against the example movie.
similar_movies = calculate_similarity_for_new_movie(new_movie)

# Report each match with its score rounded to three decimals.
print(f"Films similaires à '{new_movie['title']}':")
for movie in similar_movies:
    print(f"  - {movie['title']} (score: {movie['similarity']:.3f})")

Films similaires à 'The Batman 2':
  - Batman: The Dark Knight Returns, Part 1 (score: 0.357)
  - Batman Begins (score: 0.326)
  - Zack Snyder's Justice League (score: 0.301)
  - Batman and Superman: Battle of the Super Sons (score: 0.261)
  - The Batman (score: 0.219)
  - The Dark Knight (score: 0.208)
