In [3]:
from pymongo import MongoClient

In [2]:
import pandas as pd

In [12]:
from pymongo import MongoClient
import pandas as pd
from datetime import datetime, timedelta
from collections import Counter
import ast
import numpy as np
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if present) into the environment.
load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')
if not MONGO_URI:
    # Without a URI, MongoClient falls back to its default host (localhost),
    # so the analysis would silently run against the wrong server.
    raise RuntimeError(
        "MONGO_URI is not set; define it in the environment or in a .env file."
    )

# Connect
client = MongoClient(MONGO_URI)

# Databases
# Recommendation log: documents queried by 'time' in calculate_entropy.
log_db = client['log_database']
rec_collection = log_db['recommendation_log']

# Movie metadata: source of the movie_id -> genres mapping.
movie_db = client['movie_database']
movie_info_collection = movie_db['movie_info']

# Reference instant shared by both windows.
# NOTE(review): datetime.now() is naive local time; confirm this matches how
# the 'time' field of the log documents is stored.
now = datetime.now()

# Latest week: the 7 days ending at `now`.
end_week0 = now
start_week0 = end_week0 - timedelta(weeks=1)

# Comparison week: 28-35 days before `now` (i.e. four weeks before the latest
# week, despite the "week10" suffix in the variable names).
end_week10 = now - timedelta(weeks=4)
start_week10 = now - timedelta(weeks=5)

# Map every movie_id to the list of its genre names.
movie_genres = {}
for movie_doc in movie_info_collection.find():
    raw_genres = movie_doc.get('genres')

    # Only string-encoded genre lists are parsed; any other value is skipped.
    # NOTE(review): presumably 'genres' holds a Python-literal list of dicts
    # with a 'name' key -- confirm against the collection schema.
    if not isinstance(raw_genres, str):
        continue

    try:
        movie_genres[movie_doc.get('movie_id')] = [
            entry['name'] for entry in ast.literal_eval(raw_genres)
        ]
    except Exception:
        # Best effort: a movie whose genres cannot be parsed is left out.
        continue

# Helper: Calculate entropy
def calculate_entropy(start_time, end_time):
    """Return the genre entropy (in bits) of the 20 most recommended movies.

    Recommendation-log documents whose ``time`` lies in the half-open window
    ``[start_time, end_time)`` are fetched. Every string entry of their
    ``recommendation_results`` is stripped and tallied, the 20 most frequent
    movies are selected, and the genres of those movies (looked up in the
    module-level ``movie_genres`` mapping) are counted once per movie. The
    Shannon entropy of that genre distribution is returned.

    Args:
        start_time: Inclusive lower bound for the ``time`` field.
        end_time: Exclusive upper bound for the ``time`` field.

    Returns:
        The entropy in bits as a NumPy float; ``0.0`` when no genre could be
        counted (no matching log entries, or no known genres for the top
        movies).
    """
    cursor = rec_collection.find({
        'time': {'$gte': start_time, '$lt': end_time}
    })

    # Tally as we go instead of first collecting every recommendation in a
    # list; first-seen order (used for tie-breaking in most_common) is the same.
    recommendation_counts = Counter()
    for doc in cursor:
        results = doc.get('recommendation_results', [])
        recommendation_counts.update(
            item.strip() for item in results if isinstance(item, str)
        )

    genre_counter = Counter()
    for movie, _ in recommendation_counts.most_common(20):
        genre_counter.update(movie_genres.get(movie, []))

    counts = np.fromiter(genre_counter.values(), dtype=float)
    if counts.size == 0:
        # Nothing to measure: report zero entropy rather than -0.0.
        return np.float64(0.0)

    total = counts.sum()
    probs = counts / total
    # Every tallied count is >= 1, so no probability is zero and no epsilon is
    # needed inside the logarithm (an epsilon would bias the result and make a
    # single-genre distribution slightly negative). Using log2(total / count)
    # keeps each term >= +0.0, so the result is never negative zero.
    return np.sum(probs * np.log2(total / counts))

# Genre entropy of the latest week and of the comparison week.
entropy_week0 = calculate_entropy(start_week0, end_week0)
entropy_week10 = calculate_entropy(start_week10, end_week10)

# Report both values and their difference (latest minus earlier), one per line.
print(
    f"Genre Entropy - Latest Week (Week 4): {entropy_week0:.4f}",
    f"Genre Entropy - 4 Weeks Ago (Week 0): {entropy_week10:.4f}",
    f"Entropy Change: {entropy_week0 - entropy_week10:.4f}",
    sep="\n",
)

Genre Entropy - Latest Week (Week 4): 3.3984
Genre Entropy - 4 Weeks Ago (Week 0): 3.5318
Entropy Change: -0.1334
