In [3]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Sample user profile that drives the recommendation.
# "Mood" is looked up in mood_genre_associations and "MoviesWatched" holds
# movie ids that are matched against the 'Id' column of the dataset.
user_data = dict(
    Id=1,
    Name="Breno Brito",
    Username="BBrito",
    Password="1234",
    Mood="Relaxed",
    MoviesWatched=[1, 3, 4, 10, 16, 122, 136, 149, 38],
)

# Read the movie catalogue from CSV (the path is relative to the working
# directory). The code below reads the columns 'Id', 'Movie', 'Genre',
# 'Year' and 'AudienceScore' from it.
movies_df = pd.read_csv(filepath_or_buffer="../data/movies.csv")

# Genres considered a good fit for each mood; a movie whose genre is listed
# for the user's current mood receives the "Mood" weight as a bonus.
mood_genre_associations = dict(
    Happy=["Comedy", "Romance", "Animation"],
    Sad=["Drama", "Romance", "Biography"],
    Angry=["Action", "Crime", "Thriller"],
    Excited=["Adventure", "Action", "Fantasy"],
    Anxious=["Thriller", "Horror", "Mystery"],
    Relaxed=["Comedy", "Drama", "Documentary"],
    Grateful=["Musical", "Biography", "Romance"],
)

# Weight of each term of the composite score (the four weights sum to 1.0)
weights = dict(
    Mood=0.3,
    MovieGenre=0.3,
    Rate=0.3,
    Year=0.1,
)

# One-hot encode the 'Genre' column.
# dtype=int keeps the indicator columns numeric on every pandas version:
# since pandas 2.0 get_dummies defaults to bool, and bool columns are not
# matched by select_dtypes(include=['number']) below, which would silently
# drop every genre from the KNN feature matrix.
movies_df_encoded = pd.get_dummies(movies_df, columns=['Genre'], dtype=int)

# Initialize and train the KNN model
k = 5  # Number of neighbors to consider
knn_model = NearestNeighbors(n_neighbors=k)
# Every numeric column is used as a feature.
# NOTE(review): this includes 'Id' when it is numeric, and the columns are
# not scaled - confirm that both are intended.
features = movies_df_encoded.select_dtypes(include=['number'])
knn_model.fit(features)

# Recommendation Generation
def recommend_movies_for_user(user_data, upcoming_movies, top_n=10):
    """Score every movie in ``movies_df_encoded`` for a user; return the best.

    The composite score of a movie is the sum of four terms, each scaled by
    the matching entry of the module-level ``weights`` dict:

    * ``MovieGenre`` - share of the user's watched movies that have the
      movie's genre (the first flagged ``Genre_*`` column of the movie).
    * ``Mood`` - the full weight when the movie's genre is listed for the
      user's mood in ``mood_genre_associations``, otherwise nothing.
    * ``Rate`` - the movie's ``AudienceScore`` divided by 100.
    * ``Year`` - ``1 - |year - mean watched year| / year range`` where the
      range is taken from ``movies_df``.

    Args:
        user_data: Mapping with ``"MoviesWatched"`` (iterable of movie ids)
            and ``"Mood"`` (looked up in ``mood_genre_associations``; an
            unknown mood simply earns no mood bonus).
        upcoming_movies: Not used by this function; kept so existing calls
            keep working.
        top_n: Number of rows to return (defaults to 10).

    Returns:
        The ``top_n`` highest-scoring rows of ``movies_df_encoded`` sorted by
        ``CompositeScore`` in descending order.

    Raises:
        KeyError: If ``user_data`` has no ``"MoviesWatched"`` or ``"Mood"``.

    Side effects:
        Writes the scores into ``movies_df_encoded['CompositeScore']``
        (kept because code outside this function may read that column).
    """
    genre_prefix = 'Genre_'
    genre_columns = [
        col for col in movies_df_encoded.columns
        if isinstance(col, str) and col.startswith(genre_prefix)
    ]
    # Compare with 1 so that bool, uint8 and int indicator columns all behave
    # the same. Summing raw bool indicators one by one would act as a logical
    # OR and cap every genre count at 1.
    genre_flags = movies_df_encoded[genre_columns].eq(1)

    watched_ids = list(user_data["MoviesWatched"])
    total_movies_watched = len(watched_ids)

    # --- Genre shares of the watch history -------------------------------
    # One row per watched id, using the first catalogue row for that id.
    # Ids missing from the catalogue become all-False rows: they flag no
    # genre but still count in the denominator.
    first_row_per_id = ~movies_df_encoded['Id'].duplicated()
    flags_by_id = genre_flags[first_row_per_id]
    flags_by_id.index = movies_df_encoded.loc[first_row_per_id, 'Id']
    watched_genre_counts = flags_by_id.reindex(watched_ids, fill_value=False).sum()

    if total_movies_watched:
        genre_share = (watched_genre_counts / total_movies_watched).to_dict()
    else:
        # No history: no genre preference can be derived
        genre_share = {}

    # --- Genre + mood contribution, precomputed per genre column ---------
    relevant_genres = mood_genre_associations.get(user_data["Mood"], [])
    genre_term = {}
    for col in genre_columns:
        term = genre_share.get(col, 0.0) * weights["MovieGenre"]
        # Strip the prefix by length so genre names containing '_' survive
        if col[len(genre_prefix):] in relevant_genres:
            term += weights["Mood"]
        genre_term[col] = term

    # The first flagged genre column decides a movie's genre; a movie with
    # no flagged genre gets neither the genre nor the mood contribution.
    genre_part = []
    for row_flags in genre_flags.to_numpy():
        first_genre = next(
            (col for col, hit in zip(genre_columns, row_flags) if hit), None
        )
        genre_part.append(0.0 if first_genre is None else genre_term[first_genre])

    # --- Rating contribution ----------------------------------------------
    rate_part = movies_df_encoded["AudienceScore"] / 100 * weights["Rate"]

    # --- Year contribution ------------------------------------------------
    year_span = movies_df['Year'].max() - movies_df['Year'].min()
    watched_rows = movies_df_encoded['Id'].isin(watched_ids)
    watched_year = movies_df_encoded.loc[watched_rows, "Year"].mean()
    if pd.isna(watched_year):
        # No watched movie found in the catalogue: no year preference
        year_part = 0.0
    elif year_span == 0:
        # All movies share one year, so every movie is equally close
        year_part = weights["Year"]
    else:
        year_distance = (movies_df_encoded["Year"] - watched_year).abs()
        year_part = (1 - year_distance / year_span) * weights["Year"]

    composite_scores = (
        pd.Series(genre_part, index=movies_df_encoded.index, dtype=float)
        + rate_part
        + year_part
    )

    # Add composite scores as a new column to the DataFrame (positional
    # assignment, so the index labels play no role)
    movies_df_encoded['CompositeScore'] = composite_scores.to_numpy()

    # Sort movies based on composite scores and keep the best ones
    recommended_movies = movies_df_encoded.sort_values(by='CompositeScore', ascending=False)
    return recommended_movies.head(top_n)

# Generate movie recommendations for the user
recommended_movies = recommend_movies_for_user(user_data, False)

# The genre indicator columns are the same for every row, so find them once
genre_prefix = 'Genre_'
genre_indicator_columns = [
    col for col in recommended_movies.columns
    if isinstance(col, str) and col.startswith(genre_prefix)
]

# Print one fixed-width line per recommended movie
for index, row in recommended_movies.iterrows():
    # Names of the genres flagged with 1 for this movie; the prefix is
    # stripped by length so genre names containing '_' are not truncated
    genre_columns = [col[len(genre_prefix):] for col in genre_indicator_columns if row[col] == 1]

    # A movie without any flagged genre must not abort the listing
    genre_label = genre_columns[0] if genre_columns else "Unknown"

    # Format the output with fixed width for each column
    print(f"{row['Movie']:40} {genre_label:15} {row['Year']:10} {row['AudienceScore']:5} {row['CompositeScore']:20}")



300                                      Action                2007    90   0.6985714285714296
Iron Man                                 Action                2008    91   0.6968095238095228
Transformers                             Action                2007    89   0.6955714285714297
Grindhouse                               Action                2007    86   0.6865714285714296
Live Free or Die Hard                    Action                2007    86   0.6865714285714296
Avatar                                   Action                2009    92   0.6855238095238084
Star Trek                                Action                2009    91   0.6825238095238085
Shooter                                  Action                2007    82   0.6745714285714296
Inception                                Action                2010    93   0.6742380952380942
Zombieland                               Action                2009    87   0.6705238095238085
