# Creating Personalized Recommendations

This section allows you to create a new user profile, rate movies you've watched, and get personalized recommendations.

In [2]:
# Function to create a new user and add movie ratings

def _prompt_for_rating(prompt, allow_skip=False):
    """
    Ask the user for a rating until a valid one is entered.

    Args:
        prompt: Text shown to the user by input()
        allow_skip: If True, entering 0 means "not seen" and None is returned

    Returns:
        The rating as a float between 0.5 and 5, or None if the movie was skipped
    """
    if allow_skip:
        retry_message = "Please enter a rating between 0.5 and 5, or 0 if you haven't seen it."
    else:
        retry_message = "Please enter a rating between 0.5 and 5."

    while True:
        try:
            rating = float(input(prompt))
        except ValueError:
            print("Please enter a valid number.")
            continue

        if allow_skip and rating == 0:
            return None
        if 0.5 <= rating <= 5:
            return rating
        print(retry_message)


def create_user_profile(movies_df, existing_ratings_df, new_user_id=None):
    """
    Creates a new user profile and allows rating movies to get personalized recommendations.

    The user is first asked to rate the 20 most-rated movies, then may search
    for further titles by (literal, case-insensitive) substring.

    Args:
        movies_df: DataFrame containing movie information ('movieId', 'title')
        existing_ratings_df: DataFrame containing existing ratings
            ('userId', 'movieId', 'rating', 'timestamp')
        new_user_id: Optional user ID for the new user. If None, one will be generated.

    Returns:
        combined_ratings_df: The existing ratings with the new user's ratings appended
        user_id: The ID of the new user
    """
    # Imported locally so the function does not depend on the calling cell
    # having imported these names first.
    import time

    import pandas as pd

    # If no user_id is provided, generate one by taking the max user_id + 1
    if new_user_id is None:
        if existing_ratings_df.empty:
            # max() of an empty column is NaN, which is not a usable ID
            new_user_id = 1
        else:
            new_user_id = existing_ratings_df['userId'].max() + 1

    print(f"Creating a new user with ID: {new_user_id}")

    new_ratings = []
    rated_movie_ids = set()

    def add_rating(movie_id, rating):
        new_ratings.append({
            'userId': new_user_id,
            'movieId': movie_id,
            'rating': rating,
            'timestamp': int(time.time())
        })
        rated_movie_ids.add(movie_id)

    # Let's start by showing some popular movies to rate
    popular_movies = existing_ratings_df.groupby('movieId').size().sort_values(ascending=False).head(20)
    titles_by_id = movies_df.drop_duplicates(subset='movieId').set_index('movieId')['title']

    print("\nPlease rate some popular movies (1-5 stars, 0 if you haven't seen it):")

    for movie_id in popular_movies.index.tolist():
        if movie_id not in titles_by_id.index:
            # A rated movie with no metadata row cannot be shown to the user
            continue
        rating = _prompt_for_rating(f"{titles_by_id[movie_id]}: ", allow_skip=True)
        if rating is not None:
            add_rating(movie_id, rating)

    # Allow the user to search for and rate additional movies
    while True:
        search_term = input("\nSearch for more movies to rate (or type 'done' to finish): ").strip()

        if search_term.lower() == 'done':
            break
        if not search_term:
            # An empty search would match every title
            continue

        # regex=False: the term is user input, so characters such as "(" must
        # be matched literally instead of being compiled as a pattern.
        # na=False: titles that are missing never match.
        is_match = movies_df['title'].str.contains(search_term, case=False, regex=False, na=False)
        matching_movies = movies_df[is_match].head(10)

        if matching_movies.empty:
            print("No movies found matching that search term. Try again.")
            continue

        # Display the top 10 matches
        print("\nMatching movies:")
        for i, (_, movie) in enumerate(matching_movies.iterrows()):
            print(f"{i+1}. {movie['title']}")

        # Let the user select a movie to rate
        selection = input("\nEnter the number of the movie you want to rate (or any other key to search again): ")

        try:
            selection_idx = int(selection) - 1
        except ValueError:
            # Anything that is not a number means "search again"
            continue
        if not 0 <= selection_idx < len(matching_movies):
            continue

        selected_movie = matching_movies.iloc[selection_idx]
        movie_id = selected_movie['movieId']
        movie_title = selected_movie['title']

        # Check if the movie has already been rated
        if movie_id in rated_movie_ids:
            print(f"You've already rated {movie_title}. Let's find something else.")
            continue

        add_rating(movie_id, _prompt_for_rating(f"Rating for {movie_title} (0.5-5): "))

    print(f"\nThank you! You've rated {len(new_ratings)} movies.")

    if not new_ratings:
        # Nothing to append; avoid concatenating an empty, column-less frame
        return existing_ratings_df.copy(), new_user_id

    # Combine the new ratings with the existing ones
    new_ratings_df = pd.DataFrame(new_ratings)
    combined_ratings_df = pd.concat([existing_ratings_df, new_ratings_df], ignore_index=True)

    return combined_ratings_df, new_user_id


def _format_genres(genres):
    """Return genres as a single 'A|B|C' string, whether given a string or a list."""
    if isinstance(genres, str):
        # Already pipe-delimited; joining a string would split it into characters
        return genres
    try:
        return "|".join(str(genre) for genre in genres)
    except TypeError:
        # Missing value (e.g. NaN) or another non-iterable
        return ""


def get_personal_recommendations(user_id, ratings_df, movies_df, n_recommendations=10, n_neighbors=50):
    """
    Generate personalized recommendations for a user based on their ratings.

    Ratings are mean-centred per user, the most similar users are found with a
    dot product of the centred rating vectors, and each unseen movie's
    predicted rating is averaged 50/50 with the movie's Bayesian average.

    Args:
        user_id: The user ID to get recommendations for
        ratings_df: DataFrame containing all ratings ('userId', 'movieId', 'rating')
        movies_df: DataFrame containing movie information ('movieId', 'title', 'genres');
            'genres' may be a pipe-delimited string or a list of strings
        n_recommendations: Number of recommendations to return
        n_neighbors: Maximum number of similar users to consider

    Returns:
        recommendations_df: DataFrame with columns 'movieId', 'title', 'genres',
            'predicted_rating' (the blended score) and 'bayesian_avg', best first.
            Empty if the user is unknown or no prediction can be made.
    """
    import numpy as np
    import pandas as pd
    from scipy.sparse import csr_matrix

    result_columns = ['movieId', 'title', 'genres', 'predicted_rating', 'bayesian_avg']

    # Step 1: Map user and movie IDs to matrix positions (sorted by ID)
    user_codes, user_ids = pd.factorize(ratings_df['userId'], sort=True)
    movie_codes, movie_ids = pd.factorize(ratings_df['movieId'], sort=True)

    user_positions = np.flatnonzero(np.asarray(user_ids) == user_id)
    if len(user_positions) == 0:
        print(f"User {user_id} not found in the dataset.")
        return pd.DataFrame(columns=result_columns)
    user_idx = int(user_positions[0])

    # Step 2: Normalize the ratings and build the utility matrices
    ratings = ratings_df['rating'].to_numpy(dtype=float)
    user_means = ratings_df.groupby('userId')['rating'].transform('mean').to_numpy(dtype=float)
    shape = (len(user_ids), len(movie_ids))

    X_norm = csr_matrix((ratings - user_means, (user_codes, movie_codes)), shape=shape)
    # A separate "has rated" matrix is required: a rating equal to the user's
    # mean is 0 after centring and would otherwise look like "not rated".
    rated = csr_matrix((np.ones(len(ratings)), (user_codes, movie_codes)), shape=shape)

    # Step 3: Calculate Bayesian averages
    movie_stats = ratings_df.groupby('movieId')['rating'].agg(['sum', 'count', 'mean'])
    C = movie_stats['count'].mean()  # average number of ratings per movie
    m = movie_stats['mean'].mean()   # average of the per-movie mean ratings
    bayesian_avg = ((C * m + movie_stats['sum']) / (C + movie_stats['count']))
    bayesian_avg = bayesian_avg.reindex(movie_ids).to_numpy(dtype=float)

    # Step 4: Find the most similar users
    user_similarities = X_norm.dot(X_norm[user_idx].T).toarray().ravel()
    user_similarities[user_idx] = 0  # Remove the user from their own similarity list

    ranked_users = np.argsort(user_similarities)[::-1][:max(int(n_neighbors), 0)]
    # Only positively similar users are useful; zero or negative weights make
    # the weighted average undefined or misleading.
    neighbors = ranked_users[user_similarities[ranked_users] > 0]

    if neighbors.size == 0 or n_recommendations <= 0:
        return pd.DataFrame(columns=result_columns)

    # Step 5: Predict ratings for every movie the user has not rated
    weights = user_similarities[neighbors][:, np.newaxis]
    neighbor_rated = rated[neighbors].toarray() > 0
    neighbor_norm_ratings = X_norm[neighbors].toarray()

    weight_totals = (neighbor_rated * weights).sum(axis=0)
    weighted_sums = (neighbor_norm_ratings * weights).sum(axis=0)

    already_rated = rated[user_idx].toarray().ravel() > 0
    candidates = np.flatnonzero((weight_totals > 0) & ~already_rated)

    # Convert back to the original scale by adding the user's own mean
    user_mean = ratings[user_codes == user_idx].mean()
    predicted = weighted_sums[candidates] / weight_totals[candidates] + user_mean

    # Equal weight to collaborative filtering and Bayesian average
    combined = 0.5 * predicted + 0.5 * bayesian_avg[candidates]

    # Stable sort keeps ties in ascending movie order
    best_first = np.argsort(-combined, kind='stable')

    # Step 6: Create the recommendations DataFrame
    movies_by_id = movies_df.drop_duplicates(subset='movieId').set_index('movieId')

    recommendations = []
    for position in best_first:
        movie_idx = candidates[position]
        movie_id = movie_ids[movie_idx]
        if movie_id not in movies_by_id.index:
            # No metadata for this movie; it cannot be presented
            continue
        movie_info = movies_by_id.loc[movie_id]

        recommendations.append({
            'movieId': movie_id,
            'title': movie_info['title'],
            'genres': _format_genres(movie_info['genres']),
            'predicted_rating': combined[position],
            'bayesian_avg': bayesian_avg[movie_idx]
        })
        if len(recommendations) >= n_recommendations:
            break

    return pd.DataFrame(recommendations, columns=result_columns)


# Example of how to use these functions. The code is wrapped in a
# triple-quoted string so that it does not run when the cell is executed;
# remove the surrounding quotes to try it. It expects `movies` and `ratings`
# DataFrames to exist already, and it imports `time` because
# create_user_profile() timestamps each new rating with time.time().
"""
import time

# Create a new user profile and add ratings
combined_ratings, my_user_id = create_user_profile(movies, ratings)

# Generate recommendations for the new user
my_recommendations = get_personal_recommendations(my_user_id, combined_ratings, movies, n_recommendations=15)

# Display the recommendations
print("\nYour Personalized Movie Recommendations:")
for i, (_, movie) in enumerate(my_recommendations.iterrows()):
    print(f"{i+1}. {movie['title']} - Predicted rating: {movie['predicted_rating']:.2f}")
    
# Compare with what you've already rated
print("\nMovies you've already rated:")
my_ratings = combined_ratings[combined_ratings['userId'] == my_user_id]
my_rated_movies = my_ratings.merge(movies[['movieId', 'title']], on='movieId')
my_rated_movies = my_rated_movies.sort_values(by='rating', ascending=False)

for _, row in my_rated_movies.iterrows():
    print(f"- {row['title']} (Your rating: {row['rating']})")
"""

'\nimport time\n\n# Create a new user profile and add ratings\ncombined_ratings, my_user_id = create_user_profile(movies, ratings)\n\n# Generate recommendations for the new user\nmy_recommendations = get_personal_recommendations(my_user_id, combined_ratings, movies, n_recommendations=15)\n\n# Display the recommendations\nprint("\nYour Personalized Movie Recommendations:")\nfor i, (_, movie) in enumerate(my_recommendations.iterrows()):\n    print(f"{i+1}. {movie[\'title\']} - Predicted rating: {movie[\'predicted_rating\']:.2f}")\n    \n# Compare with what you\'ve already rated\nprint("\nMovies you\'ve already rated:")\nmy_ratings = combined_ratings[combined_ratings[\'userId\'] == my_user_id]\nmy_rated_movies = my_ratings.merge(movies[[\'movieId\', \'title\']], on=\'movieId\')\nmy_rated_movies = my_rated_movies.sort_values(by=\'rating\', ascending=False)\n\nfor _, row in my_rated_movies.iterrows():\n    print(f"- {row[\'title\']} (Your rating: {row[\'rating\']})")\n'

## How to Use the Personal Recommendation System

1. **Create your user profile**:
   - The `create_user_profile()` function allows you to rate popular movies and search for specific movies to rate
   - It returns the combined ratings DataFrame (the existing ratings plus your new ones) and your user ID

2. **Get personalized recommendations**:
   - The `get_personal_recommendations()` function uses your ratings to generate personalized recommendations
   - It combines collaborative filtering with Bayesian averaging for better quality recommendations

3. **Enable the example code** at the bottom of the cell (remove the triple quotes that wrap it) to try it out

This personal recommendation system uses the same advanced techniques from the main notebook:
- Normalized ratings to remove personal rating bias
- Collaborative filtering based on similar users
- Bayesian averages to handle movies with few ratings

## Example Usage

```python
# First, create your user profile
combined_ratings, my_user_id = create_user_profile(movies, ratings)

# Then get your personalized recommendations
my_recommendations = get_personal_recommendations(my_user_id, combined_ratings, movies)

# View your top recommendations
print("\nYour Personalized Movie Recommendations:")
for i, (_, movie) in enumerate(my_recommendations.iterrows()):
    print(f"{i+1}. {movie['title']} - Predicted rating: {movie['predicted_rating']:.2f}")
```

This implementation allows you to:
1. Rate as many or as few movies as you want
2. Search for specific movies you've watched
3. Get personalized recommendations based on your taste
4. See predicted ratings for movies you might enjoy