In [1]:
import numpy as np
from scipy.spatial.distance import pdist, squareform

# Sample ratings, keyed by user name and then by movie title.
# Every user carries an entry for every title; a score of 0 is what
# get_recommendations treats as "not rated" (it only considers ratings > 0).
ratings = {
    user: dict(zip(
        (
            'The Shawshank Redemption',
            'The Godfather',
            'Pulp Fiction',
            'The Dark Knight',
            'Forrest Gump',
            'Inception',
        ),
        scores,
    ))
    for user, scores in (
        ('Alice', (5, 5, 4, 0, 3, 0)),
        ('Bob', (4, 5, 0, 5, 0, 4)),
        ('Charlie', (0, 4, 5, 3, 5, 0)),
        ('David', (5, 0, 4, 0, 4, 5)),
        ('Eve', (3, 4, 0, 4, 0, 3)),
        ('Frank', (0, 3, 5, 4, 5, 0)),
    )
}


def create_user_item_matrix(ratings_data):
    """
    Build a dense user-by-item rating matrix plus the index lookups for it.

    Rows follow the iteration order of ``ratings_data``; columns follow the
    sorted order of every item title that appears for any user. Cells with no
    rating in the input stay at 0.0.

    Args:
        ratings_data (dict): Mapping of user name -> {item name: rating}.

    Returns:
        tuple: A tuple containing:
            - matrix (np.array): Float matrix of shape (n_users, n_items).
            - user_map (dict): User name -> row index.
            - item_map (dict): Item name -> column index.
    """
    # Row order mirrors the dictionary's own key order.
    row_of = {name: row for row, name in enumerate(ratings_data)}

    # Gather the union of all rated titles, then fix a deterministic
    # (alphabetical) column order.
    titles = set()
    for rated in ratings_data.values():
        titles.update(rated)
    col_of = {title: col for col, title in enumerate(sorted(titles))}

    grid = np.zeros((len(row_of), len(col_of)))
    for name, rated in ratings_data.items():
        row = row_of[name]
        for title, score in rated.items():
            grid[row, col_of[title]] = score

    return grid, row_of, col_of


# --- 2. COLLABORATIVE FILTERING LOGIC ---

def calculate_cosine_similarity(matrix):
    """
    Calculates the cosine similarity between users (rows) in the user-item matrix.

    Cosine similarity measures the cosine of the angle between two vectors,
    which indicates how similar their orientations are. A value of 1 means
    identical direction, 0 means unrelated (orthogonal), and -1 means
    diametrically opposed.

    A row made up entirely of zeros has no direction, so its cosine distance
    to any other row is undefined and ``pdist`` reports NaN for it. Such rows
    are given a similarity of 0 to every other row instead, so NaN never
    reaches the caller. The diagonal is always 1.

    Args:
        matrix (np.array): The user-item matrix (rows are users).

    Returns:
        np.array: A square matrix where similarity_matrix[i, j] is the
                  cosine similarity between user i and user j.
    """
    matrix = np.asarray(matrix, dtype=float)

    # pdist returns the condensed pairwise cosine *distances*; squareform
    # expands them to a square matrix with a zero diagonal, so 1 - distance
    # yields similarities with ones on the diagonal.
    similarity_matrix = 1 - squareform(pdist(matrix, 'cosine'))

    # Zero-norm rows produce 0/0 inside the distance computation. Treat them
    # as similar to nobody rather than letting NaN leak out.
    empty_rows = ~matrix.any(axis=1)
    if empty_rows.any():
        similarity_matrix[empty_rows, :] = 0.0
        similarity_matrix[:, empty_rows] = 0.0
        # The two assignments above also cleared the self-similarity entries.
        np.fill_diagonal(similarity_matrix, 1.0)

    return similarity_matrix


def get_recommendations(target_user, ratings_data, similarity_threshold=0.5):
    """
    Recommend unseen movies to a user via user-user collaborative filtering.

    Steps:
    1. Build the user-item matrix and the pairwise user cosine similarities.
    2. Keep the other users whose similarity to the target is strictly above
       ``similarity_threshold``.
    3. For every movie such a neighbour rated above 0 and the target has not
       rated above 0, accumulate ``rating * similarity``.
    4. Rank the movies by accumulated score, highest first.

    The score is a plain similarity-weighted sum (not an average), so it grows
    with the number of neighbours who rated the movie and is not bounded by
    the rating scale.

    Progress messages are written to stdout with ``print``.

    Args:
        target_user (str): The name of the user to get recommendations for.
        ratings_data (dict): The raw ratings data (user -> {movie: rating}).
        similarity_threshold (float): The minimum similarity score (exclusive)
                                      for a user to count as "similar".

    Returns:
        list: (score, movie_title) tuples sorted by descending score; an empty
              list when no user clears the threshold. If ``target_user`` is
              not a key of ``ratings_data``, an error message string is
              returned instead of a list.
    """
    if target_user not in ratings_data:
        return f"Error: User '{target_user}' not found in the dataset."

    matrix, user_map, item_map = create_user_item_matrix(ratings_data)

    # Reverse lookups: matrix index -> name.
    name_of_user = {idx: name for name, idx in user_map.items()}
    name_of_item = {idx: name for name, idx in item_map.items()}

    similarities = calculate_cosine_similarity(matrix)
    target_idx = user_map[target_user]
    target_row = similarities[target_idx]

    # Everyone above the threshold, minus the target user themself.
    neighbours = np.where(target_row > similarity_threshold)[0]
    neighbours = neighbours[neighbours != target_idx]

    if neighbours.size == 0:
        print(f"No users found similar enough to '{target_user}' (threshold={similarity_threshold}). Try lowering the threshold.")
        return []

    print(f"Found {len(neighbours)} users similar to '{target_user}': {[name_of_user[i] for i in neighbours]}")

    # Titles the target has already rated; these are never recommended.
    already_rated = {
        title for title, score in ratings_data[target_user].items() if score > 0
    }

    totals = {}
    for neighbour_idx in neighbours:
        weight = target_row[neighbour_idx]
        for column, score in enumerate(matrix[neighbour_idx]):
            if not score > 0:
                continue
            title = name_of_item[column]
            if title in already_rated:
                continue
            totals[title] = totals.get(title, 0) + score * weight

    # Stable sort: ties keep the order in which titles were first encountered.
    ranked = list(totals.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    return [(total, title) for title, total in ranked]


# --- 3. GET AND PRINT RECOMMENDATIONS ---

if __name__ == "__main__":

    def _print_recommendations(recommendations):
        """Print one aligned line per (score, movie) pair; nothing if empty."""
        if not recommendations:
            return
        for score, movie in recommendations:
            print(f"Movie: {movie:<20} | Predicted Score: {score:.2f}")

    # Visual divider written between consecutive examples.
    section_break = "\n" + "=" * 50 + "\n"

    # Example 1: Alice, with the default similarity threshold.
    target_user_alice = 'Alice'
    print(f"--- Recommendations for {target_user_alice} ---")
    recommendations_alice = get_recommendations(target_user_alice, ratings)
    _print_recommendations(recommendations_alice)
    print(section_break)

    # Example 2: Charlie, with the default similarity threshold.
    target_user_charlie = 'Charlie'
    print(f"--- Recommendations for {target_user_charlie} ---")
    recommendations_charlie = get_recommendations(target_user_charlie, ratings)
    _print_recommendations(recommendations_charlie)
    print(section_break)

    # Example 3: David, with a lower threshold so more users count as similar.
    target_user_david = 'David'
    print(f"--- Recommendations for {target_user_david} (with lower threshold) ---")
    recommendations_david = get_recommendations(
        target_user_david, ratings, similarity_threshold=0.2
    )
    _print_recommendations(recommendations_david)

--- Recommendations for Alice ---
Found 5 users similar to 'Alice': ['Bob', 'Charlie', 'David', 'Eve', 'Frank']
Movie: The Dark Knight      | Predicted Score: 10.02
Movie: Inception            | Predicted Score: 7.39

==================================================

--- Recommendations for Charlie ---
Found 3 users similar to 'Charlie': ['Alice', 'David', 'Frank']
Movie: The Shawshank Redemption | Predicted Score: 6.22
Movie: Inception            | Predicted Score: 2.55

==================================================

--- Recommendations for David (with lower threshold) ---
Found 5 users similar to 'David': ['Alice', 'Bob', 'Charlie', 'Eve', 'Frank']
Movie: The Godfather        | Predicted Score: 11.26
Movie: The Dark Knight      | Predicted Score: 7.88
