<a href="https://colab.research.google.com/github/rakeshxp2007/Machine-Learning/blob/main/User_based_Collaborative_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **FLOW DIAGRAM OF THE ALGORITHM**
```
INPUT: User 0 wants recommendations
                        ↓
STEP 1: Find all movies User 0 hasn't watched
                        ↓
STEP 2: For each unwatched movie:
                        ↓
    2a. Find all users who watched this movie
                        ↓
    2b. Check how similar they are to User 0
                        ↓
    2c. Take top 3 most similar users
                        ↓
    2d. Calculate weighted average of their ratings
                        ↓
    2e. That's the predicted rating!
                        ↓
STEP 3: Sort all predictions (highest first)
                        ↓
STEP 4: Return top 3
                        ↓
OUTPUT: Movie F (4.49), Movie C (4.00), Movie D (3.00)
```


# **LIVE DEMO - Setting Up Our Tools**


In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# **LIVE DEMO - Creating Our Rating Matrix**

In [None]:
# Build the user-item rating matrix.
# Each row is one user, each column is one movie.
# Entries are star ratings (1-5); a 0 marks a movie the user has not rated yet.

# Human-readable labels for the columns (movies) and the rows (users)
movies = [f"Movie {letter}" for letter in "ABCDEF"]
users = [f"User {number}" for number in range(6)]

ratings = np.array(
    [
        [5, 4, 0, 0, 2, 0],  # User 0
        [4, 0, 0, 3, 0, 0],  # User 1
        [0, 5, 4, 0, 0, 0],  # User 2
        [0, 0, 5, 4, 0, 5],  # User 3
        [2, 0, 0, 0, 5, 4],  # User 4
        [0, 3, 0, 0, 4, 5],  # User 5
    ]
)

# Wrap the raw array in a labelled table so it prints readably
ratings_df = pd.DataFrame(data=ratings, index=users, columns=movies)
print(ratings_df)

        Movie A  Movie B  Movie C  Movie D  Movie E  Movie F
User 0        5        4        0        0        2        0
User 1        4        0        0        3        0        0
User 2        0        5        4        0        0        0
User 3        0        0        5        4        0        5
User 4        2        0        0        0        5        4
User 5        0        3        0        0        4        5


# **LIVE DEMO - Finding Who's Similar to Whom**

In [None]:
# Pairwise cosine similarity between every pair of user rating rows
user_similarity = cosine_similarity(ratings)

# Label both axes with the user names so the matrix is readable
user_similarity_df = pd.DataFrame(data=user_similarity, index=users, columns=users)

# Heading and table emitted in one call; the newline separator keeps them on separate lines
print("\nUser Similarity Matrix:", user_similarity_df, sep="\n")


User Similarity Matrix:
          User 0    User 1    User 2    User 3    User 4    User 5
User 0  1.000000  0.596285  0.465620  0.000000  0.444444  0.421637
User 1  0.596285  1.000000  0.000000  0.295420  0.238514  0.000000
User 2  0.465620  0.000000  1.000000  0.384473  0.000000  0.331295
User 3  0.000000  0.295420  0.384473  1.000000  0.366988  0.435194
User 4  0.444444  0.238514  0.000000  0.366988  1.000000  0.843274
User 5  0.421637  0.000000  0.331295  0.435194  0.843274  1.000000


# **LIVE DEMO - Predicting a Rating (The Magic Moment!)**

In [None]:
def predict_rating(user_id, item_id, ratings, user_similarity, k=3):
    """
    Predict what rating a user would give to an item.

    The prediction is the similarity-weighted average of the ratings given
    to the item by the (at most) k most similar *other* users who rated it.

    user_id: Row index of the user we're predicting for
    item_id: Column index of the item (movie) we're predicting
    ratings: 2-D rating table (rows = users, columns = items); 0 = not rated
    user_similarity: 2-D user-to-user similarity table
    k: How many similar users to consider (neighbors); must be >= 1

    Returns the predicted rating as a float, or 0.0 when no prediction can
    be made (nobody else rated the item, or the selected neighbors'
    similarities sum to zero).

    Raises ValueError if k is smaller than 1.
    """
    # A non-positive k would make the [-k:] slice below select the wrong users
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Accept nested lists / DataFrames as well as NumPy arrays
    ratings = np.asarray(ratings)
    user_similarity = np.asarray(user_similarity)

    # Step 1: Get how similar this user is to everyone else
    similarities = user_similarity[user_id]

    # Step 2: Get ratings for this specific movie by all users
    item_ratings = ratings[:, item_id]

    # Step 3: Find who has actually watched this movie (rating > 0)
    rated_mask = item_ratings > 0
    # The target user must never be their own neighbor: their self-similarity
    # would otherwise dominate the average when they already rated the item.
    rated_mask[user_id] = False

    # Step 4: Filter to get only relevant similarities and ratings
    relevant_similarities = similarities[rated_mask]
    relevant_ratings = item_ratings[rated_mask]

    # Step 5: Pick the top-k most similar users
    if len(relevant_similarities) > k:
        # argsort is ascending, so the last k positions are the most similar
        top_k_indices = np.argsort(relevant_similarities)[-k:]
        relevant_similarities = relevant_similarities[top_k_indices]
        relevant_ratings = relevant_ratings[top_k_indices]

    # Step 6: Calculate weighted average
    total_similarity = np.sum(relevant_similarities)
    if total_similarity == 0:
        return 0.0  # No similar users found - can't predict

    weighted_sum = np.sum(relevant_similarities * relevant_ratings)
    return float(weighted_sum / total_similarity)

# Let's predict: What would User 0 rate Movie C?
user_idx = 0
movie_idx = 2  # Movie C is column 2
predicted = predict_rating(user_idx, movie_idx, ratings, user_similarity, k=3)

# Report the prediction using the display names rather than the raw indices
print(f"\n🎬 Predicted rating for {users[user_idx]} on {movies[movie_idx]}: {predicted:.2f} stars")


🎬 Predicted rating for User 0 on Movie C: 4.00 stars


# **LIVE DEMO - Getting Full Recommendations**

In [None]:
def get_recommendations(user_id, ratings, user_similarity, n_recommendations=3, k=3):
    """
    Get top N recommendations for a user.

    user_id: Row index of the user to recommend for
    ratings: 2-D rating table (rows = users, columns = items); 0 = not rated
    user_similarity: 2-D user-to-user similarity table
    n_recommendations: Maximum number of items to return
    k: How many similar users predict_rating should consider per item

    Returns a list of (item_id, predicted_rating) tuples, sorted by
    predicted rating from highest to lowest, with at most
    n_recommendations entries. Only items the user has not rated
    (rating == 0) are considered.
    """
    # Accept nested lists / DataFrames as well as NumPy arrays
    ratings = np.asarray(ratings)

    # Step 1: Find all movies this user hasn't watched yet
    user_ratings = ratings[user_id]
    unrated_items = np.where(user_ratings == 0)[0]

    # Step 2: Predict ratings for ALL unwatched movies
    # int() turns NumPy integers into plain Python ints for the caller
    predictions = [
        (int(item_id), predict_rating(user_id, item_id, ratings, user_similarity, k=k))
        for item_id in unrated_items
    ]

    # Step 3: Sort by predicted rating (highest first)
    predictions.sort(key=lambda x: x[1], reverse=True)

    # Step 4: Return top N recommendations
    return predictions[:n_recommendations]

# Get recommendations for User 0
user_idx = 0
n_top = 3  # how many recommendations to request and announce in the heading
recommendations = get_recommendations(user_idx, ratings, user_similarity, n_recommendations=n_top)

print(f"\n🎯 Top {n_top} Recommendations for {users[user_idx]}:")
print("=" * 50)
# Each entry is (movie column index, predicted rating), best first
for item_id, predicted_rating in recommendations:
    print(f"  🎬 {movies[item_id]}: Predicted rating {predicted_rating:.2f} ⭐")


🎯 Top 3 Recommendations for User 0:
==================================================
  🎬 Movie F: Predicted rating 4.49 ⭐
  🎬 Movie C: Predicted rating 4.00 ⭐
  🎬 Movie D: Predicted rating 3.00 ⭐
