<a href="https://colab.research.google.com/github/rakeshxp2007/Machine-Learning/blob/main/practical_hybrid_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Code: Setup and Data**

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Movie catalogue: one (movie_id, title, description) record per film.
# The description is a bag of keywords (genre, themes, cast).
movies = pd.DataFrame(
    [
        (1, 'DDLJ', 'romantic drama family Shah Rukh Khan'),
        (2, '3 Idiots', 'comedy friendship college Aamir Khan'),
        (3, 'Dangal', 'sports drama inspiration Aamir Khan'),
        (4, 'Gully Boy', 'music rap Mumbai youth urban'),
        (5, 'ZNMD', 'friendship travel adventure comedy Spain'),
    ],
    columns=['movie_id', 'title', 'description'],
)

# Explicit user ratings on a 1-5 scale: one (user_id, movie_id, rating) record each.
ratings = pd.DataFrame(
    [
        (1, 1, 5), (1, 2, 4), (1, 3, 5),
        (2, 1, 3), (2, 3, 5), (2, 4, 4),
        (3, 2, 5), (3, 4, 5), (3, 5, 4),
    ],
    columns=['user_id', 'movie_id', 'rating'],
)

# Show both tables; the "\n" prefix yields the blank line before "Ratings:".
print("Movies:", movies, "\nRatings:", ratings, sep="\n")


Movies:
   movie_id      title                               description
0         1       DDLJ      romantic drama family Shah Rukh Khan
1         2   3 Idiots      comedy friendship college Aamir Khan
2         3     Dangal       sports drama inspiration Aamir Khan
3         4  Gully Boy              music rap Mumbai youth urban
4         5       ZNMD  friendship travel adventure comedy Spain

Ratings:
   user_id  movie_id  rating
0        1         1       5
1        1         2       4
2        1         3       5
3        2         1       3
4        2         3       5
5        2         4       4
6        3         2       5
7        3         4       5
8        3         5       4


# **Code: Content-Based Component**

In [None]:
# Content-based component: turn each movie description into a TF-IDF vector
# (English stop words removed), giving one matrix row per movie.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['description'])

# Pairwise cosine similarity between the movies' TF-IDF rows
content_similarity = cosine_similarity(tfidf_matrix)

# Display the matrix labelled by title on both axes, rounded to 2 decimals
print(
    "Content-Based Similarity Matrix:",
    pd.DataFrame(
        content_similarity,
        index=movies['title'],
        columns=movies['title'],
    ).round(2),
    sep="\n",
)


Content-Based Similarity Matrix:
title      DDLJ  3 Idiots  Dangal  Gully Boy  ZNMD
title                                             
DDLJ       1.00      0.11    0.25        0.0  0.00
3 Idiots   0.11      1.00    0.31        0.0  0.34
Dangal     0.25      0.31    1.00        0.0  0.00
Gully Boy  0.00      0.00    0.00        1.0  0.00
ZNMD       0.00      0.34    0.00        0.0  1.00


# **Code: Collaborative Component**

In [None]:
# Build the user-item matrix for collaborative filtering:
# rows are users, columns are movies, and unrated cells become 0.
#
# pivot() only creates columns for movies that appear in `ratings`. The
# columns are therefore reindexed to the catalogue's movie_id order so that
# column j always corresponds to row j of `movies`, even when a movie has no
# ratings yet (it becomes an all-zero column instead of being dropped, which
# would otherwise misalign every label and positional lookup that follows).
user_item_matrix = (
    ratings
    .pivot(index='user_id', columns='movie_id', values='rating')
    .reindex(columns=movies['movie_id'].tolist())
    .fillna(0)
)

print("\nUser-Item Matrix:")
print(user_item_matrix)

# Calculate item similarity based on user ratings
# (which movies are rated similarly by users).
# Transposing puts one movie per row, so the result is movie x movie.
collab_similarity = cosine_similarity(user_item_matrix.T)

# Labelling with movies['title'] is valid because the columns above were
# aligned to the row order of `movies`.
print("\nCollaborative Similarity Matrix:")
print(pd.DataFrame(
    collab_similarity,
    index=movies['title'],
    columns=movies['title']
).round(2))



User-Item Matrix:
movie_id    1    2    3    4    5
user_id                          
1         5.0  4.0  5.0  0.0  0.0
2         3.0  0.0  5.0  4.0  0.0
3         0.0  5.0  0.0  5.0  4.0

Collaborative Similarity Matrix:
title      DDLJ  3 Idiots  Dangal  Gully Boy  ZNMD
title                                             
DDLJ       1.00      0.54    0.97       0.32  0.00
3 Idiots   0.54      1.00    0.44       0.61  0.78
Dangal     0.97      0.44    1.00       0.44  0.00
Gully Boy  0.32      0.61    0.44       1.00  0.78
ZNMD       0.00      0.78    0.00       0.78  1.00


# **Code: Weighted Hybrid Combination**

In [None]:
def hybrid_recommendations(movie_title, content_weight=0.6, collab_weight=0.4, top_n=3):
    """
    Combine content and collaborative filtering into one ranked list.

    Every movie is scored as
    ``content_weight * content_score + collab_weight * collab_score``,
    where the scores are the rows of the module-level ``content_similarity``
    and ``collab_similarity`` matrices for the given movie. Both matrices are
    expected to follow the row order of ``movies``.

    movie_title: Title to look up in ``movies['title']`` (exact match)
    content_weight: How much to trust content similarity (0-1, not validated)
    collab_weight: How much to trust collaborative similarity (0-1, not validated)
    top_n: Maximum number of recommendations to return

    Returns a DataFrame with columns ``title``, ``content_score``,
    ``collab_score`` and ``hybrid_score``, sorted by descending
    ``hybrid_score``, without the input movie, truncated to ``top_n`` rows.

    Raises IndexError if ``movie_title`` is not present in ``movies``.
    """
    # Locate the movie by POSITION rather than by index label: the similarity
    # matrices are plain NumPy arrays, so a label taken from a filtered or
    # re-indexed `movies` frame would address the wrong row.
    matches = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        # Same exception type as the original `.index[0]` lookup, but with a
        # message that says what actually went wrong.
        raise IndexError(f"Movie title not found in catalogue: {movie_title!r}")
    movie_idx = matches[0]

    # Get content-based scores
    content_scores = content_similarity[movie_idx]

    # Get collaborative scores
    collab_scores = collab_similarity[movie_idx]

    # HYBRID: Combine with weights
    hybrid_scores = (content_weight * content_scores) + (collab_weight * collab_scores)

    # Create results dataframe (one row per catalogue movie)
    results = pd.DataFrame({
        'title': movies['title'],
        'content_score': content_scores,
        'collab_score': collab_scores,
        'hybrid_score': hybrid_scores
    })

    # Remove the input movie itself
    results = results[results['title'] != movie_title]

    # Sort by hybrid score, best match first
    results = results.sort_values('hybrid_score', ascending=False)

    return results.head(top_n)

# Demo: recommendations for someone who has just watched DDLJ
print("\n" + "="*60, "YOU WATCHED: DDLJ", "="*60, sep="\n")

print("\nWeighted Hybrid Recommendations (60% Content + 40% Collaborative):\n")
recommendations = hybrid_recommendations('DDLJ', content_weight=0.6, collab_weight=0.4)

# One four-line summary per recommended movie, each followed by a blank line
for rec in recommendations.itertuples(index=False):
    print(
        f"{rec.title}",
        f"  Content Score: {rec.content_score:.2f}",
        f"  Collaborative Score: {rec.collab_score:.2f}",
        f"  Final Hybrid Score: {rec.hybrid_score:.2f}",
        "",
        sep="\n",
    )



YOU WATCHED: DDLJ

Weighted Hybrid Recommendations (60% Content + 40% Collaborative):

Dangal
  Content Score: 0.25
  Collaborative Score: 0.97
  Final Hybrid Score: 0.54

3 Idiots
  Content Score: 0.11
  Collaborative Score: 0.54
  Final Hybrid Score: 0.28

Gully Boy
  Content Score: 0.00
  Collaborative Score: 0.32
  Final Hybrid Score: 0.13



# **Code: Experimenting with Different Weights**

In [None]:
# Compare different weight combinations for the same seed movie

print("\n" + "="*60, "COMPARING DIFFERENT HYBRID STRATEGIES", "="*60, sep="\n")

# Each entry: (heading to print, content weight, collaborative weight)
#   Strategy 1 trusts content more, Strategy 2 is balanced,
#   Strategy 3 trusts collaborative more.
strategy_results = []
for heading, content_w, collab_w in (
    ("\nStrategy 1: 80% Content + 20% Collaborative (New User)", 0.8, 0.2),
    ("\nStrategy 2: 50% Content + 50% Collaborative (Balanced)", 0.5, 0.5),
    ("\nStrategy 3: 20% Content + 80% Collaborative (Experienced User)", 0.2, 0.8),
):
    print(heading)
    recs = hybrid_recommendations('DDLJ', content_weight=content_w, collab_weight=collab_w, top_n=3)
    # Only the title and the blended score are shown, without the row index
    print(recs[['title', 'hybrid_score']].to_string(index=False))
    strategy_results.append(recs)

# Keep the per-strategy frames available under their original names
recs1, recs2, recs3 = strategy_results



COMPARING DIFFERENT HYBRID STRATEGIES

Strategy 1: 80% Content + 20% Collaborative (New User)
    title  hybrid_score
   Dangal      0.395151
 3 Idiots      0.193290
Gully Boy      0.064281

Strategy 2: 50% Content + 50% Collaborative (Balanced)
    title  hybrid_score
   Dangal      0.610773
 3 Idiots      0.321683
Gully Boy      0.160701

Strategy 3: 20% Content + 80% Collaborative (Experienced User)
    title  hybrid_score
   Dangal      0.826395
 3 Idiots      0.450076
Gully Boy      0.257122


# **Real-World Tuning Strategy**

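In practice, companies rarely hard-code these weights. Instead, they adapt them to how much rating data is available for each user and each movie, as the sketch below illustrates.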

# **Netflix's approach:**


In [None]:
def adaptive_weights(user_id, movie_id):
    """
    Pick (content_weight, collab_weight) from how much rating data exists.

    The user's rating count selects a base pair of weights; if the movie has
    fewer than 10 ratings, 0.2 is then shifted from the collaborative weight
    to the content weight. Counts come from the external helpers
    `count_user_ratings` and `count_movie_ratings`.

    Returns a tuple ``(content_weight, collab_weight)``.
    """
    n_user_ratings = count_user_ratings(user_id)
    n_movie_ratings = count_movie_ratings(movie_id)

    # Base weights by user tier:
    #   fewer than 5 ratings  -> new user, lean on content
    #   5 to 19 ratings       -> experienced user, balanced
    #   20 or more ratings    -> power user, lean on collaborative
    content_weight, collab_weight = (
        (0.8, 0.2) if n_user_ratings < 5
        else (0.5, 0.5) if n_user_ratings < 20
        else (0.3, 0.7)
    )

    # New movie (< 10 ratings): trust content more
    if n_movie_ratings < 10:
        return content_weight + 0.2, collab_weight - 0.2

    return content_weight, collab_weight
