<a href="https://colab.research.google.com/github/rakeshxp2007/Machine-Learning/blob/main/content_based_filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Code: Setting Up Data**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Our movie database.
# Each entry pairs a title with the free-text keywords (genre, themes, cast,
# setting) that the later cells compare between movies.
movie_catalog = [
    ('Dilwale Dulhania Le Jayenge',
     'romantic drama about love marriage family tradition Shah Rukh Khan Kajol Europe India songs'),
    ('Kabhi Khushi Kabhie Gham',
     'family drama emotional relationships Shah Rukh Khan Kajol songs joint family tradition'),
    ('Gully Boy',
     'rap music drama Mumbai street life struggle dreams youth Ranveer Singh hip hop'),
    ('Zindagi Na Milegi Dobara',
     'friendship adventure Spain travel self-discovery fun comedy Hrithik Roshan road trip'),
    ('Dangal',
     'sports drama wrestling father daughter Aamir Khan rural India inspiration true story'),
    ('3 Idiots',
     'college comedy friendship engineering life lessons Aamir Khan humor education'),
]

# Column-oriented view of the catalog: one list per DataFrame column.
movies_data = {
    'title': [title for title, _ in movie_catalog],
    'description': [description for _, description in movie_catalog],
}

movies_df = pd.DataFrame(movies_data)


# **Code: Convert Text to Numbers (TF-IDF)**

In [None]:
# Fit a TF-IDF vectorizer on the movie descriptions and transform them in one
# step; stop_words='english' asks the vectorizer to skip common English words.
tfidf = TfidfVectorizer(stop_words='english')
descriptions = movies_df['description']
tfidf_matrix = tfidf.fit_transform(descriptions)

# Report the dimensions of the resulting matrix
print(f"\nTF-IDF Matrix Shape: {tfidf_matrix.shape}")
print("Each movie is now represented by numbers!")

# Fetch the terms the vectorizer kept and report how many there are
feature_names = tfidf.get_feature_names_out()
print(f"\nImportant words found: {len(feature_names)}")



TF-IDF Matrix Shape: (6, 54)
Each movie is now represented by numbers!

Important words found: 54


# **Code: Calculate Similarities**

In [None]:
# Cosine similarity of every movie's TF-IDF vector against every movie's vector
similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Label rows and columns with the movie titles so scores can be looked up by name
movie_titles = movies_df['title']
similarity_df = pd.DataFrame(similarity_matrix, index=movie_titles, columns=movie_titles)

# Show the scores rounded to two decimals
print("\nSimilarity Matrix:")
rounded_similarity = similarity_df.round(2)
print(rounded_similarity)


Similarity Matrix:
title                        Dilwale Dulhania Le Jayenge  \
title                                                      
Dilwale Dulhania Le Jayenge                         1.00   
Kabhi Khushi Kabhie Gham                            0.56   
Gully Boy                                           0.03   
Zindagi Na Milegi Dobara                            0.00   
Dangal                                              0.14   
3 Idiots                                            0.04   

title                        Kabhi Khushi Kabhie Gham  Gully Boy  \
title                                                              
Dilwale Dulhania Le Jayenge                      0.56       0.03   
Kabhi Khushi Kabhie Gham                         1.00       0.03   
Gully Boy                                        0.03       1.00   
Zindagi Na Milegi Dobara                         0.00       0.00   
Dangal                                           0.07       0.03   
3 Idiots               

# **Code: Make Recommendations**

In [None]:
def get_recommendations(movie_title, top_n=3, similarity=None):
    """Return the ``top_n`` movies most similar to ``movie_title``.

    Args:
        movie_title: Title to look up; must be a column label of the
            similarity matrix.
        top_n: Maximum number of recommendations to return. Must be >= 0.
        similarity: Optional DataFrame of pairwise similarity scores whose
            index and columns are movie titles. When omitted, the
            notebook-level ``similarity_df`` is used.

    Returns:
        A pandas Series of similarity scores indexed by movie title, ordered
        from most to least similar and never containing ``movie_title``.

    Raises:
        ValueError: If ``top_n`` is negative.
        KeyError: If ``movie_title`` is not in the similarity matrix.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if similarity is None:
        similarity = similarity_df
    if movie_title not in similarity.columns:
        raise KeyError(f"Movie {movie_title!r} not found in the similarity matrix")

    # Get similarity scores for this movie
    movie_similarities = similarity[movie_title]

    # Remove the movie itself by label. Slicing off the first sorted entry is
    # only correct when the movie ranks first, which fails if its
    # self-similarity is 0 or another movie ties with it.
    other_movies = movie_similarities.drop(labels=movie_title, errors='ignore')

    # Sort by similarity (highest first); a stable sort keeps tied movies in
    # their original order so results are reproducible.
    ranked = other_movies.sort_values(ascending=False, kind='mergesort')

    # Get top N
    return ranked.head(top_n)

# Demo: recommendations for a viewer who just watched DDLJ
print("\nYOU WATCHED: Dilwale Dulhania Le Jayenge")
print("\nTop 3 Recommendations:\n")

recommendations = get_recommendations('Dilwale Dulhania Le Jayenge', top_n=3)

# Print each pick with its rank, its score as a percentage, and a star bar
for rank, (title, similarity) in enumerate(recommendations.items(), start=1):
    percent = similarity * 100
    # int() truncates, so a score of 0.565 gives 2 stars, not 3
    star_bar = "★" * int(similarity * 5)
    print(f"{rank}. {title}")
    print(f"   Similarity: {percent:.1f}% {star_bar}")



YOU WATCHED: Dilwale Dulhania Le Jayenge

Top 3 Recommendations:

1. Kabhi Khushi Kabhie Gham
   Similarity: 56.5% ★★
2. Dangal
   Similarity: 14.2% 
3. 3 Idiots
   Similarity: 4.0% 
