# 📦 Step 1: Import Libraries

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# 📁 Step 2: Load the Dataset

In [4]:
# Read the sample movie catalogue (path is relative to the working directory)
# into a DataFrame, then confirm the load on stdout.
movies = pd.read_csv(filepath_or_buffer="movies_sample.csv")
print("✅ Dataset loaded successfully!")


✅ Dataset loaded successfully!


# 🔍 Step 3: Preview the Dataset

In [5]:

# Show the first five rows as a quick sanity check of the loaded data.
print(movies.head(n=5))

# Optional: list the column names that were read from the file.
print("\nColumns in dataset:", list(movies.columns))


   movieId          title                   genres
0        1     The Matrix            Action|Sci-Fi
1        2      Inception  Action|Adventure|Sci-Fi
2        3        Titanic            Drama|Romance
3        4  The Godfather              Crime|Drama
4        5   Interstellar   Adventure|Drama|Sci-Fi

Columns in dataset: ['movieId', 'title', 'genres']


# 🧹 Step 4: Preprocess the Data

In [6]:

# Fail fast if the columns the rest of the notebook relies on are absent.
if 'genres' not in movies.columns or 'title' not in movies.columns:
    raise ValueError("❌ The dataset must contain 'title' and 'genres' columns.")

# Replace '|' with spaces for CountVectorizer compatibility.
# Missing genres are filled with '' first: astype(str) alone would turn NaN
# into the literal text "nan", which would then be vectorised as a genre
# shared by every movie with missing data.
movies['genres'] = (
    movies['genres']
    .fillna('')
    .astype(str)
    .str.replace('|', ' ', regex=False)
)


# 🔠 Step 5: Convert Genres to Vectors

In [7]:

# Treat each whitespace-separated genre as exactly one token.
# CountVectorizer's default token_pattern splits on punctuation and drops
# one-character tokens, so a hyphenated genre such as "Sci-Fi" would be
# counted as two separate features ("sci" and "fi") and weigh twice as much
# as any other genre in the similarity score.
vectorizer = CountVectorizer(token_pattern=r"\S+")

# Rows follow the order of `movies`; columns are the genre vocabulary.
genre_matrix = vectorizer.fit_transform(movies['genres'])


# 🔗 Step 6: Compute Cosine Similarity

In [8]:

# Pairwise similarity of every movie's genre vector against every other one.
# With a single argument, cosine_similarity compares the matrix with itself.
cosine_sim = cosine_similarity(genre_matrix)


# 🗂 Step 7: Map Movie Titles to Indices


In [9]:

# Lookup table: movie title -> row index in `movies`.
# NOTE(review): the row index is later used as a position into `cosine_sim`,
# which assumes `movies` still has its default 0..n-1 index — confirm if the
# frame is ever filtered or re-indexed.
movie_indices = pd.Series(movies.index, index=movies['title'])

# Keep only the first row for each title. Series.drop_duplicates() would
# compare the *values* (row indices, which are already unique) and therefore
# never remove a repeated title; a repeated title would then make
# movie_indices.get(title) return a Series instead of a single row index.
movie_indices = movie_indices[~movie_indices.index.duplicated(keep='first')]


# 💡 Step 8: Define Recommendation Function



In [10]:
def recommend_movies(title, num=5):
    """Return the titles of the movies most similar to ``title``.

    Similarity is read from the module-level ``cosine_sim`` matrix, using the
    row that ``movie_indices`` maps ``title`` to.

    Parameters
    ----------
    title : str
        Exact title to look up in ``movie_indices``.
    num : int, default 5
        Maximum number of recommendations; values below 1 yield ``[]``.

    Returns
    -------
    list of str
        Up to ``num`` titles ordered by descending similarity (ties keep
        dataset order). The queried movie itself is never included. An empty
        list is returned, after printing a warning, when the title is unknown.
    """
    idx = movie_indices.get(title)
    if idx is None:
        print(f"⚠️ Movie '{title}' not found in the dataset.")
        return []

    # A title that occurs more than once makes the lookup return several
    # rows; fall back to the first occurrence instead of failing later.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every *other* movie with its similarity to the query. The query is
    # excluded by position rather than by assuming it sorts first: a movie
    # with identical genres ties at the top score, and a movie with no genre
    # tokens has a self-similarity of 0, so slicing off element 0 could drop
    # a real match and recommend the query to itself.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # Stable sort: equally similar movies keep their dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative `num` cannot turn into a "from the end" slice.
    top_positions = [pos for pos, _ in sim_scores[:max(num, 0)]]
    return movies['title'].iloc[top_positions].tolist()


# 🧪 Step 9: Try it Out with User Input

In [11]:

# Show a few titles so the user knows what can be typed in.
print("\nSample movies available:")
print(movies['title'].head(10).to_string(index=False))

# Get user input. Surrounding whitespace is stripped because the lookup in
# recommend_movies is an exact match on the title.
selected_movie = input("\nEnter a movie title: ").strip()

# Fall back to 5 recommendations (the default of recommend_movies) instead of
# crashing the cell when the answer is not a whole number.
try:
    top_n = int(input("Enter number of recommendations: "))
except ValueError:
    top_n = 5
    print(f"⚠️ Invalid number; showing {top_n} recommendations instead.")

# Show results. The header is printed only when there is something to list,
# so an unknown title produces just the warning from recommend_movies.
recommendations = recommend_movies(selected_movie, top_n)
if recommendations:
    print("\n📽 Recommended Movies:")
    for i, movie in enumerate(recommendations, 1):
        print(f"{i}. {movie}")



Sample movies available:
       The Matrix
        Inception
          Titanic
    The Godfather
     Interstellar
  The Dark Knight
Avengers: Endgame
     Forrest Gump
        Gladiator
    The Lion King



Enter a movie title:  The Matrix
Enter number of recommendations:  3



📽 Recommended Movies:
1. Inception
2. Avengers: Endgame
3. Interstellar
