In [None]:
# ✅ Fix NumPy compatibility
!pip install numpy==1.24.4 --quiet
import os
os.kill(os.getpid(), 9)  # Restarts runtime


In [2]:
# 📦 Reinstall necessary libraries
!pip install scikit-surprise --quiet
!pip install scikit-learn --quiet

# ✅ Imports
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from google.colab import files

# 📤 Upload ratings.csv and movies.csv
print("📤 Upload your files (ratings.csv and movies.csv)")
uploaded = files.upload()

# ✅ Load datasets
ratings = pd.read_csv("/content/ratings.csv")
movies = pd.read_csv("/content/movies.csv")

# 🧹 Preprocess
ratings['userId'] = ratings['userId'].astype(int)
ratings['movieId'] = ratings['movieId'].astype(int)
movies['movieId'] = movies['movieId'].astype(int)
movies.dropna(subset=['title'], inplace=True)
movies['genres'] = movies['genres'].fillna('')

# 📊 Build collaborative filtering model
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)
model = SVD()
model.fit(trainset)
print("✅ Trained collaborative filtering model.")

# 🧪 Evaluate
predictions = model.test(testset)
print("✅ RMSE:", accuracy.rmse(predictions))

# 🎯 Collaborative recommendations
def get_collab_recommendations(user_id, n=5):
    movie_ids = movies['movieId'].unique()
    rated = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    to_predict = [m for m in movie_ids if m not in rated]
    preds = [model.predict(user_id, m) for m in to_predict]
    preds.sort(key=lambda x: x.est, reverse=True)
    top_n = [int(p.iid) for p in preds[:n]]
    return movies[movies['movieId'].isin(top_n)][['title', 'genres']]

# 🎯 Content-based recommendations
def get_content_recommendations(fav_movie_title, n=5):
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(movies['genres'])
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()
    idx = indices.get(fav_movie_title)

    if idx is None:
        return ["Movie not found in the dataset."]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:n+1]

    movie_indices = [i[0] for i in sim_scores]
    return movies.iloc[movie_indices][['title', 'genres']]

# ✅ Combine both
def get_combined_recommendations(user_id, fav_movie_title, top_n=5):
    print(f"\n🎬 Top {top_n} Collaborative Filtering Recommendations for User {user_id}:")
    collab = get_collab_recommendations(user_id, top_n)
    print(collab.to_string(index=False))

    print(f"\n🎬 Top {top_n} Content-Based Recommendations similar to '{fav_movie_title}':")
    content = get_content_recommendations(fav_movie_title, top_n)
    print(content.to_string(index=False))

# 🧪 Try it out
get_combined_recommendations(user_id=1, fav_movie_title="Toy Story (1995)", top_n=5)


📤 Upload your files (ratings.csv and movies.csv)


Saving movies.csv to movies (1).csv
Saving ratings.csv to ratings (1).csv
✅ Trained collaborative filtering model.
RMSE: 0.9381
✅ RMSE: 0.9380872882880715

🎬 Top 5 Collaborative Filtering Recommendations for User 1:
                                                                      title  genres
                                                   L.A. Confidential (1997) Unknown
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963) Unknown
                                                         Rear Window (1954) Unknown
                                                           Chinatown (1974) Unknown
                                           Manchurian Candidate, The (1962) Unknown

🎬 Top 5 Content-Based Recommendations similar to 'Toy Story (1995)':
                                               title  genres
                                    GoldenEye (1995) Unknown
                                   Four Rooms (1995) Unknown
                           