In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from fuzzywuzzy import process
from implicit.als import AlternatingLeastSquares
import scipy.sparse as sp
import pickle  # For saving and loading models

# Load datasets
movies_df = pd.read_csv("movies.csv").head(50000)
rating_df = pd.read_csv("ratings.csv").head(50000)

# Ensure ratings correspond to movies within the selected subset
rating_df = rating_df[rating_df['Movie_id'].isin(movies_df['id'])]


# Step 1: Train Content-Based Filtering Model
movies_df["combined_features"] = movies_df["genres"].fillna('') + " " + \
                                   movies_df["overview"].fillna("") + " " + \
                                   movies_df["keywords"].fillna("")

# Train TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df["combined_features"])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Save TF-IDF model
with open("tfidf_model.pkl", "wb") as f:
    pickle.dump((tfidf, cosine_sim), f)

# Step 2: Train Collaborative Filtering Model (ALS)
user_movie_matrix = rating_df.pivot(index="User_id", columns="Movie_id", values="Rating").fillna(0)

# Align movie IDs between ratings and selected movies
user_movie_matrix = user_movie_matrix.loc[:, user_movie_matrix.columns.isin(movies_df["id"])]
sparse_matrix = sp.csr_matrix(user_movie_matrix.values)

# Train ALS Model
als_model = AlternatingLeastSquares(factors=50, regularization=0.1, iterations=20)
als_model.fit(sparse_matrix)

# Save ALS model
with open("als_model.pkl", "wb") as f:
    pickle.dump((als_model, user_movie_matrix.columns), f)

  check_blas_config()


  0%|          | 0/20 [00:00<?, ?it/s]

In [4]:
# Load datasets
movies_df = pd.read_csv("movies.csv").head(50000)
rating_df = pd.read_csv("ratings.csv").head(50000)

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from fuzzywuzzy import process
from implicit.als import AlternatingLeastSquares
import scipy.sparse as sp
import pickle  # For saving and loading models

In [2]:
# Load trained models
def load_models():
    with open("tfidf_model.pkl", "rb") as f:
        tfidf, cosine_sim = pickle.load(f)
    with open("als_model.pkl", "rb") as f:
        als_model, movie_columns = pickle.load(f)
    return tfidf, cosine_sim, als_model, movie_columns

In [5]:
# Fuzzy Matching Function
def get_closest_match(title):
    best_match = process.extractOne(title, movies_df["title"].dropna())[0]
    return best_match

# Hybrid Recommendation Function
def hybrid_recommendation(movie_title, top_n=5):
    # Load trained models
    tfidf, cosine_sim, als_model, movie_columns = load_models()
    
    # Correct movie title
    correct_movie = get_closest_match(movie_title)
    movie_idx = movies_df[movies_df["title"] == correct_movie].index[0]
    
    # Content-Based Recommendations
    content_scores = list(enumerate(cosine_sim[movie_idx]))
    content_scores = sorted(content_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    content_recommendations = [movies_df.iloc[i[0]]["title"] for i in content_scores]
    
    # Collaborative Filtering Recommendations (ALS)
    movie_id = movies_df.iloc[movie_idx]["id"]
    if movie_id in movie_columns:
        als_movie_idx = list(movie_columns).index(movie_id)
#         scores = als_model.item_factors.to_numpy() @ als_model.item_factors.to_numpy()[als_movie_idx]
        scores = als_model.item_factors.dot(als_model.item_factors[als_movie_idx])
        colab_indices = np.argsort(scores)[::-1][1:top_n+1]
        collaborative_recommendations = [movies_df.iloc[i]["title"] for i in colab_indices]
    else:
        collaborative_recommendations = []
    
    # Combine recommendations
    hybrid_recommendations = list(dict.fromkeys(content_recommendations + collaborative_recommendations))[:top_n]
    
    return {
        "Corrected Movie Title": correct_movie,
        "Recommended Movies": hybrid_recommendations
    }

# Example usage:
movie_input = "2001: A Space Odyssey"
recommendations = hybrid_recommendation(movie_input)
print(recommendations)

{'Corrected Movie Title': '2001: A Space Odyssey', 'Recommended Movies': ['2010', 'First Man', 'Conquest of Space', 'Spaceman', 'Journey to Space']}


In [4]:
# Example usage:
movie_input = "Midnight in Paris"
recommendations = hybrid_recommendation(movie_input)
print(recommendations)

{'Corrected Movie Title': 'Midnight in Paris', 'Recommended Movies': ['Playing It Cool', 'Proof of Love', 'Titanic', 'The Science of Sleep', 'Bottle Shock']}


In [5]:
# Example usage:
movie_input = "Deadpool"
recommendations = hybrid_recommendation(movie_input)
print(recommendations)

{'Corrected Movie Title': 'Deadpool', 'Recommended Movies': ['Deadpool 2', 'Deadpool & Wolverine', 'Once Upon a Deadpool', 'Deadpool: No Good Deed', "Gettin' Wet on Wet with Deadpool 2"]}


In [None]:
# Example usage:
movie_input = "Titanic"
recommendations = hybrid_recommendation(movie_input)
print(recommendations)