In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the cleaned catalogue from disk
df = pd.read_csv("cleaned_hotstar_data.csv")

# Rows without a description get an empty string so every row is text
df = df.assign(description=df['description'].fillna(''))

# Fit TF-IDF on the descriptions, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Row-by-row cosine similarity (with Y omitted, X is compared with itself)
cosine_sim = cosine_similarity(tfidf_matrix)

# Recommendation function
def recommend(title, df=df, sim_matrix=cosine_sim):
    """Return the titles most similar to ``title``.

    Args:
        title: Title to look up; compared case-insensitively with
            ``df['title']``. The first matching row is used.
        df: Frame with a ``title`` column whose row order matches
            ``sim_matrix``. The default is bound when this function is
            defined, so it keeps referring to that frame even if the name
            ``df`` is reassigned afterwards.
        sim_matrix: Square similarity matrix addressed by row position.
            Its default is likewise bound at definition time.

    Returns:
        A Series of up to five titles, most similar first, or a message
        string when no row matches ``title``.
    """
    wanted = title.lower()

    # Find the first match by row *position*: sim_matrix is positional,
    # while the frame's index labels are not guaranteed to be 0..n-1.
    query_pos = next(
        (
            pos
            for pos, name in enumerate(df['title'].str.lower())
            if isinstance(name, str) and name == wanted
        ),
        None,
    )
    if query_pos is None:
        return f"No title named '{title}' found."

    # Exclude the query row explicitly instead of slicing off the top hit:
    # the query is not guaranteed to sort first (a row with identical text
    # ties with it, and an all-zero vector does not score highest against
    # itself), so a blind [1:6] slice can return the query title.
    candidates = [
        (pos, score)
        for pos, score in enumerate(sim_matrix[query_pos])
        if pos != query_pos
    ]
    # The sort is stable, so equal scores keep their dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    top_positions = [pos for pos, _ in candidates[:5]]
    return df['title'].iloc[top_positions]

# Example: look up the five nearest titles for one film and print them
print(recommend(title="Ernest Saves Christmas"))

49                   The Search for Santa Paws
1393                            Twas the Night
45      Prep & Landing: Operation Secret Santa
1330                          The Santa Clause
622                          My Dog, the Thief
Name: title, dtype: object


In [4]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ------------------- Load Dataset -------------------
df = pd.read_csv("cleaned_hotstar_data.csv")

# ------------------- Check and Fill Required Columns -------------------
# Text fields used below: title, description, cast, director and listed_in
# (the genre equivalent). Language info is not available in this dataset.
# Missing values are replaced with empty strings.
df = df.assign(**{
    column: df[column].fillna('')
    for column in ('description', 'cast', 'director', 'listed_in', 'title')
})

# ------------------- Combine Features -------------------
def clean_text(text):
    """Normalise free text: lowercase, strip punctuation, collapse whitespace.

    Punctuation characters (those in ``string.punctuation``, i.e. ASCII only)
    are deleted rather than replaced by a space, so "Action-Adventure"
    becomes "actionadventure".

    Args:
        text: The string to normalise.

    Returns:
        The lowercased string with punctuation removed, every run of
        whitespace reduced to a single space, and no leading or trailing
        whitespace.
    """
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Collapse whitespace only after punctuation is gone: deleting a
    # free-standing symbol such as "&" leaves two adjacent spaces behind.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def combine_features(row):
    """Build one cleaned text blob from a row's descriptive fields.

    The title, description, cast, director and listed_in values are joined
    with single spaces, in that order, and the result is passed through
    ``clean_text``.
    """
    fields = ('title', 'description', 'cast', 'director', 'listed_in')
    merged = " ".join(row[field] for field in fields)
    return clean_text(merged)

# One cleaned text blob per row, built from the descriptive columns
df = df.assign(combined_features=df.apply(combine_features, axis=1))

# ------------------- Vectorization -------------------
# max_df / min_df drop terms found in more than 85% of rows or in fewer than 2
tfidf = TfidfVectorizer(min_df=2, max_df=0.85, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# ------------------- Cosine Similarity -------------------
# With Y omitted, the rows of X are compared with each other
cosine_sim = cosine_similarity(tfidf_matrix)

# ------------------- Recommendation Function -------------------
def get_recommendations(title, top_n=5, data=None, sim_matrix=None):
    """Return the ``top_n`` rows most similar to ``title``.

    Args:
        title: Title to look up; compared case-insensitively with the
            ``title`` column. The first matching row is used.
        top_n: Maximum number of recommendations. Values below 1 yield an
            empty result.
        data: Frame with ``title``, ``listed_in``, ``cast`` and ``director``
            columns, in the same row order as ``sim_matrix``. Defaults to
            the notebook-level ``df``.
        sim_matrix: Square similarity matrix addressed by row position.
            Defaults to the notebook-level ``cosine_sim``.

    Returns:
        A DataFrame with columns ``title``, ``listed_in``, ``cast``,
        ``director`` and ``similarity_score``, most similar first, or a
        message string when no row matches ``title``.
    """
    # Fall back to the notebook globals when nothing is passed in.
    if data is None:
        data = df
    if sim_matrix is None:
        sim_matrix = cosine_sim

    # Locate the match by row *position*: sim_matrix is positional, while
    # the frame's index labels are not guaranteed to be 0..n-1.
    is_match = (data['title'].str.lower() == title.lower()).to_numpy(
        dtype=bool, na_value=False
    )
    positions = np.flatnonzero(is_match)
    if positions.size == 0:
        return f"❌ Title '{title}' not found in dataset."
    query_pos = int(positions[0])

    scores = np.asarray(sim_matrix[query_pos], dtype=float).ravel()

    # Stable descending order: equal scores keep their dataset order.
    order = np.argsort(-scores, kind='stable')
    # Remove the query row explicitly. It is not guaranteed to rank first
    # (a row with identical text ties with it), so slicing off the top hit
    # could drop a real neighbour and return the query itself.
    order = order[order != query_pos]
    # Clamp so a negative top_n cannot turn into a from-the-end slice.
    order = order[:max(int(top_n), 0)]

    # .copy() gives an independent frame, so adding a column is unambiguous.
    results = data.iloc[order][['title', 'listed_in', 'cast', 'director']].copy()
    results['similarity_score'] = scores[order]

    return results

# ------------------- Example -------------------
# Replace the title below with any title present in the dataset
print(get_recommendations(title="Ernest Saves Christmas", top_n=5))


                                        title  \
49                  The Search for Santa Paws   
1177             Santa Paws 2: The Santa Pups   
1393                           Twas the Night   
459   Surviving the Mount St. Helens Disaster   
1377                                Toy Story   

                               listed_in  \
49                Drama, Family, Fantasy   
1177                    Fantasy, Musical   
1393              Comedy, Crime, Fantasy   
459        Animals & Nature, Documentary   
1377  Action-Adventure, Animation, Buddy   

                                                   cast       director  \
49    Resse Alexander, Diedrich Bader, Sahar Biniaz,...   Robert Vince   
1177  Cheryl Ladd, George Newbern, Pat Finn, Danny W...   Robert Vince   
1393  Bryan Cranston, Josh Zuckerman, Jefferson Mapp...    Nick Castle   
459                                         Bill Ratner  Sophie Harris   
1377  Tom Hanks, Tim Allen, Don Rickles, Jim Varney,...  John Lasseter