In [3]:
# Step 1: Download dataset
# NOTE: the leading "!" is an IPython/Jupyter shell escape, so this line only
# runs inside a notebook, not as plain Python. wget's -q flag silences its
# progress output; the CSV is expected to land in the working directory as
# bollywood_full.csv, which is the path read in Step 2.
!wget -q https://raw.githubusercontent.com/khushbu93-birhade/new_code_start/refs/heads/main/bollywood_full.csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Read the downloaded dataset into a DataFrame
df = pd.read_csv('bollywood_full.csv')

# Step 3: Show the first ten column names as a quick sanity check
print("Columns:", df.columns.tolist()[:10])
# The steps below rely on these columns being present:
# title_x, title_y, original_title, genres, story, summary

# Step 4: Derive a single title and plot per movie, then keep only the
# columns needed for recommendations.
#   title: original_title, falling back to title_y, then title_x
#   plot:  story, falling back to summary, then an empty string
df = (
    df.assign(
        title=df['original_title'].fillna(df['title_y']).fillna(df['title_x']),
        plot=df['story'].fillna(df['summary']).fillna(''),
    )
    .loc[:, ['title', 'genres', 'plot']]
    .dropna(subset=['genres', 'plot'])
    # A fresh 0..n-1 index keeps row labels equal to row positions, which is
    # what indexing into the similarity matrix relies on.
    .reset_index(drop=True)
)

# Step 5: One text document per movie: genres followed by the plot
genre_text = df['genres'].astype(str)
df['combined'] = genre_text + ' ' + df['plot']

# Step 6: Turn the documents into TF-IDF vectors (English stop words removed,
# vocabulary capped at 5000 terms)
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined'])

# Step 7: Pairwise cosine similarity between every pair of movies
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 8: Recommendation function
def recommend(movie_title, top_n=5):
    """Return the titles of the movies most similar to *movie_title*.

    Reads the module-level ``df`` (needs a ``title`` column) and the
    module-level ``cosine_sim`` matrix. Row labels of ``df`` are used to
    index ``cosine_sim``, so ``df`` must carry a 0..n-1 index aligned with
    the matrix rows.

    Args:
        movie_title: Exact title to look up. If several rows share the
            title, the first one is used.
        top_n: Maximum number of recommendations; values below zero are
            treated as zero.

    Returns:
        A list of up to ``top_n`` titles ordered by descending similarity,
        or -- kept for backward compatibility -- a message string when the
        title is not present in ``df``.
    """
    matches = df.index[df['title'] == movie_title]
    if len(matches) == 0:
        return f"Movie '{movie_title}' not found in dataset."
    idx = matches[0]

    # Drop the query row by index rather than assuming it sorts first: a
    # movie with identical text ties at similarity 1.0 and, because sorting
    # is stable, an earlier duplicate would otherwise push the query movie
    # itself into the recommendations.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative top_n cannot turn into a "drop from the end" slice.
    limit = max(top_n, 0)
    titles = df['title']
    return [titles.iloc[pos] for pos, _ in candidates[:limit]]

# Step 9: Interactive test
# Strip surrounding whitespace so a stray space typed at the prompt does not
# defeat the exact-title lookup in recommend().
movie = input("Enter Bollywood movie title: ").strip()
result = recommend(movie, top_n=5)
if isinstance(result, str):
    # recommend() reports an unknown title as a message string rather than a
    # list; print it on its own instead of under the recommendations heading.
    print(f"\n{result}")
else:
    print("\nTop recommendations:\n", result)


Columns: ['title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y', 'original_title', 'is_adult', 'year_of_release', 'runtime', 'genres']
Enter Bollywood movie title: Hichki

Top recommendations:
 ['Rough Book', 'Itihaas', 'Notebook', 'Dushman', 'Kal Ho Naa Ho']


Now, modify the cell below to run the `movir_recommender.py` file with Streamlit.