In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Load the dataset
movies_df = pd.read_csv('indian_movies.csv')

# Display the first few rows to understand the structure
movies_df.head()

# Combine relevant features for similarity assessment.
# 'Movie Name' and 'Genre' are joined into one text field per movie.
# Missing values are replaced with empty strings (and everything is coerced
# to str) first: concatenating with NaN yields NaN, which CountVectorizer
# rejects as an invalid document.
movies_df['combined'] = (
    movies_df['Movie Name'].fillna('').astype(str)
    + ' '
    + movies_df['Genre'].fillna('').astype(str)
)

# Initialize the Count Vectorizer (English stop words are dropped from the vocabulary)
vectorizer = CountVectorizer(stop_words='english')

# Fit and transform the combined features into a document-term count matrix
combined_matrix = vectorizer.fit_transform(movies_df['combined'])

# Calculate the cosine similarity matrix for the combined features;
# row i / column j holds the similarity between movie i and movie j (by row position)
cosine_similarities = cosine_similarity(combined_matrix)

# Function to get the top N similar movies
def get_similar_movies(movie_title, cosine_sim, movies_df, top_n=10):
    """Return the rows of the movies most similar to ``movie_title``.

    Args:
        movie_title: Exact value to look up in the 'Movie Name' column. When
            the title occurs more than once, its first occurrence is used.
        cosine_sim: Pairwise similarity matrix indexed by row *position*:
            ``cosine_sim[i][j]`` is the similarity between the i-th and j-th
            rows of ``movies_df``.
        movies_df: DataFrame containing a 'Movie Name' column.
        top_n: Maximum number of recommendations to return. Values <= 0
            produce an empty result.

    Returns:
        A DataFrame with the same columns as ``movies_df`` holding at most
        ``top_n`` rows, ordered by decreasing similarity (ties keep their
        original row order). Each movie name appears at most once and the
        queried title itself is never included.

    Raises:
        IndexError: If ``movie_title`` is not present in 'Movie Name'.
    """
    # Materialise the names once; per-row ``.iloc`` lookups inside the loop
    # would build a full Series on every iteration.
    names = movies_df['Movie Name'].tolist()

    # Work with the row *position* (not the index label), because both
    # ``cosine_sim`` and ``.iloc`` are positional.
    try:
        idx = names.index(movie_title)
    except ValueError:
        # Callers rely on IndexError to detect an unknown title.
        raise IndexError(
            f"Movie '{movie_title}' not found in 'Movie Name'"
        ) from None

    # Rank every movie by its similarity to the query, best first.
    ranked = sorted(
        enumerate(cosine_sim[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Seed the seen-set with the query title: the query is not guaranteed to
    # sort first (ties, all-zero vectors), so it must be excluded by name
    # rather than by skipping the first ranked entry.
    seen_movies = {movie_title}
    picked_positions = []

    for position, _score in ranked:
        if len(picked_positions) >= top_n:
            break
        name = names[position]
        if name in seen_movies:
            continue
        seen_movies.add(name)
        picked_positions.append(position)

    # Positional selection keeps the original columns even when nothing was picked.
    return movies_df.iloc[picked_positions]

# Look up recommendations for a handful of example titles.
# Each title must match a 'Movie Name' value in the dataset exactly.
queries = [
    'Dilwale Dulhania Le Jayenge',
    'Chennai Express',
    'Kabir Singh',
]
results = {}

for query in queries:
    try:
        similar_movies = get_similar_movies(query, cosine_similarities, movies_df)
    except IndexError:
        # get_similar_movies signals an unknown title with IndexError
        print(f"Movie '{query}' not found in the dataset.")
    else:
        results[query] = similar_movies

# Report the title and genre of every recommendation, one query at a time
for query, similar_movies in results.items():
    print(f"\nTop 10 movies similar to '{query}':")
    print(similar_movies.loc[:, ['Movie Name', 'Genre']])
