# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import networkx as nx

# Load the dataset
movies_df = pd.read_csv('indian_movies.csv')

# Peek at the first few rows to understand the structure.
# NOTE: as a bare expression this only renders in a notebook when it is the
# last statement of its cell; in a plain script it has no visible effect.
movies_df.head()

# Combine relevant features for similarity assessment.
# Missing values are replaced with '' first: `NaN + str` yields NaN in pandas,
# and CountVectorizer refuses NaN documents, so one incomplete row would
# otherwise abort the whole fit.
movies_df['combined'] = (
    movies_df['Movie Name'].fillna('').astype(str)
    + ' '
    + movies_df['Genre'].fillna('').astype(str)
)

# Initialize the Count Vectorizer (English stop words are dropped)
vectorizer = CountVectorizer(stop_words='english')

# Fit and transform the combined features into a document-term matrix
combined_matrix = vectorizer.fit_transform(movies_df['combined'])

# Calculate the pairwise cosine similarity matrix for the combined features;
# row/column i corresponds to the i-th row (by position) of movies_df.
cosine_similarities = cosine_similarity(combined_matrix)

# Function to get the top N similar movies
def get_similar_movies(movie_title, cosine_sim, movies_df, top_n=10):
    """Return the movies most similar to ``movie_title``.

    Args:
        movie_title: Exact value to look up in the 'Movie Name' column. If the
            title occurs more than once, its first occurrence is the query row.
        cosine_sim: Square similarity matrix addressed by row *position* in
            ``movies_df`` (``cosine_sim[i][j]`` is the similarity of rows i, j).
        movies_df: DataFrame containing at least a 'Movie Name' column.
        top_n: Maximum number of recommendations to return. Values <= 0 give
            an empty result.

    Returns:
        A DataFrame holding up to ``top_n`` rows of ``movies_df`` (unique by
        'Movie Name', never the query title itself) with an added
        'similarity_score' column, ordered by descending score and re-indexed
        from 0.

    Raises:
        IndexError: If ``movie_title`` is not present in 'Movie Name'.
    """
    titles = movies_df['Movie Name']

    # Work with positions, not index labels: cosine_sim is positional, so a
    # filtered or re-indexed DataFrame must not shift the lookup.
    matches = (titles == movie_title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"Movie '{movie_title}' not found in the dataset.")
    query_pos = int(matches[0])

    ranked = sorted(
        enumerate(cosine_sim[query_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Seed with the query title so duplicate rows of the same movie are never
    # recommended back to the caller.
    seen_titles = {movie_title}
    positions = []
    scores = []
    for pos, score in ranked:
        if len(positions) >= top_n:
            break
        # Skip the query row explicitly; it is not guaranteed to sort first
        # when another row ties with a similarity of 1.0.
        if pos == query_pos:
            continue
        title = titles.iloc[pos]
        if title in seen_titles:
            continue
        seen_titles.add(title)
        positions.append(pos)
        scores.append(score)

    # A single positional take also covers the empty case, where
    # pd.concat([]) would raise ValueError.
    recommendations = movies_df.iloc[positions].copy()
    recommendations['similarity_score'] = scores
    return recommendations.reset_index(drop=True)

# NOTE(review): an earlier revision of get_similar_movies reportedly raised an
# AttributeError; the current version is expected not to - confirm on a run.
# Collect the top-10 recommendations for every query title. Titles missing
# from the dataset are reported and left out of `results`.
queries = ['Dilwale Dulhania Le Jayenge', 'Chennai Express', 'Kabir Singh']
results = {}

for query in queries:
    try:
        recommendations = get_similar_movies(query, cosine_similarities, movies_df)
    except IndexError:
        # get_similar_movies signals an unknown title with IndexError.
        print(f"Movie '{query}' not found in the dataset.")
    else:
        results[query] = recommendations

# Plot similarity scores and genre summaries for each query result.
for query, similar_movies in results.items():
    # Nothing to draw when no recommendation was produced for this query.
    if similar_movies.empty:
        print(f"No similar movies found for '{query}'; skipping plots.")
        continue

    # Horizontal bar chart of similarity scores, one bar per recommendation.
    plt.figure(figsize=(10, 6))
    sns.barplot(
        y=similar_movies['Movie Name'],
        x=similar_movies['similarity_score'],
        palette='viridis'
    )
    plt.xlabel("Similarity Score")
    plt.ylabel("Movie Name")
    # Report the actual number of rows: fewer than 10 may be available.
    plt.title(f"Top {len(similar_movies)} Movies Similar to '{query}'")
    plt.show()

    # Drop missing genres up front: str.join raises TypeError on NaN.
    genres = similar_movies['Genre'].dropna().astype(str)

    # Generate word cloud for genres in similar movies.
    # Skipped when there is no genre text, since WordCloud needs at least one
    # word to render.
    genre_text = ' '.join(genres)
    if genre_text.strip():
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(genre_text)
        plt.figure(figsize=(10, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title(f"Most Common Genres in Movies Similar to '{query}'")
        plt.show()

    # Genre Distribution Pie Chart for recommendations
    genre_counts = genres.value_counts()
    if not genre_counts.empty:
        plt.figure(figsize=(8, 8))
        plt.pie(genre_counts, labels=genre_counts.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette("Set3"))
        plt.title(f"Genre Distribution in Recommendations for '{query}'")
        plt.show()

# Heatmap of cosine similarity scores, limited to the first 30 movies so the
# tick labels stay readable.
heatmap_block = cosine_similarities[:30, :30]
heatmap_labels = movies_df['Movie Name'][:30]

plt.figure(figsize=(12, 10))
sns.heatmap(
    heatmap_block,
    cmap="coolwarm",
    xticklabels=heatmap_labels,
    yticklabels=heatmap_labels,
    cbar=True,
)
# Movie names are long; rotate the x labels so they do not overlap.
plt.xticks(rotation=90)
plt.title("Cosine Similarity Heatmap for First 30 Movies")
plt.show()

# Network Graph of Similar Movies: one star-shaped graph per query, with the
# query at the centre and an edge to each recommended movie.
# Iterate over `results` rather than `queries`: titles that were not found in
# the dataset have no entry in `results`, and indexing it would raise KeyError.
for query, similar_movies in results.items():
    G = nx.Graph()

    # Add nodes and edges for the network graph; the similarity score is
    # stored as the edge weight.
    G.add_node(query)
    for _, row in similar_movies.iterrows():
        G.add_node(row['Movie Name'])
        G.add_edge(query, row['Movie Name'], weight=row['similarity_score'])

    plt.figure(figsize=(10, 8))
    pos = nx.spring_layout(G, k=0.15, iterations=20)
    nx.draw_networkx_nodes(G, pos, node_size=500, node_color='skyblue')
    nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.7)
    nx.draw_networkx_labels(G, pos, font_size=10)
    plt.title(f"Network Graph of Movies Similar to '{query}'")
    plt.axis('off')
    plt.show()
