# Simple Content-Based Movie Recommendation System

This notebook implements a simple content-based movie recommendation system using TF-IDF and cosine similarity.

## 1. Import Required Libraries

In [None]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## 2. Load and Preprocess Data

In [None]:
def load_data(file_path):
    """Read the movie CSV at ``file_path`` and return it as a DataFrame.

    ``file_path`` is handed to ``pd.read_csv`` unchanged.
    """
    movies = pd.read_csv(file_path)
    return movies

def preprocess_data(df):
    """Build the text features used for TF-IDF from the raw movie table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``title``, ``genres`` and ``overview`` columns. Each
        ``genres`` value is expected to be a string holding a list literal
        of dicts that each have a ``name`` key.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which ``genres`` is replaced by the
        space-separated genre names and a new ``combined_features`` column
        holds ``title + ' ' + genres + ' ' + overview``. The input frame is
        left untouched.

    Raises
    ------
    ValueError, SyntaxError
        If a ``genres`` string is not a valid Python literal.
    """
    def _genre_names(raw):
        # A missing genres cell contributes no text instead of crashing.
        if not isinstance(raw, str):
            return ''
        # literal_eval only accepts literals, so text from the CSV can never
        # execute code the way eval() could.
        return ' '.join(entry['name'] for entry in ast.literal_eval(raw))

    out = df.copy()
    out['genres'] = out['genres'].apply(_genre_names)

    # Concatenating with NaN yields NaN, which TfidfVectorizer rejects as a
    # document, so blank out missing text for the combination only.
    text = out[['title', 'genres', 'overview']].fillna('')
    out['combined_features'] = text['title'] + ' ' + text['genres'] + ' ' + text['overview']
    return out

# Read the movie CSV and derive the combined text column in a single step
df = preprocess_data(load_data('tmdb_5000_movies.csv'))

print(f"Full dataset shape: {df.shape}")
df.head()

## 3. Sample 500 Movies

In [None]:
# Draw 500 movies at random; the fixed seed makes the same subset come back on every run
df_sample = df.sample(500, random_state=42)
print(f"Sampled dataset shape: {df_sample.shape}")
df_sample.head()

## 4. Create TF-IDF Matrix

In [None]:
def create_tfidf_matrix(df, return_vectorizer=False):
    """Fit a TF-IDF vectorizer on ``df['combined_features']`` and vectorize it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``combined_features`` text column.
    return_vectorizer : bool, default False
        When True, also return the fitted ``TfidfVectorizer`` so that new
        text can later be projected into the same vocabulary without
        refitting.

    Returns
    -------
    matrix
        The result of ``fit_transform``: one row per row of ``df``, one
        column per vocabulary term (English stop words removed).
    (matrix, TfidfVectorizer)
        Returned instead when ``return_vectorizer`` is True.
    """
    tfidf = TfidfVectorizer(stop_words='english')
    matrix = tfidf.fit_transform(df['combined_features'])
    if return_vectorizer:
        return matrix, tfidf
    return matrix

# Vectorize the sampled movies; this matrix is reused by the recommendation calls further down.
tfidf_matrix = create_tfidf_matrix(df_sample)
print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")

## 5. Implement Recommendation Function

In [None]:
def get_recommendations(user_input, df, tfidf_matrix, top_n=5, vectorizer=None):
    """Rank the movies in ``df`` by TF-IDF cosine similarity to a text query.

    Parameters
    ----------
    user_input : str
        Free-text description of the desired movie.
    df : pandas.DataFrame
        Must contain ``title`` and ``combined_features``. Rows are addressed
        by position, so row ``i`` of ``df`` must correspond to row ``i`` of
        ``tfidf_matrix``.
    tfidf_matrix : matrix
        TF-IDF representation of ``df['combined_features']``.
    top_n : int, default 5
        Maximum number of results to return.
    vectorizer : fitted TfidfVectorizer, optional
        The vectorizer that produced ``tfidf_matrix``. If omitted, a new
        ``TfidfVectorizer(stop_words='english')`` is fitted on
        ``df['combined_features']`` on every call; that only lines up with
        ``tfidf_matrix`` when the matrix was built from the same column with
        the same settings.

    Returns
    -------
    list of (title, similarity) tuples
        Ordered from most to least similar. Entries may have a similarity of
        0 when the query shares no vocabulary with a movie.

    Raises
    ------
    ValueError
        If the query vector and ``tfidf_matrix`` have different widths,
        i.e. they were not built from the same vocabulary.
    """
    if vectorizer is None:
        # Backward-compatible path: rebuild the vocabulary from df.
        vectorizer = TfidfVectorizer(stop_words='english').fit(df['combined_features'])
    user_vector = vectorizer.transform([user_input])

    if user_vector.shape[1] != tfidf_matrix.shape[1]:
        raise ValueError(
            f"Query vector has {user_vector.shape[1]} features but tfidf_matrix has "
            f"{tfidf_matrix.shape[1]}; they must come from the same fitted vectorizer."
        )

    cosine_similarities = cosine_similarity(user_vector, tfidf_matrix).flatten()
    # Ascending argsort reversed gives best-first; slice off the top_n leaders.
    related_docs_indices = cosine_similarities.argsort()[::-1][:top_n]

    # Positional lookup on the title column avoids materializing a full row per hit.
    titles = df['title']
    return [(titles.iloc[idx], cosine_similarities[idx]) for idx in related_docs_indices]

## 6. Test the Recommendation System

In [None]:
# Sample free-text query used to exercise the recommender on the sampled movies
user_input = "I love thrilling action movies set in space, with a comedic twist."
recommendations = get_recommendations(user_input, df_sample, tfidf_matrix)

# One numbered line per hit, best match first
print("Top 5 movie recommendations:")
for i, (title, score) in enumerate(recommendations, start=1):
    print(f"{i}. {title} (Similarity: {score:.2f})")

## 7. Visualize Recommendations

In [None]:
def plot_recommendations(recommendations):
    """Draw a horizontal bar chart of recommended titles against similarity.

    Parameters
    ----------
    recommendations : iterable of (title, score) pairs
        Plotted in the order given, one bar per pair.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``. If ``recommendations`` is
        empty, a message is printed and nothing is plotted.
    """
    recommendations = list(recommendations)
    if not recommendations:
        # zip(*[]) produces nothing to unpack, so bail out before plotting.
        print("No recommendations to plot.")
        return

    titles, scores = zip(*recommendations)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=list(scores), y=list(titles), ax=ax)
    # Report the real number of bars rather than assuming five.
    ax.set_title(f"Top {len(titles)} Movie Recommendations")
    ax.set_xlabel("Similarity Score")
    ax.set_ylabel("Movie Title")
    plt.show()

# Chart the recommendations produced for the sample query above.
plot_recommendations(recommendations)

## 8. Interactive Recommendation System

In [None]:
def interactive_recommendations():
    """Prompt for a movie description, then print and plot the top matches.

    Reads one line from ``input()``. Uses the notebook-level ``df_sample``
    and ``tfidf_matrix`` variables, so those must already be defined.

    A blank (empty or whitespace-only) description is rejected with a
    message: it would produce an all-zero query vector and therefore
    arbitrary results at 0.00 similarity.

    Returns
    -------
    None
    """
    user_input = input("Describe the kind of movie you're looking for: ")
    if not user_input.strip():
        print("No description entered; nothing to recommend.")
        return

    recommendations = get_recommendations(user_input, df_sample, tfidf_matrix)

    print("\nTop 5 movie recommendations:")
    for i, (title, score) in enumerate(recommendations, 1):
        print(f"{i}. {title} (Similarity: {score:.2f})")

    plot_recommendations(recommendations)

# Prompts for input when the cell runs, so execution pauses here until a description is entered.
interactive_recommendations()