<a href="https://colab.research.google.com/github/naikpooja202/YBI-Internship-project/blob/main/Movie_Recommandation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
def load_data(file_path):
    """Read a CSV file into a DataFrame, reporting the outcome on stdout.

    Args:
        file_path: Path (or file-like object) handed straight to
            ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` if anything went wrong while
        reading. Failures are printed rather than raised so the caller can
        simply check for ``None``.
    """
    try:
        frame = pd.read_csv(file_path)
        print(f"Dataset loaded successfully. Shape: {frame.shape}")
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error loading data: {err}")
        frame = None
    return frame

# Preprocess and clean data
def preprocess_data(data):
    """Drop incomplete rows and build the text column used for similarity.

    Args:
        data: DataFrame containing at least the 'Title', 'Genre' and
            'Description' columns.

    Returns:
        A new DataFrame holding only the rows where all three required
        columns are present, with an extra 'Combined' column equal to
        ``Genre + " " + Description``. The input frame is left untouched
        and the original index labels of the surviving rows are kept.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Take an explicit copy: assigning a column onto the result of dropna()
    # is chained assignment and triggers SettingWithCopyWarning on
    # pandas < 3 (and leaves it unclear which frame is being modified).
    cleaned = data.dropna(subset=['Title', 'Genre', 'Description']).copy()

    # Cast to str so a non-string column (e.g. numeric genre codes) does not
    # make the concatenation fail.
    cleaned['Combined'] = (
        cleaned['Genre'].astype(str) + " " + cleaned['Description'].astype(str)
    )
    return cleaned

# Build recommendation system
def recommend_movies(title, data, top_n=5):
    """Return the titles most similar to ``title`` by TF-IDF cosine similarity.

    Args:
        title: Exact movie title to look up in ``data['Title']``.
        data: DataFrame with a 'Title' column and a 'Combined' text column.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series of up to ``top_n`` titles ordered from most to least
        similar (ties keep their order in ``data``), or an empty list when
        ``title`` is not present. The queried row itself is never returned.
    """
    # Locate the movie by *position*, not by index label: rows of the TF-IDF
    # matrix are positional, while the frame's labels may have gaps (e.g.
    # after dropna). If the title occurs more than once, use the first hit.
    hits = (data['Title'] == title).to_numpy().nonzero()[0]
    if hits.size == 0:
        print(f"Movie '{title}' not found in the dataset.")
        return []
    idx = int(hits[0])

    # Vectorize text data using TF-IDF
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(data['Combined'])

    # Only the query row is needed, so compare it against every movie instead
    # of building the full n x n similarity matrix.
    sim_row = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).ravel()

    # Rank every other movie by similarity. The query row is excluded
    # explicitly rather than by assuming it sorts first, which is not
    # guaranteed when another row ties with it.
    ranked = sorted(
        (pos for pos in range(len(sim_row)) if pos != idx),
        key=lambda pos: sim_row[pos],
        reverse=True,
    )

    return data['Title'].iloc[ranked[:top_n]]

# Main execution
if __name__ == "__main__":
    # Path to your dataset
    file_path = "movies.csv"  # Replace with your dataset path

    # Load and preprocess data; load_data returns None when reading fails.
    movie_data = load_data(file_path)
    if movie_data is not None:
        movie_data = preprocess_data(movie_data)

        # Ask user for a movie title
        movie_title = input("Enter a movie title: ")

        # Get recommendations
        recommendations = recommend_movies(movie_title, movie_data)
        # recommend_movies returns a pandas Series on success and an empty
        # list otherwise. A Series has no unambiguous truth value
        # (`if series:` raises ValueError), so test its length instead.
        if len(recommendations) > 0:
            print(f"\nMovies similar to '{movie_title}':")
            for i, movie in enumerate(recommendations, 1):
                print(f"{i}. {movie}")


Error loading data: [Errno 2] No such file or directory: 'movies.csv'
