<a href="https://colab.research.google.com/github/khushisharmacs28-cloud/DeepLearning_2025-26_KhushiSharma/blob/main/Cprogramming.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load Datasets
def load_datasets(movies_path='/movies.csv', credits_path='/credits.csv'):
    """
    Load the movie and credit datasets from CSV files.

    Args:
        movies_path: Location of the movies CSV. The default, '/movies.csv',
            is an absolute path at the filesystem root, not a path relative
            to the working directory; pass another path if the file lives
            elsewhere.
        credits_path: Location of the credits CSV. Defaults to '/credits.csv'
            (also an absolute path).

    Returns:
        A ``(movies_df, credits_df)`` tuple of DataFrames as returned by
        ``pd.read_csv``.

    Raises:
        FileNotFoundError: If either file does not exist. A hint is printed
            first and the original exception is then re-raised.
    """
    print("Loading datasets...")
    try:
        # Only the reads can raise FileNotFoundError, so keep the try body small.
        movies_df = pd.read_csv(movies_path)
        credits_df = pd.read_csv(credits_path)
    except FileNotFoundError:
        print("Error: File not found. Ensure you have the correct file paths.")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    print("Datasets loaded successfully!")
    return movies_df, credits_df

# Step 2: Parse JSON-like Columns
def parse_json_column(data):
    """
    Parse JSON-like data (e.g., genres, cast, crew) into a list of names.

    Args:
        data: A string holding a Python-literal list of dicts, for example
            ``"[{'id': 28, 'name': 'Action'}]"``.

    Returns:
        The ``'name'`` value of every dict entry that has one, in order.
        An empty list is returned when the input cannot be parsed as a
        literal or does not parse to a list/tuple.
    """
    try:
        parsed = ast.literal_eval(data)
    except (ValueError, SyntaxError, TypeError):
        # literal_eval rejects malformed or non-literal input with any of these.
        return []

    # A valid literal that is not a sequence (e.g. "5") cannot be iterated;
    # treat it like unparseable input instead of raising TypeError.
    if not isinstance(parsed, (list, tuple)):
        return []

    # Skip non-dict entries and dicts lacking 'name' rather than raising KeyError.
    return [
        item['name']
        for item in parsed
        if isinstance(item, dict) and 'name' in item
    ]

def preprocess_data(movies_df, credits_df):
    """
    Join the movie and credit tables and derive a text 'features' column.

    The movie columns 'id', 'title', 'overview', 'genres' and 'vote_average'
    are left-joined to the credit columns 'movie_id', 'cast' and 'crew' on
    id == movie_id. The 'genres', 'cast' and 'crew' columns are then parsed
    with parse_json_column, and 'features' is built from all genre names
    plus the first three cast and first three crew names, space-separated.

    Returns the merged DataFrame with the added 'features' column.
    """
    print("Preprocessing data...")
    movie_columns = movies_df[['id', 'title', 'overview', 'genres', 'vote_average']]
    credit_columns = credits_df[['movie_id', 'cast', 'crew']]

    # Left join keeps every movie even when it has no credits row.
    merged = movie_columns.merge(
        credit_columns, left_on='id', right_on='movie_id', how='left'
    )

    # Missing cells become an empty-list literal so the parser yields [].
    for column in ('genres', 'cast', 'crew'):
        merged[column] = merged[column].fillna('[]').apply(parse_json_column)

    def create_feature(row):
        # All genres, but only the first three cast and crew names.
        parts = (
            " ".join(row['genres']),
            " ".join(row['cast'][:3]),
            " ".join(row['crew'][:3]),
        )
        return " ".join(parts)

    merged['features'] = merged.apply(create_feature, axis=1)

    print("Data preprocessing completed!")
    return merged

# Step 3: Compute TF-IDF Matrix
def compute_tfidf_matrix(movies):
    """
    Fit a TF-IDF vectorizer on the 'features' column and return the matrix.

    The vectorizer is created with stop_words='english'; the value returned
    is whatever fit_transform produces for movies['features'].
    """
    print("Computing TF-IDF matrix...")
    feature_text = movies['features']
    vectorizer = TfidfVectorizer(stop_words='english')
    matrix = vectorizer.fit_transform(feature_text)
    print("TF-IDF matrix computed successfully!")
    return matrix

# Step 4: Compute Similarity Scores
def compute_cosine_similarity(tfidf_matrix):
    """
    Return the cosine similarity of the TF-IDF matrix against itself.

    Both arguments passed to cosine_similarity are the same matrix, so the
    result compares every row of tfidf_matrix with every other row.
    """
    print("Computing cosine similarity matrix...")
    similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
    print("Cosine similarity computed successfully!")
    return similarity

# Step 5: Filter Movies by Genres
def filter_movies_by_genres(movies, selected_genres):
    """
    Keep the movies whose genre list contains at least one selected genre.

    Comparison is case-insensitive: both the selected genres and each
    movie's genre names are lower-cased before matching. Returns the
    matching rows of ``movies``.
    """
    print(f"Filtering movies for genres: {', '.join(selected_genres)}...")
    wanted = [choice.lower() for choice in selected_genres]

    def has_wanted_genre(genre_names):
        # True as soon as one selected genre appears among this movie's genres.
        for choice in wanted:
            if choice in [name.lower() for name in genre_names]:
                return True
        return False

    mask = movies['genres'].apply(has_wanted_genre)
    matches = movies[mask]
    print(f"{len(matches)} movies found for the selected genres.")
    return matches

# Step 6: Suggest Movies Based on Ratings
def suggest_movies_by_genres(movies, num_suggestions=10):
    """
    Return the highest-rated movies as "title (vote_average)" strings.

    The rows are sorted by 'vote_average' in descending order and the first
    ``num_suggestions`` of them are formatted into a list of strings.
    """
    print(f"Fetching top {num_suggestions} movies...")
    ranked = movies.sort_values(by='vote_average', ascending=False)
    best = ranked.head(num_suggestions)

    suggestions = []
    for _, movie in best.iterrows():
        title = movie['title']
        rating = movie['vote_average']
        suggestions.append(f"{title} ({rating})")
    return suggestions

# Step 7: User Interaction for Genre Selection
def user_select_genres():
    """
    Ask the user for preferred genres and return them as a list of strings.

    The reply is split on commas; each piece is stripped of surrounding
    whitespace and empty pieces are discarded.
    """
    print("Select genres you like. You can choose multiple genres.")
    print("Options: Action, Comedy, Drama, Horror, Romance, Sci-Fi, Thriller")
    reply = input("Enter your preferred genres separated by commas: ")

    chosen = []
    for piece in reply.strip().split(','):
        cleaned = piece.strip()
        if cleaned:
            chosen.append(cleaned)
    return chosen

# Main Function to Execute Recommendation System
def main():
    """
    Execute the movie recommendation system step-by-step.

    Loads and preprocesses the datasets, computes the TF-IDF and cosine
    similarity matrices, asks the user for genres, filters the movies by
    those genres, and prints the top 10 by 'vote_average'. Returns early
    with a message when no movie matches the selected genres.
    """
    # The banner emoji were stored as mis-decoded bytes; restored to the
    # intended characters.
    print("🎬 Welcome to the Movie Recommendation System! 🎬")

    # Load datasets
    movies_df, credits_df = load_datasets()

    # Preprocess data
    movies = preprocess_data(movies_df, credits_df)

    # Compute matrices
    tfidf_matrix = compute_tfidf_matrix(movies)
    # NOTE: cosine_sim is not read by any later step in this function; the
    # recommendations below are ranked by 'vote_average' only.
    cosine_sim = compute_cosine_similarity(tfidf_matrix)

    # Get user-selected genres
    selected_genres = user_select_genres()

    # Filter movies based on genres
    filtered_movies = filter_movies_by_genres(movies, selected_genres)
    if filtered_movies.empty:
        print(f"No movies found for the selected genres: {', '.join(selected_genres)}")
        return

    # Recommend movies
    recommendations = suggest_movies_by_genres(filtered_movies, num_suggestions=10)
    print("\n🍿 Recommended Movies:")
    print("\n".join(recommendations))

# Run the program only when this file is executed directly (its module name
# is "__main__"), so the definitions above load without starting the
# interactive prompt.
if __name__ == "__main__":
    main()


🎬 Welcome to the Movie Recommendation System! 🎬
Loading datasets...
Datasets loaded successfully!
Preprocessing data...
Data preprocessing completed!
Computing TF-IDF matrix...
TF-IDF matrix computed successfully!
Computing cosine similarity matrix...
Cosine similarity computed successfully!
Select genres you like. You can choose multiple genres.
Options: Action, Comedy, Drama, Horror, Romance, Sci-Fi, Thriller
Enter your preferred genres separated by commas: action,drama
Filtering movies for genres: action, drama...
3112 movies found for the selected genres.
Fetching top 10 movies...

🍿 Recommended Movies:
Dancer, Texas Pop. 81 (10.0)
Me You and Five Bucks (10.0)
One Man's Hero (9.3)
There Goes My Baby (8.5)
The Shawshank Redemption (8.5)
The Godfather (8.4)
The Prisoner of Zenda (8.4)
Whiplash (8.3)
Schindler's List (8.3)
The Godfather: Part II (8.3)
