<a href="https://colab.research.google.com/github/rekha111-ml/movie-recommender/blob/main/GKBLabs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Movie Recommendation System Based on Collaborative Filtering

In [None]:
import ast
import json

import pandas as pd

# Load data
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so columns with mixed content no longer trigger
# a DtypeWarning on this read.
movies = pd.read_csv(
    "/content/sample_data/movies_metadata.csv (1).zip",
    low_memory=False,
)
ratings = pd.read_csv("/content/sample_data/ratings_small.csv (1).zip")

# Check first few rows
print(movies.head())
print(ratings.head())

# Convert 'id' in movies to numeric; values that are not numbers become NaN
movies['id'] = pd.to_numeric(movies['id'], errors='coerce')

# Drop rows with NaN in 'id' after coercion
movies.dropna(subset=['id'], inplace=True)

# Convert 'id' to int64 explicitly (plain `int` is platform-dependent) so it
# matches the integer 'movieId' column it is joined against
movies['id'] = movies['id'].astype('int64')


# Attach every rating to its movie row (inner join: unmatched rows are dropped).
# NOTE(review): this assumes movies['id'] and ratings['movieId'] come from the
# same id space. If the ratings file uses a different catalogue's ids, a
# mapping table is needed before this join - confirm against the dataset docs.
merged_df = pd.merge(movies, ratings, left_on="id", right_on="movieId")

# Keep only the columns the recommender uses
merged_df = merged_df[['title', 'genres', 'rating']]

# Parse the 'genres' column
def parse_genres(genre_list_str):
    """Extract the genre names from a stringified list of genre dicts.

    Args:
        genre_list_str: Text such as
            "[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}]".
            Non-string values (for example the NaN pandas uses for a missing
            cell) are accepted and treated as "no genres".

    Returns:
        A list with the 'name' value of every entry, in their original order.
        An empty list is returned when the input is not a string, cannot be
        parsed, or is not a sequence of dicts that each carry a 'name' key.
    """
    # Missing cells arrive as float NaN, which has no string methods.
    if not isinstance(genre_list_str, str):
        return []

    try:
        # literal_eval understands Python-style quoting, so names containing
        # an apostrophe or a double quote survive (a blanket ' -> " swap would
        # corrupt them) and no arbitrary code is executed.
        genre_list = ast.literal_eval(genre_list_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        try:
            # Fall back to the JSON route for text that uses JSON-only tokens
            # such as true/false/null.
            genre_list = json.loads(genre_list_str.replace("'", '"'))
        except json.JSONDecodeError:
            return []

    try:
        # Extract genre names
        return [genre['name'] for genre in genre_list]
    except (KeyError, TypeError, IndexError):
        # Parsed fine, but the value is not a list of {'name': ...} dicts.
        return []

# Replace each raw genre string with its list of genre names
parsed_genres = merged_df['genres'].apply(parse_genres)
merged_df['genres'] = parsed_genres

# Discard rows whose genre list came back empty, then give every genre of a
# row its own copy of that row
has_genres = merged_df['genres'].apply(lambda names: len(names) > 0)
merged_df = (
    merged_df[has_genres]
    .explode('genres')
)

print(merged_df.head())

def recommend_movies(selected_genres, top_n=5):
    """Rank the movies of the chosen genres by their mean rating.

    Reads the module-level ``merged_df``, using its 'title', 'genres' and
    'rating' columns.

    Args:
        selected_genres: Genre names to match exactly. Either a list-like of
            names or a single name passed as a plain string.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns 'title' and 'rating' (the mean rating of
        the matching rows), best rated first. Titles with equal means are
        ordered alphabetically. Empty when no row matches.
    """
    # Series.isin rejects a bare string, so wrap a single genre name in a list.
    if isinstance(selected_genres, str):
        selected_genres = [selected_genres]

    # Filter by selected genres
    filtered = merged_df[merged_df['genres'].isin(selected_genres)]

    # Group by title and calculate average rating. The secondary sort key
    # makes the order of equally rated titles deterministic.
    recommendations = (filtered.groupby('title')
                               .agg({'rating': 'mean'})
                               .reset_index()
                               .sort_values(by=['rating', 'title'],
                                            ascending=[False, True]))

    # Keep only the top N
    return recommendations.head(top_n)

def main():
    """Ask for genres on stdin and print the best-rated matching movies.

    Lists the genres present in the module-level ``merged_df``, reads a
    comma-separated selection, and prints the result of ``recommend_movies``
    for it (or a notice when nothing matches).
    """
    unique_genres = merged_df['genres'].unique()
    print("Available genres:")
    print(", ".join(unique_genres))

    # Case-folded name -> spelling used in the data, so "comedy" or "COMEDY"
    # still selects "Comedy".
    canonical = {genre.casefold(): genre for genre in unique_genres}

    # Get input from user
    raw = input("\nEnter your preferred genres separated by commas: ")
    selected = []
    for entry in raw.split(','):
        name = entry.strip()
        if not name:
            # Ignore blanks produced by stray or trailing commas.
            continue
        # Unknown names are kept as typed; they simply match nothing.
        selected.append(canonical.get(name.casefold(), name))

    print(f"\nTop movies for genres: {', '.join(selected)}\n")
    results = recommend_movies(selected)

    if not results.empty:
        print(results.to_string(index=False))
    else:
        print("No movies found for the selected genres.")

# Start the interactive prompt only when this file is executed directly (a
# notebook cell also runs as "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

  movies = pd.read_csv("/content/sample_data/movies_metadata.csv (1).zip")


   adult                              belongs_to_collection    budget  \
0  False  {'id': 10194, 'name': 'Toy Story Collection', ...  30000000   
1  False                                                NaN  65000000   
2  False  {'id': 119050, 'name': 'Grumpy Old Men Collect...         0   
3  False                                                NaN  16000000   
4  False  {'id': 96871, 'name': 'Father of the Bride Col...         0   

                                              genres  \
0  [{'id': 16, 'name': 'Animation'}, {'id': 35, '...   
1  [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...   
2  [{'id': 10749, 'name': 'Romance'}, {'id': 35, ...   
3  [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...   
4                     [{'id': 35, 'name': 'Comedy'}]   

                               homepage     id    imdb_id original_language  \
0  http://toystory.disney.com/toy-story    862  tt0114709                en   
1                                   NaN   8844  tt0113497         