In [29]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [30]:
# Load the dataset (assuming 'movies.csv' contains the relevant movie information)
movies_df = pd.read_csv('Data/TMDB_movie_dataset_v11.csv')

# Create combined features for content-based filtering using title and genres
movies_df['combined_features'] = movies_df['title'] + " " + movies_df['genres']

In [31]:
# Handle NaN values by filling them with appropriate defaults
movies_df['title'] = movies_df['title'].fillna('')
movies_df['genres'] = movies_df['genres'].fillna('')
movies_df['release_date'] = movies_df['release_date'].fillna('')  # assuming 'release_date' is a string
movies_df['popularity'] = movies_df['popularity'].fillna(0)  # assuming popularity is numerical
movies_df['vote_average'] = movies_df['vote_average'].fillna(0)  # assuming vote average is numerical

In [32]:
# Convert 'release_date' to datetime and extract the year
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'], errors='coerce')  # Converts invalid dates to NaT
movies_df['release_year'] = movies_df['release_date'].dt.year.fillna(0).astype(int)  # Extract year from date and handle NaN


In [33]:
# Function to recommend movies based on the title (using cosine similarity)
def recommend_by_title(movie_title, df, top_n=10):
    # Vectorize the combined features to create a count matrix
    count_matrix = CountVectorizer().fit_transform(df['combined_features'])
    cosine_sim = cosine_similarity(count_matrix)
    
    # Check if the movie exists in the dataset
    if movie_title not in df['title'].values:
        return f"Movie '{movie_title}' not found in the dataset."

    movie_index = df[df['title'] == movie_title].index[0]
    similar_movies = list(enumerate(cosine_sim[movie_index]))
    sorted_similar_movies = sorted(similar_movies, key=lambda x: x[1], reverse=True)[1:top_n+1]
    
    recommendations = []
    for movie in sorted_similar_movies:
        recommendations.append(df['title'].iloc[movie[0]])
    return recommendations

In [34]:
# Filtering by release date
def filter_by_release_date(df, start_year=None, end_year=None):
    if start_year and end_year:
        filtered_df = df[(df['release_year'] >= start_year) & (df['release_year'] <= end_year)]
    elif start_year:
        filtered_df = df[df['release_year'] >= start_year]
    elif end_year:
        filtered_df = df[df['release_year'] <= end_year]
    else:
        filtered_df = df
    return filtered_df

In [35]:
# Filtering by popularity
def filter_by_popularity(df, min_popularity=None):
    if min_popularity:
        return df[df['popularity'] >= min_popularity]
    return df


In [36]:

# Filtering by genre
def filter_by_genre(df, genres):
    # Filter rows where any of the selected genres match
    return df[df['genres'].apply(lambda x: any(genre in x for genre in genres))]

In [37]:


# Filtering by vote average
def filter_by_vote_average(df, min_vote=7.0):
    return df[df['vote_average'] >= min_vote]


In [38]:
# --- Individual Recommendation Functions ---

def recommend_by_release_date(year, df, n_recommendations=5):
    filtered_movies = df[df['release_year'] == year]
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_popularity(min_popularity, df, n_recommendations=5):
    filtered_movies = df[df['popularity'] >= min_popularity]
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_genre(genres, df, n_recommendations=5):
    filtered_movies = filter_by_genre(df, genres)
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_vote_average(min_vote, df, n_recommendations=5):
    filtered_movies = filter_by_vote_average(df, min_vote)
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_title(movie_title, df, n_recommendations=5):
    # Check if the movie title exists in the dataframe
    if movie_title in df['title'].values:
        return [movie_title]  # Return the title as a single-item list
    else:
        return f"Movie '{movie_title}' not found in the dataset."


In [39]:


# --- Unified Function to Handle Different Recommendation Features ---
def recommend_movies(feature, value, df, n_recommendations=5):
    if feature == 'release_date':
        return recommend_by_release_date(value, df, n_recommendations)
    elif feature == 'popularity':
        return recommend_by_popularity(value, df, n_recommendations)
    elif feature == 'genre':
        return recommend_by_genre(value, df, n_recommendations)
    elif feature == 'vote_average':
        return recommend_by_vote_average(value, df, n_recommendations)
    elif feature == 'title':
        return recommend_by_title(value, df, n_recommendations)
    else:
        return "Invalid feature specified."


In [40]:
# --- User Input for Recommendations ---
def get_user_input():
    print("Available features for filtering: release_date, popularity, genre, vote_average, title")
    feature = input("Please enter the filter feature you want to use: ").strip().lower()
    
    if feature == 'release_date':
        value = int(input("Please enter the year for release date: "))  # Assume year is an integer
    elif feature == 'popularity':
        value = float(input("Please enter the minimum popularity: "))
    elif feature == 'vote_average':
        value = float(input("Please enter the minimum vote average: "))
    elif feature == 'genre':
        value = input("Please enter the genre: ").strip()  # Single genre input
    elif feature == 'title':
        value = input("Please enter the movie title: ").strip()
    else:
        return None, "Invalid feature specified."
    
    return feature, value


In [41]:

# Main Execution
if __name__ == "__main__":
    feature, value = get_user_input()
    
    if value is None:
        print(feature)  # Display the error message for invalid feature
    else:
        n_recommendations = 5  # Number of recommendations
        final_recommendations = recommend_movies(feature, value, movies_df, n_recommendations)
        print("\nRecommendations:")
        print(final_recommendations)  # This will only print the titles

Available features for filtering: release_date, popularity, genre, vote_average, title


Please enter the filter feature you want to use:  genre
Please enter the genre:  Action



Recommendations:
['Blue Beetle', 'Gran Turismo', 'The Nun II', 'Meg 2: The Trench', 'Retribution']


In [42]:
import pickle

In [44]:
pickle.dump(movies_df,open('movies_list.pkl','wb'))

ERROR: Could not find a version that satisfies the requirement pickel (from versions: none)
ERROR: No matching distribution found for pickel
ERROR: Could not find a version that satisfies the requirement pickle (from versions: none)
ERROR: No matching distribution found for pickle
