In [4]:
# Import necessary libraries
import pandas as pd
from sentence_transformers import SentenceTransformer, util  # Use Sentence Transformers for better semantic matching

# Load the dataset (assuming 'movies.csv' contains the relevant movie information)
import csv
movies_df = pd.read_csv('/content/TMDB_movie_dataset_v11.csv')



# Handle NaN values by filling them with appropriate defaults
movies_df['title'] = movies_df['title'].fillna('')
movies_df['genres'] = movies_df['genres'].fillna('')
movies_df['release_date'] = movies_df['release_date'].fillna('')  # assuming 'release_date' is a string
movies_df['popularity'] = movies_df['popularity'].fillna(0)  # assuming popularity is numerical
movies_df['vote_average'] = movies_df['vote_average'].fillna(0)  # assuming vote average is numerical

# Convert 'release_date' to datetime and extract the year
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'], errors='coerce')  # Converts invalid dates to NaT
movies_df['release_year'] = movies_df['release_date'].dt.year.fillna(0).astype(int)  # Extract year from date and handle NaN

# Load the pre-trained sentence transformer model (use a lightweight model for speed)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Compute embeddings for all movie titles (once, for efficiency)
movie_embeddings = model.encode(movies_df['title'].tolist(), convert_to_tensor=True)

# Function to recommend movies based on the title (using sentence transformers)
def recommend_by_title(movie_title, df, n_recommendations=10):
    # Compute embedding for the input movie title
    query_embedding = model.encode(movie_title, convert_to_tensor=True)

    # Compute cosine similarities between the query and all movie titles
    cosine_similarities = util.pytorch_cos_sim(query_embedding, movie_embeddings).squeeze()

    # Get the indices of the top N similar movies (excluding the input movie)
    top_results = cosine_similarities.argsort(descending=True)[:n_recommendations+1].cpu().numpy()

    # Retrieve the titles of the most similar movies (skip the first one if it's the input movie)
    recommendations = [df['title'].iloc[idx] for idx in top_results if df['title'].iloc[idx].lower() != movie_title.lower()][:n_recommendations]

    return recommendations

# Filtering by release date
def filter_by_release_date(df, start_year=None, end_year=None):
    if start_year and end_year:
        filtered_df = df[(df['release_year'] >= start_year) & (df['release_year'] <= end_year)]
    elif start_year:
        filtered_df = df[df['release_year'] >= start_year]
    elif end_year:
        filtered_df = df[df['release_year'] <= end_year]
    else:
        filtered_df = df
    return filtered_df

# Filtering by popularity
def filter_by_popularity(df, min_popularity=None):
    if min_popularity:
        return df[df['popularity'] >= min_popularity]
    return df

# Filtering by genre
def filter_by_genre(df, genres):
    # Filter rows where any of the selected genres match
    return df[df['genres'].apply(lambda x: any(genre in x for genre in genres))]

# Filtering by vote average
def filter_by_vote_average(df, min_vote=7.0):
    return df[df['vote_average'] >= min_vote]

# --- Individual Recommendation Functions ---
def recommend_by_release_date(year, df, n_recommendations=5):
    filtered_movies = df[df['release_year'] == year]
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_popularity(min_popularity, df, n_recommendations=5):
    filtered_movies = df[df['popularity'] >= min_popularity]
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_genre(genres, df, n_recommendations=5):
    filtered_movies = filter_by_genre(df, genres)
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

def recommend_by_vote_average(min_vote, df, n_recommendations=5):
    filtered_movies = filter_by_vote_average(df, min_vote)
    return filtered_movies.nlargest(n_recommendations, 'popularity')['title'].tolist()

# --- Unified Function to Handle Different Recommendation Features ---
def recommend_movies(feature, value, df, n_recommendations=5):
    if feature == 'release_date':
        return recommend_by_release_date(value, df, n_recommendations)
    elif feature == 'popularity':
        return recommend_by_popularity(value, df, n_recommendations)
    elif feature == 'genre':
        return recommend_by_genre(value, df, n_recommendations)
    elif feature == 'vote_average':
        return recommend_by_vote_average(value, df, n_recommendations)
    elif feature == 'title':
        return recommend_by_title(value, df, n_recommendations)
    else:
        return "Invalid feature specified."

# --- User Input for Recommendations ---
def get_user_input():
    print("Available features for filtering: release_date, popularity, genre, vote_average, title")
    feature = input("Please enter the filter feature you want to use: ").strip().lower()

    if feature == 'release_date':
        value = int(input("Please enter the year for release date: "))  # Assume year is an integer
    elif feature == 'popularity':
        value = float(input("Please enter the minimum popularity: "))
    elif feature == 'vote_average':
        value = float(input("Please enter the minimum vote average: "))
    elif feature == 'genre':
        value = input("Please enter the genre: ").strip()  # Single genre input
    elif feature == 'title':
        value = input("Please enter the movie title: ").strip()
    else:
        return None, "Invalid feature specified."

    return feature, value

# Main Execution
if __name__ == "__main__":
    feature, value = get_user_input()

    if value is None:
        print(feature)  # Display the error message for invalid feature
    else:
        n_recommendations = 5  # Number of recommendations
        final_recommendations = recommend_movies(feature, value, movies_df, n_recommendations)
        print("\nRecommendations:")
        print(final_recommendations)


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Available features for filtering: release_date, popularity, genre, vote_average, title
Please enter the filter feature you want to use: title
Please enter the movie title: Inception

Recommendations:
['Inception: Jump Right Into the Action', 'The Crack: Inception', 'WWA The Inception', 'Outset']


In [6]:
# Import necessary libraries
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import gradio as gr

# Load the dataset (assuming 'TMDB_movie_dataset_v11.csv' contains the relevant movie information)
movies_df = pd.read_csv('/content/TMDB_movie_dataset_v11.csv')

# Handle NaN values by filling them with appropriate defaults
movies_df['title'] = movies_df['title'].fillna('')
movies_df['genres'] = movies_df['genres'].fillna('')
movies_df['release_date'] = movies_df['release_date'].fillna('')
movies_df['popularity'] = movies_df['popularity'].fillna(0)
movies_df['vote_average'] = movies_df['vote_average'].fillna(0)

# Convert 'release_date' to datetime and extract the year
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'], errors='coerce')
movies_df['release_year'] = movies_df['release_date'].dt.year.fillna(0).astype(int)

# Load the pre-trained sentence transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Compute embeddings for all movie titles (once, for efficiency)
movie_embeddings = model.encode(movies_df['title'].tolist(), convert_to_tensor=True)

# Function to recommend movies based on various filters
def recommend_movies(feature, value, no_movies):
    no_movies = int(no_movies)  # Ensure the number of movies is an integer
    if feature == 'Title':
        recommendations = recommend_by_title(value, movies_df, n_recommendations=no_movies)
    elif feature == 'Release Year':
        recommendations = movies_df[movies_df['release_year'] == int(value)]['title'].nlargest(no_movies).tolist()
    elif feature == 'Genre':
        recommendations = movies_df[movies_df['genres'].str.contains(value, case=False, na=False)]['title'].nlargest(no_movies).tolist()
    elif feature == 'Popularity':
        recommendations = movies_df[movies_df['popularity'] >= float(value)]['title'].nlargest(no_movies).tolist()
    elif feature == 'Vote Average':
        recommendations = movies_df[movies_df['vote_average'] >= float(value)]['title'].nlargest(no_movies).tolist()
    else:
        return "Invalid feature specified."

    return "\n".join(recommendations) if recommendations else "Sorry, no recommendations found."

# Gradio prediction function
def predict(filter_type, filter_value, no_movies):
    return recommend_movies(filter_type, filter_value, no_movies)

# Define the Gradio interface
interface = gr.Interface(
    fn=predict,  # The function to be called
    inputs=[
        gr.Radio(label="Select Filter Type:", choices=['Title', 'Release Year', 'Genre', 'Popularity', 'Vote Average']),
        gr.Textbox(label="Filter Value:", placeholder="Enter the value for the selected filter"),
        gr.Textbox(label='Number of Recommendations:', value='5')  # Input type
    ],
    outputs=gr.Textbox(label="Recommendations:")  # Output type
)

# Launch the interface
interface.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://1b1f25c502c2559297.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


