# **Movie Recommendation Model**

## **Loading the dataset from TMDB**

In [1]:
# Importing necessary libraries
import requests
import json
import time
import csv

In [2]:
# TMDB API configuration
import os

# Prefer a key supplied through the TMDB_API_KEY environment variable so the
# secret does not have to live in the notebook. The literal fallback keeps
# existing runs working unchanged.
# NOTE(review): the fallback key is stored in plain text - rotate it and remove
# the fallback once the environment variable is set everywhere.
API_KEY = os.environ.get("TMDB_API_KEY") or "3a5247664c3b9f56e879a099706a2f04"
BASE_URL = "https://api.themoviedb.org/3"

In [3]:
# Function to get the details of a movie
def get_movie_details(movie_id, timeout=10):
    """Fetch one movie (including its credits) from TMDB as a flat dict.

    Args:
        movie_id: TMDB identifier of the movie.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys "id", "title", "genre", "director", "actors" and
        "overview". Returns None when the request fails, the status code is
        not 200, or the body is not valid JSON.
    """
    url = f"{BASE_URL}/movie/{movie_id}"
    params = {"api_key": API_KEY, "append_to_response": "credits"}

    # Without a timeout a stalled connection would block the caller forever.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        return None

    # If the response is not successful
    if response.status_code != 200:
        return None

    try:
        data = response.json()
    except ValueError:
        return None

    # "or" also covers fields that are present but null
    movie_credits = data.get("credits") or {}
    crew = movie_credits.get("crew") or []
    cast = movie_credits.get("cast") or []

    # Get the director: first crew entry whose job is "Director"
    director = next(
        (
            member.get("name") or "Unknown"
            for member in crew
            if member.get("job") == "Director"
        ),
        "Unknown",
    )

    # Get the actors, skipping cast entries without a name
    actors = [member["name"] for member in cast if member.get("name")]

    genres = ", ".join(
        genre["name"] for genre in (data.get("genres") or []) if genre.get("name")
    )

    # Return the movie details
    return {
        "id": movie_id,
        "title": data.get("title", "Unknown"),
        "genre": genres,
        "director": director,
        "actors": ", ".join(actors),
        "overview": data.get("overview", "No overview available"),
    }

In [4]:
# Function to fetch the movies with details
def get_movies(page_limit=15, timeout=10):
    """Collect detailed records for the most popular movies on TMDB.

    Walks the "discover" listing page by page and looks up every listed movie
    with get_movie_details.

    Args:
        page_limit: Number of listing pages to walk, starting at page 1.
        timeout: Seconds to wait for each listing response.

    Returns:
        A list of the dicts returned by get_movie_details. Pages and movies
        that could not be fetched are skipped.
    """
    movies_data = []  # List to store the movies data
    url = f"{BASE_URL}/discover/movie"

    for page in range(1, page_limit + 1):
        params = {
            "api_key": API_KEY,
            "sort_by": "popularity.desc",
            "include_video": "false",
            "adult": "false",
            "page": page,
        }

        results = []
        try:
            response = requests.get(url, params=params, timeout=timeout)
            # If the response is successful
            if response.status_code == 200:
                results = response.json().get("results") or []
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body: skip this page but keep the
            # movies collected so far and carry on with the next page.
            results = []

        for movie in results:
            movie_id = movie.get("id")
            if movie_id is None:
                continue
            details = get_movie_details(movie_id)
            if details:
                movies_data.append(details)

        time.sleep(0.5)  # To avoid hitting the API rate limit
    return movies_data

In [61]:
# Fetch the details of every movie listed on the first 100 discover pages.
# NOTE: this sends one request per page plus one per movie and pauses 0.5 s
# after each page, so expect the cell to take a while.
movies = get_movies(100)

In [5]:
# Function to save the movies to a csv file
def save_movies_to_csv(movies, filename="movies.csv"):
    """Write *movies* to *filename* as CSV, one row per movie.

    Args:
        movies: Iterable of dicts with the keys "id", "title", "genre",
            "director", "actors" and "overview".
        filename: Path of the CSV file; it is overwritten if it exists.

    A progress line is printed for every movie written and a final line once
    all rows are done.
    """
    header = ["ID", "Title", "Genre", "Director", "Actors", "Overview"]
    # dict keys in the same order as the header columns
    fields = ("id", "title", "genre", "director", "actors", "overview")

    with open(filename, mode="w", newline="", encoding="utf-8") as out:
        rows = csv.writer(out)
        rows.writerow(header)
        for entry in movies:
            rows.writerow([entry[field] for field in fields])
            print(f"{entry['title']} saved to {filename}")
    print("All movies saved to csv")


## **Applying the model to the dataset**

In [75]:
# Importing necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
from rapidfuzz import process, fuzz

#### **Preprocessing the data**

In [76]:
# Load the dataset and replace missing values with empty strings
ds = pd.read_csv("movies.csv").fillna("")

# Build one text field per movie out of the descriptive columns
ds["combined_features"] = (
    ds["Genre"]
    + " " + ds["Director"]
    + " " + ds["Actors"]
    + " " + ds["Overview"]
)

#### **Converting text data to numerical data using TF-IDF**

In [77]:
# Turn the combined text of every movie into TF-IDF features
# (English stop words are dropped by the vectorizer)
vectorizer = TfidfVectorizer(stop_words="english")
feature_text = ds["combined_features"].values.astype('U')
tfidf_matrix = vectorizer.fit_transform(feature_text)

#### **Function to recommend movies based on a given movie title**

In [78]:
# Compute the cosine similarity matrix from the TF-IDF features.
# get_recommendations reads row i of it as the similarity scores of movie i
# against every movie in the dataset.
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get the movie recommendations
def get_recommendations(movie_title, cosine_sim=cosine_sim, ds=ds):
    """Recommend the movies most similar to the one matching *movie_title*.

    The title is matched fuzzily (RapidFuzz WRatio) against the lower-cased
    titles of *ds*, so typos and different casing are tolerated.

    Args:
        movie_title: Title typed by the user.
        cosine_sim: Similarity matrix indexed by row position of *ds*.
        ds: DataFrame with a "Title" column.

    Returns:
        A dict with a "message" string and a "recommendations" list of up to
        10 titles, most similar first. The matched movie itself is not part of
        the list. The list is empty when no title scores at least 75.
    """
    query = movie_title.lower()  # Lowercase for case-insensitive matching

    # Work on a local list rather than adding a column to the shared dataset.
    # List positions line up with ds.iloc positions and cosine_sim rows.
    lower_titles = ds["Title"].fillna("").astype(str).str.lower().tolist()

    # Use RapidFuzz to find the best match; WRatio copes with typos and
    # different spellings. The scorer comes from the imported `fuzz` module
    # because `process.fuzz` is not a documented attribute.
    match = process.extractOne(query, lower_titles, scorer=fuzz.WRatio)

    # extractOne returns None when there is nothing to choose from
    if match is None or match[1] < 75:
        print("No close match found.")
        return {"message": "No close match found.", "recommendations": []}

    _, score, idx = match
    matched_title = ds.iloc[idx]["Title"]
    print(f"Best match found: {matched_title} with score {score}")

    # Rank all movies by similarity to the matched one, best first
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Keep the 10 most similar movies, leaving out the matched movie itself
    # (it is always maximally similar to itself).
    top_positions = [pos for pos, _ in ranked if pos != idx][:10]
    top_movies = [ds.iloc[pos]["Title"] for pos in top_positions]

    return {"message": f"Best match found: {matched_title}", "recommendations": top_movies}


In [None]:
# Test the function (the title is misspelled on purpose to exercise the fuzzy match)
movie_title = "avngers"
recommendations = get_recommendations(movie_title, cosine_sim)

# Print the recommendations
# get_recommendations returns a dict; the titles are under "recommendations".
# Iterating the dict itself would only print its keys.
if recommendations["recommendations"]:
    print(f"Recommendations for {movie_title}:")
    for movie in recommendations["recommendations"]:
        print(movie)
    

### **Function to recommend movies based on user preferences**

In [80]:
def _clean_terms(terms):
    """Return the non-blank entries of *terms* as stripped strings."""
    if terms is None:
        return []
    # A bare string is a single term, not a sequence of characters
    if isinstance(terms, str):
        terms = [terms]
    stripped = (str(term).strip() for term in terms if term is not None)
    return [term for term in stripped if term]


def _contains_any(column, terms):
    """Return a boolean mask of the values in *column* containing any term."""
    mask = pd.Series(False, index=column.index)
    for term in terms:
        # regex=False: terms come from the user and must be matched literally,
        # otherwise input such as "C++" or "(" is treated as a pattern
        mask |= column.str.contains(term, case=False, na=False, regex=False)
    return mask


def get_reco_on_pref(fav_genres, fav_actors, fav_directors, ds=ds):
    """Pick random movies that satisfy the given preferences.

    A movie qualifies when, for every non-empty preference list, the matching
    column contains at least one of the listed terms (case-insensitive
    substring match). Empty lists and blank terms impose no restriction.

    Args:
        fav_genres: Terms matched against the "Genre" column.
        fav_actors: Terms matched against the "Actors" column.
        fav_directors: Terms matched against the "Director" column.
        ds: DataFrame with "Genre", "Actors", "Director" and "Title" columns.

    Returns:
        A dict with a "message" string and a "recommendations" list holding up
        to 5 randomly chosen titles, or an empty list when nothing matches.
    """
    filtered_movies = ds
    preferences = (
        ("Genre", fav_genres),
        ("Actors", fav_actors),
        ("Director", fav_directors),
    )
    for column, raw_terms in preferences:
        terms = _clean_terms(raw_terms)
        if terms:
            filtered_movies = filtered_movies[_contains_any(filtered_movies[column], terms)]

    # If no movies match, say so and return an empty list
    if filtered_movies.empty:
        return {"message": "No movies found matching your preferences.", "recommendations": []}

    # With fewer than 5 matches return all of them (in random order) instead
    # of asking sample() for more rows than exist.
    sample_size = min(5, len(filtered_movies))
    titles = filtered_movies.sample(sample_size)["Title"].tolist()
    return {"message": "Movies based on your preferences", "recommendations": titles}

In [None]:
# Test the function
fav_genres = ["Action"]
fav_actors = []  # no actor preference
fav_directors = ["Christopher Nolan"]
recommendations = get_reco_on_pref(fav_genres, fav_actors, fav_directors)

# Print the recommendations
# get_reco_on_pref returns a dict; the titles are under "recommendations".
# Iterating the dict itself would only print its keys.
if recommendations["recommendations"]:
    print("Recommendations based on preferences:")
    for movie in recommendations["recommendations"]:
        print(movie)
else:
    print(recommendations["message"])


#### **Function to search movies by genre, actor, or director**

In [81]:
def search_movies_by(category, keyword, ds=ds):
    """Return up to 10 random titles whose *category* column contains *keyword*.

    Args:
        category: Column to search; one of "Genre", "Actors" or "Director".
        keyword: Text to look for (literal, case-insensitive substring).
        ds: DataFrame holding the searched column and a "Title" column.

    Returns:
        A dict with a "message" string and the matching titles under
        "recommendations". The same titles are also available under "results",
        the key previously used for the empty and invalid-category answers.
    """
    def build_response(message, titles):
        # One shape for every outcome so callers never have to guess the key
        return {"message": message, "recommendations": titles, "results": list(titles)}

    if category not in ("Genre", "Actors", "Director"):
        return build_response("Invalid category. Choose 'Genre', 'Actors', or 'Director'.", [])

    # regex=False: the keyword comes from the user and is matched literally,
    # so characters such as "(" or "+" cannot break or alter the search
    hits = ds[category].str.contains(keyword, case=False, na=False, regex=False)
    matches = ds[hits]["Title"].tolist()

    if not matches:
        return build_response("No movies found.", [])

    # Random selection without replacement, capped at 10 titles
    chosen = random.sample(matches, min(10, len(matches)))
    return build_response(f"Movies found for {category} - {keyword}", chosen)

In [None]:
# Test the function: look up movies whose "Actors" column contains "Cruise"
category = "Actors"
keyword = "Cruise"
search_movies_by(category, keyword)

## **Flask API for the Movie Recommendations**

In [82]:
from flask import Flask, request, jsonify
# Flask application object; the @app.route decorators below register on it
app = Flask(__name__)

In [83]:
# Route to get movie recommendations based on title
@app.route("/recommend", methods=["GET"])
def recommend():
    """Handle GET /recommend?title=<movie title>.

    Responds with 400 and an "error" message when the "title" query parameter
    is missing or empty; otherwise with 200 and the result of
    get_recommendations under the "recommendations" key.
    """
    title_query = request.args.get("title")
    if title_query:
        payload = {"recommendations": get_recommendations(title_query)}
        return jsonify(payload), 200
    return jsonify({"error": "No title provided."}), 400

In [84]:
# Route to get movie recommendations based on user preferences
@app.route("/recommend/preferences", methods=["POST"])
def recommend_preferences():
    """Handle POST /recommend/preferences.

    Expects a JSON object with the optional keys "genres", "actors" and
    "directors". Responds with 400 and an "error" message when the body is not
    a JSON object or when no preference is given; otherwise with 200 and the
    result of get_reco_on_pref under the "recommendations" key.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the client gets a JSON error answer in every case.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    # "or []" also covers keys that are present but null
    fav_genres = data.get("genres") or []
    fav_actors = data.get("actors") or []
    fav_directors = data.get("directors") or []

    if not fav_genres and not fav_actors and not fav_directors:
        return jsonify({"error": "No preferences provided."}), 400
    return jsonify({"recommendations": get_reco_on_pref(fav_genres, fav_actors, fav_directors)}), 200

In [85]:
# Route to search movies by category
@app.route("/search", methods=["GET"])
def search():
    """Handle GET /search?category=<column>&keyword=<text>.

    Responds with 400 and an "error" message when either query parameter is
    missing or empty; otherwise with 200 and the result of search_movies_by
    under the "recommendations" key.
    """
    query = request.args
    category, keyword = query.get("category"), query.get("keyword")

    if category and keyword:
        found = search_movies_by(category, keyword)
        return jsonify({"recommendations": found}), 200

    return jsonify({"error": "Missing 'category' or 'keyword' parameter"}), 400

In [None]:
# Start the server only when this file is executed as a script.
# NOTE(review): debug=True looks like a development-only setting - confirm it
# is switched off for any real deployment.
if __name__ == "__main__":
    app.run(debug=True) # By default, Flask runs on port 5000