In [9]:
import requests
import time
!pip install rapidfuzz



In [11]:
import pandas as pd

In [13]:
API_KEY = '6d6853d790a2a113770546e2b036d1fe'
BASE_URL = 'https://api.themoviedb.org/3'

In [15]:
def get_movie_details(movie_id):
    url = f'{BASE_URL}/movie/{movie_id}?api_key={API_KEY}&language=en-US'
    try:
        response = requests.get(url)
        response.raise_for_status() 
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for movie ID {movie_id}: {e}")
        return None

    
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return None

def search_movies(query):
    url = f'{BASE_URL}/search/movie?api_key={API_KEY}&query={query}&language=en-US&page=1&include_adult=false'
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return None


In [17]:
movie_id = 550 
details = get_movie_details(movie_id)
if details:
    print(f"Title: {details['title']}")
    print(f"Overview: {details['overview']}")
else:
    print("Movie not found.")

search_query = 'Inception'
results = search_movies(search_query)
if results and 'results' in results:
    for movie in results['results']:
        print(f"Title: {movie['title']}, Release Date: {movie['release_date']}")
else:
    print("No movies found.")


Title: Fight Club
Overview: A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a shocking new form of therapy. Their concept catches on, with underground "fight clubs" forming in every town, until an eccentric gets in the way and ignites an out-of-control spiral toward oblivion.
Title: Inception, Release Date: 2010-07-15
Title: The Crack: Inception, Release Date: 2019-10-04
Title: Inception: The Cobol Job, Release Date: 2010-12-07
Title: The Inception of Dramatic Representation, Release Date: 
Title: Inception, Release Date: 1980-01-23
Title: Syndrome Halla, the Inception of Croatian Professional Film – Born to Die, Release Date: 2017-01-01
Title: Bikini Inception, Release Date: 2015-05-19
Title: Inception: Music from the Motion Picture, Release Date: 2010-12-07
Title: WWA The Inception, Release Date: 2001-10-26


In [25]:
def get_movie_details_extended(movie_id):
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={'6d6853d790a2a113770546e2b036d1fe'}&language=en-US&append_to_response=credits"
    response = requests.get(url)
    return response.json()

def fetch_extended_movies_data(movie_id):
    movies_data = []
    for movie_id in movie_id:
        movie_details = get_movie_details_extended(movie_id)
        
        
        movie = {
            'id': movie_details['id'],
            'title': movie_details['title'],
            'overview': movie_details['overview'],
            'genres': [genre['name'] for genre in movie_details['genres']],
            'director': [crew['name'] for crew in movie_details['credits']['crew'] if crew['job'] == 'Director'],
            'cast': [cast['name'] for cast in movie_details['credits']['cast'][:5]],  # Top 5 cast members
            'belongs_to_collection': movie_details['belongs_to_collection']['name'] if movie_details['belongs_to_collection'] else None
        }
        movies_data.append(movie)
    return pd.DataFrame(movies_data)

movies_df = fetch_extended_movies_data(movie_id)
print(movies_df.head())


TypeError: 'int' object is not iterable

In [23]:

def fetch_movies_page(page):
    url = f"https://api.themoviedb.org/3/discover/movie?api_key={API_KEY}&language=en-US&sort_by=popularity.desc&page={page}"
    response = requests.get(url)
    return response.json()


def fetch_all_movie_ids(max_pages=None):
    movie_ids = []
    page = 1
    while True:
        data = fetch_movies_page(page)
        
        
        ids_on_page = [movie['id'] for movie in data['results']]
        movie_ids.extend(ids_on_page)
        
        
        if page >= data['total_pages'] or (max_pages and page >= max_pages):
            break
        
        
        page += 1
        time.sleep(0.2)  
        print(f"Fetched page {page} with {len(ids_on_page)} movie IDs.")
    return movie_ids


movie_ids = fetch_all_movie_ids(max_pages=10)  

print(f"Total movie IDs fetched: {len(movie_ids)}")
print(movie_ids) 
movies_df = fetch_extended_movies_data(movie_ids)
print(movies_df.head())

Fetched page 2 with 20 movie IDs.
Fetched page 3 with 20 movie IDs.
Fetched page 4 with 20 movie IDs.
Fetched page 5 with 20 movie IDs.
Fetched page 6 with 20 movie IDs.
Fetched page 7 with 20 movie IDs.
Fetched page 8 with 20 movie IDs.
Fetched page 9 with 20 movie IDs.
Fetched page 10 with 20 movie IDs.
Total movie IDs fetched: 200
[912649, 1034541, 1184918, 1118031, 558449, 533535, 933260, 1124641, 698687, 845781, 519182, 945961, 1051896, 1100856, 1116490, 335983, 1084736, 1022789, 1142518, 1041317, 335983, 1084736, 1100856, 1196470, 1100782, 98, 1014505, 1029235, 580489, 928480, 1094974, 976734, 1288998, 592831, 889737, 1214484, 1331375, 1328814, 1371727, 1144962, 592831, 1063877, 1144962, 1159311, 420634, 827931, 823219, 957452, 269509, 157336, 1014505, 1241982, 1140648, 726139, 928480, 663712, 947891, 1288998, 1354627, 616446, 814889, 1010581, 991610, 482600, 1151949, 1079091, 1255208, 1292359, 1011985, 639720, 653346, 748783, 1288575, 1145491, 718821, 824003, 1329336, 972614, 11

In [27]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [29]:

def combine_features(row):
    return ' '.join(row['genres']) + ' ' + ' '.join(row['cast']) + ' ' + ' '.join(row['director']) + ' ' + (row['overview'] or '') + ' ' + (row['belongs_to_collection'] or '')


movies_df['combined_features'] = movies_df.apply(combine_features, axis=1)
print(movies_df['combined_features'].head())


0    Science Fiction Action Adventure Tom Hardy Chi...
1    Horror Thriller Mystery Lauren LaVera David Ho...
2    Animation Science Fiction Family Lupita Nyong'...
3    Drama Action Horror Francisco Ortiz José María...
4    Action Adventure Drama Paul Mescal Denzel Wash...
Name: combined_features, dtype: object


In [58]:

tfidf = TfidfVectorizer(stop_words='english')
movies_df['combined_features'] = movies_df['combined_features'].fillna('')  
tfidf_matrix = tfidf.fit_transform(movies_df['combined_features'])


cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def get_closest_movie_title(user_input, movie_titles, threshold=80):
    # Normalize titles and input
    movie_titles_normalized = [title.lower().strip() for title in movie_titles]
    user_input_normalized = user_input.lower().strip()

    # Perform fuzzy matching
    match = process.extractOne(user_input_normalized, movie_titles_normalized)
    if match and match[1] >= threshold:  # Ensure match score is above threshold
        # Retrieve original title
        original_index = movie_titles_normalized.index(match[0])
        return movie_titles[original_index]
    return None

from rapidfuzz import process

# Function to get the closest matching title using fuzzy matching
def get_closest_movie_title(user_input, movie_titles, threshold=80):
    movie_titles_normalized = [title.lower().strip() for title in movie_titles]
    user_input_normalized = user_input.lower().strip()

    match = process.extractOne(user_input_normalized, movie_titles_normalized)
    if match and match[1] >= threshold:
        original_index = movie_titles_normalized.index(match[0])
        return movie_titles[original_index]
    return None

# Recommendation function
def get_enhanced_recommendations(title, cosine_sim):
    movie_titles = movies_df['title'].tolist()
    
    # Apply fuzzy matching to get the closest movie title
    closest_title = get_closest_movie_title(title, movie_titles)
    
    if closest_title is None:
        print("Movie not found. Please try again.")
        return []

    user_response = input(f"Did you mean '{closest_title}'? (yes/no): ").strip().lower()
    if user_response != 'yes':
        print("Please try entering the movie name again.")
        return []

    # Find the movie index using the fuzzy-matched title
    idx = movies_df[movies_df['title'] == closest_title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Use a set to track unique movie titles
    unique_recommendations = set()
    recommendations = []

    for movie_idx, score in sim_scores:
        movie_title = movies_df['title'].iloc[movie_idx]
        if movie_idx != idx and movie_title not in unique_recommendations:
            unique_recommendations.add(movie_title)
            recommendations.append(movie_title)
        if len(recommendations) == 5:  # Stop after getting 5 unique recommendations
            break

    return recommendations

In [70]:
user_input = input("Enter the movie name: ")
recommendations = get_enhanced_recommendations(user_input, cosine_sim)

if not recommendations:
    print("No recommendations found.")
else:
    print("Here are some movie recommendations:")
    for movie in recommendations:
        print(movie)


Enter the movie name:  iron man
Did you mean 'Iron Man'? (yes/no):  yes


Here are some movie recommendations:
Avengers: Infinity War
Spider-Man: No Way Home
Terrifier 2
Sonic the Hedgehog 3
Deadpool & Wolverine


In [54]:
print(movies_df['title'])

0                                  Venom: The Last Dance
1                                            Terrifier 3
2                                         The Wild Robot
3                 Apocalypse Z: The Beginning of the End
4                                           Gladiator II
                             ...                        
195                                           Inside Out
196    The Lord of the Rings: The Fellowship of the Ring
197                                       Drawing Closer
198                                                Wonka
199                  Harry Potter and the Goblet of Fire
Name: title, Length: 200, dtype: object
