Data Clean

The data source website:
https://www.imdb.com/?ref_=nv_home

The instruction of Cinemagoer:
https://cinemagoer.readthedocs.io/en/latest/

In [2]:
from imdb import Cinemagoer

In [3]:
# create an instance of the Cinemagoer class
ia = Cinemagoer()

In [3]:
#advanced search

from imdb import IMDb
import logging

# Set up basic configuration for logging
# logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

ia = IMDb()

def advanced_search_movies_and_actors(criteria):
    movie_name = criteria.get('movie_name', '').strip()
    actor_name = criteria.get('actor_name', '').strip()
    genre = criteria.get('genre', '').strip()
    year = criteria.get('year', '').strip()

    # Search for movies and people
    movie_results = ia.search_movie(movie_name) if movie_name else []
    actor_results = ia.search_person(actor_name) if actor_name else []

    detailed_movies = []
    detailed_actors = []
    matched_movies = []

    # Process movie results
    for movie in movie_results:
        if len(detailed_movies) >= 10:
            break  # Limit to the first 10 movies

        try:
            movie_id = movie.movieID
            movie_details = ia.get_movie(movie_id)

            # Apply genre filter
            if genre and genre not in movie_details.get('genres', []):
                continue
            
            # Apply year filter
            if year and (not movie_details.get('year') or int(movie_details.get('year', 0)) < int(year)):
                continue

            # Process cast and match with actor name if provided
            cast_names = [person['name'] for person in movie_details.get('cast', []) if 'name' in person]
            
            if actor_name and actor_name in cast_names:
                # This movie is a match and should be prioritized
                matched_movies.append(movie_details)
            else:
                # This movie is not a full match but still relevant
                detailed_movies.append({
                    'title': movie_details.get('title'),
                    'year': movie_details.get('year'),
                    'genres': movie_details.get('genres'),
                    'rating': movie_details.get('rating'),
                    'cast': cast_names[:3],
                    'plot': movie_details.get('plot outline'),
                    'movie_id': movie_id
                })
        except Exception as e:
            logging.error("Error fetching movie details for ID %s: %s", movie.movieID, e)

    # Sort matched movies by relevance (year and rating)
    matched_movies.sort(key=lambda x: (x.get('year', 0), x.get('rating', 0)), reverse=True)

    # Add matched movies to the front of the detailed movies list
    for matched_movie in matched_movies[:10]:  # Ensure we don't exceed 10 total
        detailed_movies.insert(0, {
            'title': matched_movie.get('title'),
            'year': matched_movie.get('year'),
            'genres': matched_movie.get('genres'),
            'rating': matched_movie.get('rating'),
            'cast': [person['name'] for person in matched_movie.get('cast', [])[:3] if 'name' in person],
            'plot': matched_movie.get('plot outline'),
            'movie_id': matched_movie.movieID
        })

    # Process actor results
    for actor in actor_results:
        if len(detailed_actors) >= 10:
            break  # Limit to the first 10 actors

        try:
            actor_id = actor.personID
            actor_details = ia.get_person(actor_id)
            detailed_actors.append({
                'name': actor_details.get('name'),
                'actor_id': actor_id,
                'biography': actor_details.get('mini biography', []),
                'filmography': [film['title'] for film in actor_details.get('filmography', {}).get('actor', [])][:3]
            })
        except Exception as e:
            logging.error("Error fetching actor details for ID %s: %s", actor.personID, e)

    # Limit the detailed movies list to the top 10 if necessary
    detailed_movies = detailed_movies[:10]

    # Combine detailed movies with actor results
    combined_results = {
        'Movies': detailed_movies,
        'Actors': detailed_actors
    }

    return combined_results


# Example criteria for a search that could apply to both movies and actors
# criteria = {
#     'movie_name': 'Inception',
#     'actor_name': 'Leonardo DiCaprio',
#     'genre': 'Action',
#     'year': '2010'
# }
criteria = {
    'movie_name': 'Spider',
    'year': '2010'
    }
results = advanced_search_movies_and_actors(criteria)
print(results)



{'Movies': [{'title': 'Spider-Man: Across the Spider-Verse', 'year': 2023, 'genres': ['Animation', 'Action', 'Adventure', 'Fantasy', 'Sci-Fi'], 'rating': 8.6, 'cast': ['Shameik Moore', 'Hailee Steinfeld', 'Brian Tyree Henry'], 'plot': "Miles Morales returns for the next chapter of the Oscar®-winning Spider-Verse saga, an epic adventure that will transport Brooklyn's full-time, friendly neighborhood Spider-Man across the Multiverse to join forces with Gwen Stacy and a new team of Spider-People to face off with a villain more powerful than anything they have ever encountered.", 'movie_id': '9362722'}, {'title': 'Spider-Man: No Way Home', 'year': 2021, 'genres': ['Action', 'Adventure', 'Fantasy', 'Sci-Fi'], 'rating': 8.2, 'cast': ['Tom Holland', 'Zendaya', 'Benedict Cumberbatch'], 'plot': "Peter Parker's secret identity is revealed to the entire world. Desperate for help, Peter turns to Doctor Strange to make the world forget that he is Spider-Man. The spell goes horribly wrong and shatte

In [3]:
print(dir(ia))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_defModFunct', '_getRefs', '_get_infoset', '_get_keyword', '_get_list_content', '_get_movie_list', '_get_real_characterID', '_get_real_companyID', '_get_real_movieID', '_get_real_personID', '_get_search_content', '_get_search_movie_advanced_content', '_get_showtimes', '_get_top_bottom_movies', '_get_top_movies_or_tv_by_genres', '_http_logger', '_keywordsResults', '_mdparse', '_normalize_characterID', '_normalize_companyID', '_normalize_movieID', '_normalize_personID', '_purge_seasons_data', '_reraise_exceptions', '_results', '_retrieve', '_searchIMDb', '_search_character', '_search_company', '_search_episode', '_search_keyword', '_search_movie', 

Essential Function


Search

In [4]:
#search by name
ia.search_movie('moon')

[<Movie id:23137904[http] title:_Rebel Moon - Part Two: The Scargiver (2024)_>,
 <Movie id:14998742[http] title:_Rebel Moon - Part One: A Child of Fire (2023)_>,
 <Movie id:5537002[http] title:_Killers of the Flower Moon (2023)_>,
 <Movie id:1896747[http] title:_Fly Me to the Moon (2024)_>,
 <Movie id:1182345[http] title:_Moon (2009)_>,
 <Movie id:10234724[http] title:_"Moon Knight" (2022) (mini)_>,
 <Movie id:1399103[http] title:_Transformers: Dark of the Moon (2011)_>,
 <Movie id:0104779[http] title:_Bitter Moon (1992)_>,
 <Movie id:27688034[http] title:_The Moon (2023)_>,
 <Movie id:8110640[http] title:_In the Shadow of the Moon (2019)_>,
 <Movie id:5320412[http] title:_"Moon Lovers: Scarlet Heart Ryeo" (2016)_>,
 <Movie id:1259571[http] title:_The Twilight Saga: New Moon (2009)_>,
 <Movie id:0096324[http] title:_Two Moon Junction (1988)_>,
 <Movie id:0070510[http] title:_Paper Moon (1973)_>,
 <Movie id:28741495[http] title:_Moon Maidens (2023)_>,
 <Movie id:0125664[http] title:_Man

In [5]:
#search by character
ia.search_person('David')

[<Person id:2810287[http] name:_David Dastmalchian_>,
 <Person id:4825178[http] name:_David Corenswet_>,
 <Person id:0855039[http] name:_David Tennant_>,
 <Person id:0472710[http] name:_David Krumholtz_>,
 <Person id:0000186[http] name:_David Lynch_>,
 <Person id:0001650[http] name:_Michael Rapaport_>,
 <Person id:0000399[http] name:_David Fincher_>,
 <Person id:1314042[http] name:_Karen David_>,
 <Person id:0000667[http] name:_David Thewlis_>,
 <Person id:0004929[http] name:_David Foley_>,
 <Person id:0333701[http] name:_David Graf_>,
 <Person id:1092086[http] name:_David Harbour_>,
 <Person id:0000343[http] name:_David Cronenberg_>,
 <Person id:0500610[http] name:_David Leitch_>,
 <Person id:0103195[http] name:_David Bradley_>,
 <Person id:0004770[http] name:_David Boreanaz_>,
 <Person id:0564724[http] name:_David McCallum_>,
 <Person id:1256136[http] name:_David McCormack_>,
 <Person id:0000179[http] name:_Jude Law_>,
 <Person id:0001016[http] name:_David Carradine_>]

In [6]:
#get detailed info
movie = ia.get_movie('10234724')
print(movie.get('genres', []))
# print(movie.items())

[('localized title', 'Moon Knight'), ('cast', [<Person id:1209966[http] name:_Oscar Isaac_>, <Person id:0000160[http] name:_Ethan Hawke_>, <Person id:2790595[http] name:_May Calamawy_>, <Person id:9339750[http] name:_Michael Benjamin Hernandez_>, <Person id:0000719[http] name:_F. Murray Abraham_>, <Person id:2497049[http] name:_Ann Akinjirin_>, <Person id:3183119[http] name:_Karim El Hakim_>, <Person id:0304414[http] name:_David Ganly_>, <Person id:12713943[http] name:_Antonia Salib_>, <Person id:2043234[http] name:_Khalid Abdalla_>, <Person id:0779785[http] name:_Shaun Scott_>, <Person id:4312699[http] name:_Lucy Thackeray_>, <Person id:6946524[http] name:_Díana Bermudez_>, <Person id:5247776[http] name:_Alexander Cobb_>, <Person id:0360426[http] name:_Declan Hannigan_>, <Person id:6447042[http] name:_Hayley Konadu_>, <Person id:5713754[http] name:_Loic Mabanza_>, <Person id:7368334[http] name:_Nagisa Morimoto_>, <Person id:0880484[http] name:_Gaspard Ulliel_>, <Person id:7143061[http

In [4]:
movie = ia.get_movie('10234724')
print(person for person in movie.get('cast', []))

<generator object <genexpr> at 0x0000013721609430>


In [6]:
from concurrent.futures import ThreadPoolExecutor
from imdb import IMDb
import logging
from functools import lru_cache

ia = IMDb()

@lru_cache(maxsize=512)
def get_movie_details(movie_id):
    return ia.get_movie(movie_id)

@lru_cache(maxsize=512)
def get_actor_details(actor_id):
    return ia.get_person(actor_id)


def process_movies(criteria):
    movie_name = criteria.get('movie_name', '').strip()
    genre = criteria.get('genre', '').strip()
    year = criteria.get('year', '').strip()
    actor_name = criteria.get('actor_name', '').strip()
    movie_results = ia.search_movie(movie_name) if movie_name else []
    detailed_movies = []
    matched_movies = []

    for movie in movie_results:
        if len(detailed_movies) >= 10:
            break
        try:
            movie_id = movie.movieID
            movie_details = get_movie_details(movie_id)
            if genre and genre not in movie_details.get('genres', []):
                continue
            if year and (not movie_details.get('year') or int(movie_details.get('year', 0)) < int(year)):
                continue
            cast_names = [person['name'] for person in movie_details.get('cast', []) if 'name' in person]
            if actor_name and actor_name in cast_names:
                matched_movies.append(movie_details)
            else:
                detailed_movies.append({
                    'title': movie_details.get('title'),
                    'year': movie_details.get('year'),
                    'genres': movie_details.get('genres'),
                    'rating': movie_details.get('rating'),
                    'cast': cast_names[:3],
                    'plot': movie_details.get('plot outline'),
                    'movie_id': movie_id
                })
        except Exception as e:
            logging.error("Error fetching movie details for ID %s: %s", movie.movieID, e)
    matched_movies.sort(key=lambda x: (x.get('year', 0), x.get('rating', 0)), reverse=True)
    for matched_movie in matched_movies[:10]:
        detailed_movies.insert(0, {
            'title': matched_movie.get('title'),
            'year': matched_movie.get('year'),
            'genres': matched_movie.get('genres'),
            'rating': matched_movie.get('rating'),
            'cast': [person['name'] for person in matched_movie.get('cast', [])[:3] if 'name' in person],
            'plot': matched_movie.get('plot outline'),
            'movie_id': matched_movie.movieID
        })
    return detailed_movies[:10]

def process_actors(criteria):
    actor_name = criteria.get('actor_name', '').strip()
    actor_results = ia.search_person(actor_name) if actor_name else []
    detailed_actors = []

    for actor in actor_results:
        if len(detailed_actors) >= 10:
            break
        try:
            actor_id = actor.personID
            actor_details = get_actor_details(actor_id)
            detailed_actors.append({
                'name': actor_details.get('name'),
                'actor_id': actor_id,
                'biography': actor_details.get('mini biography', []),
                'filmography': [film['title'] for film in actor_details.get('filmography', {}).get('actor', [])][:3]
            })
        except Exception as e:
            logging.error("Error fetching actor details for ID %s: %s", actor.personID, e)
    return detailed_actors

def advanced_search_movies_and_actors(criteria):
    with ThreadPoolExecutor(max_workers=2) as executor:
        movie_future = executor.submit(process_movies, criteria)
        actor_future = executor.submit(process_actors, criteria)

    detailed_movies = movie_future.result()
    detailed_actors = actor_future.result()

    return {
        'Movies': detailed_movies,
        'Actors': detailed_actors
    }

criteria = {
    'movie_name': 'Spider',
    'year': '2010'
    }
results = advanced_search_movies_and_actors(criteria)
print(results)


{'Movies': [{'title': 'Spider-Man: Across the Spider-Verse', 'year': 2023, 'genres': ['Animation', 'Action', 'Adventure', 'Fantasy', 'Sci-Fi'], 'rating': 8.6, 'cast': ['Shameik Moore', 'Hailee Steinfeld', 'Brian Tyree Henry'], 'plot': "Miles Morales returns for the next chapter of the Oscar®-winning Spider-Verse saga, an epic adventure that will transport Brooklyn's full-time, friendly neighborhood Spider-Man across the Multiverse to join forces with Gwen Stacy and a new team of Spider-People to face off with a villain more powerful than anything they have ever encountered.", 'movie_id': '9362722'}, {'title': 'Spider-Man: No Way Home', 'year': 2021, 'genres': ['Action', 'Adventure', 'Fantasy', 'Sci-Fi'], 'rating': 8.2, 'cast': ['Tom Holland', 'Zendaya', 'Benedict Cumberbatch'], 'plot': "Peter Parker's secret identity is revealed to the entire world. Desperate for help, Peter turns to Doctor Strange to make the world forget that he is Spider-Man. The spell goes horribly wrong and shatte