## CineMind Toolkit
A streamlined Jupyter Notebook designed to interact with and manage the CineMind database. Includes setup code and modular utility functions for querying, updating, and testing API/database features with ease.


---

### Installing Required Libraries

In [None]:
# pandas for building and processing DataFrames
%pip install pandas

# sqlalchemy: database toolkit for Python (ORM)
%pip install sqlalchemy

# pandasql: sql-type queries on pandas DataFrames
%pip install pandasql

# python-dotenv: for loading environment variables from a .env file
%pip install python-dotenv

### Setup and Database Connection
Sets up access to the TMDB API and connects to the local cinemind.db SQLite database for movie data operations.

In [3]:
import requests
import sqlite3
import os
from dotenv import load_dotenv

# Load environment variables (TMDB_API_KEY is read from a .env file or the
# process environment).
load_dotenv()
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
if not TMDB_API_KEY:
    # Without a key every request would carry "Authorization: Bearer None";
    # report the misconfiguration once here instead of on each API call.
    print("Warning: TMDB_API_KEY is not set; TMDB API requests are expected to fail.")
BASE_API_URL = "https://api.themoviedb.org/3/"
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/original"

# Connect to SQLite database. `conn` is the shared connection that the helper
# functions and update cells in this notebook commit through.
db_path = "../models/cinemind.db"  # Update with your database path
conn = sqlite3.connect(db_path)

### View Table Schema
Connects to the database and prints the schema of the specified table.

In [None]:
# Open a dedicated, short-lived connection for the schema lookup. Reusing the
# name `conn` here would rebind and then close the shared connection created by
# the setup cell, so later cells calling conn.cursor() / conn.commit() would
# fail with "Cannot operate on a closed database".
schema_conn = sqlite3.connect('../models/cinemind.db')  # Update path if needed

table = 'Movies' # Insert name of the table you want to view here

try:
    # The table name is interpolated directly into the PRAGMA statement;
    # `table` is a notebook-local constant, not external input.
    schema = schema_conn.execute(f"PRAGMA table_info({table})")

    print('\n-------BEGIN TABLE SCHEMA------')
    # Each row describes one column of the table.
    for column_info in schema:
        print(column_info)
    print('-------END TABLE SCHEMA--------\n')
finally:
    # Always release the temporary connection, even if the query fails.
    schema_conn.close()

### Define Functions
Defines the helper functions used to fetch movie data from the TMDB API and to insert or update records in the database.

In [14]:
def fetch_movie_details(movie_id, timeout=10):
    """Fetch movie details from TMDB API.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body on HTTP 200; None on any other status or on a
        network error (the failure is reported with a printed message).
    """
    url = f"{BASE_API_URL}movie/{movie_id}?language=en-US"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Keep the "None on failure" contract for connection errors/timeouts.
        print(f"Error fetching movie {movie_id}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching movie {movie_id}: {response.status_code}")
        return None
    return response.json()

def fetch_movie_videos(movie_id, timeout=10):
    """Fetch the first YouTube trailer URL for a given TMDB movie ID.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A "https://www.youtube.com/watch?v=<key>" URL for the first result
        whose site is YouTube and whose type is Trailer (case-insensitive);
        None when there is no such video, on a non-200 status, or on a
        network error.
    """
    url = f"{BASE_API_URL}movie/{movie_id}/videos?language=en-US"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching videos for movie {movie_id}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching videos for movie {movie_id}: {response.status_code}")
        return None

    for video in response.json().get("results", []):
        # Tolerate entries with missing or null fields instead of raising.
        site = (video.get("site") or "").lower()
        video_type = (video.get("type") or "").lower()
        if site == "youtube" and video_type == "trailer" and video.get("key"):
            return f"https://www.youtube.com/watch?v={video['key']}"
    return None

def fetch_movie_credits(movie_id, timeout=10):
    """Fetch the cast list for a given TMDb movie ID.

    Only the "cast" portion of the credits response is returned; any other
    sections of the payload are ignored.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under "cast" in the response, or an empty list on a
        non-200 status or network error.
    """
    url = f"{BASE_API_URL}movie/{movie_id}/credits?language=en-US"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching credits for movie {movie_id}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching credits for movie {movie_id}: {response.status_code}")
        return []
    return response.json().get("cast", [])

def fetch_movie_keywords(movie_id, timeout=10):
    """Fetch keywords for a given TMDb movie ID.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under "keywords" in the response, or an empty list
        on a non-200 status or network error.
    """
    url = f"{BASE_API_URL}movie/{movie_id}/keywords"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching keywords for movie {movie_id}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching keywords for movie {movie_id}: {response.status_code}")
        return []
    return response.json().get("keywords", [])

def get_movie_reviews(movie_id, timeout=10):
    """Fetch reviews for a given TMDb movie ID.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under "results" in the response, or an empty list on
        a non-200 status or network error.
    """
    reviews_url = f"{BASE_API_URL}movie/{movie_id}/reviews?language=en-US"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }

    try:
        response = requests.get(reviews_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching reviews for movie {movie_id}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching reviews for movie {movie_id}. Status Code: {response.status_code}")
        return []
    return response.json().get("results", [])

def insert_actors_and_cast(movie_id, cast):
    """Store up to ten cast members and link each one to the movie.

    For every entry the actor is looked up in the Cast table by name and
    created when absent; the (movie, actor, character) link is then written
    to Movies_Cast. Uses the notebook-level `cursor` and does not commit.

    Args:
        movie_id: ID of the movie the cast belongs to.
        cast: List of dicts with optional "name", "gender" and "character".
    """
    top_billed = cast[:10]  # only the first ten entries are stored
    for member in top_billed:
        name = member.get("name")
        role = member.get("character", "Unknown")
        gender_code = member.get("gender", 0)

        # Reuse the existing Cast row when this name is already known.
        cursor.execute("SELECT actor_id FROM Cast WHERE name = ?", (name,))
        found = cursor.fetchone()
        if found:
            actor_id = found[0]
        else:
            cursor.execute("INSERT INTO Cast (name, gender) VALUES (?, ?)", (name, gender_code))
            actor_id = cursor.lastrowid

        # Record which character this actor plays in the movie.
        link_params = (movie_id, actor_id, role)
        cursor.execute(
            "REPLACE INTO Movies_Cast (movie_id, actor_id, character) VALUES (?, ?, ?)",
            link_params,
        )

def insert_keywords(movie_id, keywords):
    """Store each keyword (creating it when new) and link it to the movie.

    Uses the notebook-level `cursor` and does not commit.

    Args:
        movie_id: ID of the movie the keywords belong to.
        keywords: List of dicts; the "name" entry of each is stored.
    """
    for entry in keywords:
        name = entry.get("name")

        cursor.execute("SELECT keyword_id FROM Keywords WHERE keyword_name = ?", (name,))
        found = cursor.fetchone()
        if found:
            keyword_id = found[0]
        else:
            # First time this keyword is seen: create its row.
            cursor.execute("INSERT INTO Keywords (keyword_name) VALUES (?)", (name,))
            keyword_id = cursor.lastrowid

        link_params = (movie_id, keyword_id)
        cursor.execute("REPLACE INTO Movie_Keywords (movie_id, keyword_id) VALUES (?, ?)", link_params)

def fetch_all_images(movie_id, image_type, timeout=10):
    """Fetch all images (posters or backdrops) for a given TMDb movie ID.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        image_type: Key of the response section to read; callers in this
            notebook pass "posters" or "backdrops".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of full image URLs (IMAGE_BASE_URL + file_path) for the
        requested section; an empty list when the section is absent, on a
        non-200 status, or on a network error.
    """
    images_url = f"{BASE_API_URL}movie/{movie_id}/images"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }

    try:
        response = requests.get(images_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching {image_type} for movie {movie_id}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching {image_type} for movie {movie_id}: {response.status_code}")
        return []

    images_data = response.json()
    # Skip entries without a file path rather than raising KeyError.
    return [
        f"{IMAGE_BASE_URL}{img['file_path']}"
        for img in images_data.get(image_type, [])
        if img.get('file_path')
    ]

def fetch_all_youtube_videos(movie_id, timeout=10):
    """Fetch all YouTube video URLs for a given TMDb movie ID.

    Args:
        movie_id: TMDB movie ID, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of "https://www.youtube.com/watch?v=<key>" URLs, one per
        result whose site is YouTube (case-insensitive); an empty list on a
        non-200 status or network error.
    """
    url = f"{BASE_API_URL}movie/{movie_id}/videos?language=en-US"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching videos for movie {movie_id}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching videos for movie {movie_id}: {response.status_code}")
        return []

    videos = response.json().get("results", [])
    # Tolerate entries with a missing/null site or key instead of raising.
    return [
        f"https://www.youtube.com/watch?v={video['key']}"
        for video in videos
        if (video.get("site") or "").lower() == "youtube" and video.get("key")
    ]

def _format_reviews(reviews):
    """Join review dicts into one text blob; return None when there are none."""
    if not reviews:
        return None
    return "\n\n".join(
        f"Author: {review.get('author', 'Unknown')}\n{review.get('content', '')}"
        for review in reviews
    )


def _get_or_create_id(select_sql, insert_sql, params):
    """Return the ID of the lookup row matching params, inserting it if absent."""
    cursor.execute(select_sql, params)
    row = cursor.fetchone()
    if row:
        return row[0]
    cursor.execute(insert_sql, params)
    return cursor.lastrowid


def insert_movie_into_db(movie_data):
    """Insert (or replace) a movie and its related data in the database.

    Fetches the movie's images, videos, reviews, cast and keywords from the
    API, then writes the Movies row plus its genre, production-country,
    spoken-language, cast and keyword links, and commits. If any database
    write fails, the pending changes are rolled back and the exception is
    re-raised.

    Args:
        movie_data: Movie-details dict. Must contain 'id', 'title',
            'original_title', 'overview', 'release_date',
            'original_language' and 'homepage'; other fields fall back to
            defaults. If it carries a 'reviews' entry that is a string or
            None (as fetch_trending_movies_enhanced attaches), that value is
            stored as-is instead of fetching the reviews a second time.

    Raises:
        KeyError: If a required field is missing from movie_data.
        sqlite3.Error: If a database write fails (after rollback).
    """
    movie_id = movie_data['id']

    # Do all network work first so the database transaction below stays short.
    posters = fetch_all_images(movie_id, "posters")
    backdrops = fetch_all_images(movie_id, "backdrops")
    videos = fetch_all_youtube_videos(movie_id)
    cast = fetch_movie_credits(movie_id)
    keywords = fetch_movie_keywords(movie_id)

    # Reuse review text the caller already prepared; False marks "not given".
    prefetched_reviews = movie_data.get('reviews', False)
    if prefetched_reviews is None or isinstance(prefetched_reviews, str):
        reviews_text = prefetched_reviews
    else:
        reviews_text = _format_reviews(get_movie_reviews(movie_id))

    # Key poster comes from the details payload itself.
    poster_path = movie_data.get('poster_path')
    keyposter_url = f"{IMAGE_BASE_URL}{poster_path}" if poster_path else None

    # All other media URLs are stored as comma-separated strings.
    poster_urls = ",".join(posters) if posters else None
    backdrop_urls = ",".join(backdrops) if backdrops else None
    video_urls = ",".join(videos) if videos else None

    # `or 0` also covers an explicit null in the payload, which float() rejects.
    vote_average = round(float(movie_data.get('vote_average') or 0), 1)  # 1 decimal
    popularity = float(movie_data.get('popularity') or 0)  # raw value

    try:
        # NOTE(review): REPLACE deletes a conflicting row before inserting, so
        # Movies columns not listed here (e.g. keyvideo_url, which another cell
        # of this notebook populates) are reset for an existing movie -- confirm
        # this is intended.
        cursor.execute("""
            REPLACE INTO Movies (
                id, title, original_title, overview, budget, revenue,
                release_date, runtime, status, tagline, popularity,
                vote_average, vote_count, original_language, homepage,
                poster_url, backdrop_url, video_url, reviews, keyposter_url
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            movie_id, movie_data['title'], movie_data['original_title'],
            movie_data['overview'], movie_data.get('budget', 0),
            movie_data.get('revenue', 0), movie_data['release_date'],
            movie_data.get('runtime', 0), movie_data.get('status', ''),
            movie_data.get('tagline', ''), popularity,
            vote_average,
            movie_data.get('vote_count', 0), movie_data['original_language'],
            movie_data['homepage'], poster_urls, backdrop_urls,
            video_urls, reviews_text, keyposter_url
        ))

        # Insert genres
        for genre in movie_data.get('genres', []):
            genre_id = _get_or_create_id(
                "SELECT genre_id FROM Genres WHERE genre_name = ?",
                "INSERT INTO Genres (genre_name) VALUES (?)",
                (genre['name'],),
            )
            cursor.execute("REPLACE INTO Movie_Genre (movie_id, genre_id) VALUES (?, ?)",
                           (movie_id, genre_id))

        # Insert production countries
        for country in movie_data.get('production_countries', []):
            country_id = _get_or_create_id(
                "SELECT country_id FROM Production_Countries WHERE country_name = ? AND iso_code = ?",
                "INSERT INTO Production_Countries (country_name, iso_code) VALUES (?, ?)",
                (country['name'], country['iso_3166_1']),
            )
            cursor.execute("REPLACE INTO Movie_Production_Countries (movie_id, country_id) VALUES (?, ?)",
                           (movie_id, country_id))

        # Insert spoken languages
        for language in movie_data.get('spoken_languages', []):
            language_id = _get_or_create_id(
                "SELECT language_id FROM Spoken_Languages WHERE language_name = ? AND iso_code = ?",
                "INSERT INTO Spoken_Languages (language_name, iso_code) VALUES (?, ?)",
                (language['name'], language['iso_639_1']),
            )
            cursor.execute("REPLACE INTO Movie_Spoken_Languages (movie_id, language_id) VALUES (?, ?)",
                           (movie_id, language_id))

        # Insert actors and keywords fetched above
        insert_actors_and_cast(movie_id, cast)
        insert_keywords(movie_id, keywords)

        conn.commit()
    except Exception:
        # Do not leave a half-written movie pending for a later commit.
        conn.rollback()
        raise
    # print(f"Inserted movie: {movie_data['title']} (ID: {movie_data['id']})")

def fetch_and_insert_new_movies(timeout=10):
    """Fetch popular movies from TMDB API and insert new ones into the database.

    Reads page 1 of the popular-movies listing and, for every movie whose ID
    is not yet in the Movies table, fetches its details and inserts it.

    Args:
        timeout: Seconds to wait for the listing request before giving up.
    """
    url = f"{BASE_API_URL}movie/popular?language=en-US&page=1"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching popular movies: {exc}")
        return

    if response.status_code != 200:
        print(f"Error fetching popular movies: {response.status_code}")
        return

    popular_movies = response.json().get('results', [])
    # A set gives constant-time membership checks against the whole table.
    existing_movie_ids = {row[0] for row in cursor.execute("SELECT id FROM Movies").fetchall()}

    for movie in popular_movies:
        if movie['id'] in existing_movie_ids:
            continue
        movie_details = fetch_movie_details(movie['id'])
        if movie_details:
            insert_movie_into_db(movie_details)

def test_insert_specific_movie(movie_id):
    """Test inserting a specific movie by its TMDB ID.

    Prints whether the movie already exists, was added, or could not be
    fetched.

    Args:
        movie_id: TMDB ID of the movie to insert.
    """
    print(f"Testing insertion for movie ID: {movie_id}")

    # Ask the database about this one ID instead of loading every movie ID.
    cursor.execute("SELECT 1 FROM Movies WHERE id = ? LIMIT 1", (movie_id,))
    if cursor.fetchone() is not None:
        print(f"Movie ID {movie_id} already exists in the database.")
        return

    movie_details = fetch_movie_details(movie_id)
    if movie_details:
        insert_movie_into_db(movie_details)
        print(f"Movie ID {movie_id} successfully added to the database.")
    else:
        print(f"Failed to fetch details for movie ID {movie_id}.")

def find_and_insert_first_new_movie(timeout=10):
    """Find the first movie not in the database, insert it, and print its ID.

    Walks page 1 of the popular-movies listing in order and stops after the
    first movie that is both missing from the Movies table and successfully
    fetched. Nothing is printed when every listed movie already exists.

    Args:
        timeout: Seconds to wait for the listing request before giving up.
    """
    url = f"{BASE_API_URL}movie/popular?language=en-US&page=1"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching popular movies: {exc}")
        return

    if response.status_code != 200:
        print(f"Error fetching popular movies: {response.status_code}")
        return

    popular_movies = response.json().get('results', [])
    # A set gives constant-time membership checks against the whole table.
    existing_movie_ids = {row[0] for row in cursor.execute("SELECT id FROM Movies").fetchall()}

    for movie in popular_movies:
        if movie['id'] in existing_movie_ids:
            continue
        movie_details = fetch_movie_details(movie['id'])
        if movie_details:
            insert_movie_into_db(movie_details)
            print(f"Inserted new movie with ID: {movie['id']}")
            return

def _reviews_to_text(reviews):
    """Join review dicts into one text blob; return None when there are none."""
    if not reviews:
        return None
    return "\n\n".join(
        f"Author: {review.get('author', 'Unknown')}\n{review.get('content', '')}"
        for review in reviews
    )


def fetch_trending_movies_enhanced(mode='all', timeout=10):
    """Fetch this week's trending movies and add or refresh them in the database.

    New movies are inserted; existing movies are re-inserted only when the
    API data differs from the stored row. Progress and a final summary are
    printed. Errors raised while processing are caught and printed.

    Args:
        mode (str): 'all' to process every page (capped at 500 pages),
                   'first' to process just the first movie of page 1,
                   'random' to process one random movie of page 1.
        timeout: Seconds to wait for each listing request before giving up.

    Raises:
        ValueError: If mode is not one of 'all', 'first' or 'random'. An
            unrecognised mode is rejected up front because it would otherwise
            silently behave like 'all' and crawl every page.
    """
    import random  # used by 'random' mode; not part of the notebook's import cell

    if mode not in ('all', 'first', 'random'):
        raise ValueError(f"mode must be 'all', 'first' or 'random', got {mode!r}")

    current_page = 1
    total_pages = 1
    movies_processed = 0
    new_movies_added = 0
    movies_updated = 0
    movies_unchanged = 0

    # Request headers are the same for every page.
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_API_KEY}"
    }

    print(f"Starting to fetch trending movies in {mode} mode...\n")

    try:
        while True:  # Break when we've processed enough based on mode
            url = f"{BASE_API_URL}trending/movie/week?language=en-US&page={current_page}"

            response = requests.get(url, headers=headers, timeout=timeout)
            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                break

            data = response.json()
            total_pages = min(data['total_pages'], 500)  # Limit to 500 pages maximum
            all_movies = data['results']

            if not all_movies:
                # Nothing to pick from; avoids IndexError in 'first'/'random'.
                print(f"No movies returned for page {current_page}; stopping.")
                break

            # 'first' and 'random' always stop after page 1 (see break below).
            if mode == 'first':
                movies_to_process = [all_movies[0]]
            elif mode == 'random':
                movies_to_process = [random.choice(all_movies)]
            else:
                movies_to_process = all_movies

            # Load the stored rows for this batch in one query
            movie_ids = [movie['id'] for movie in movies_to_process]
            placeholders = ','.join('?' * len(movie_ids))
            cursor.execute(f"""SELECT id, title, original_title, overview, budget, revenue,
                          release_date, runtime, status, tagline, popularity,
                          vote_average, vote_count, original_language, homepage, reviews,
                          keyposter_url
                          FROM Movies WHERE id IN ({placeholders})""", movie_ids)
            existing_movies = {row[0]: row[1:] for row in cursor.fetchall()}

            print(f"\nProcessing page {current_page}/{total_pages}...")

            # Process movies
            for basic_movie in movies_to_process:
                movies_processed += 1
                movie_id = basic_movie['id']
                movie_details = fetch_movie_details(movie_id)

                if not movie_details:
                    print(f"⚠ Failed to fetch details for movie ID {movie_id}")
                    continue

                title = movie_details['title']
                release_year = movie_details['release_date'][:4] if movie_details.get('release_date') else 'N/A'
                movie_info = f"{title} ({release_year}) [ID: {movie_id}]"

                # Reviews are needed both for inserting and for comparing.
                reviews_text = _reviews_to_text(get_movie_reviews(movie_id))
                movie_details['reviews'] = reviews_text

                if movie_id not in existing_movies:
                    insert_movie_into_db(movie_details)
                    new_movies_added += 1
                    print(f"✓ Added new movie: {movie_info}")
                    continue

                # Build the tuple exactly as insert_movie_into_db stores it
                # (same defaults, vote_average rounded to 1 decimal); otherwise
                # an unchanged movie would always compare as different.
                poster_path = movie_details.get('poster_path')
                api_data = (
                    movie_details['title'],
                    movie_details['original_title'],
                    movie_details['overview'],
                    movie_details.get('budget', 0),
                    movie_details.get('revenue', 0),
                    movie_details['release_date'],
                    movie_details.get('runtime', 0),
                    movie_details.get('status', ''),
                    movie_details.get('tagline', ''),
                    float(movie_details.get('popularity') or 0),  # raw popularity value
                    round(float(movie_details.get('vote_average') or 0), 1),
                    movie_details.get('vote_count', 0),
                    movie_details['original_language'],
                    movie_details['homepage'],
                    reviews_text,
                    f"{IMAGE_BASE_URL}{poster_path}" if poster_path else None,
                )

                if existing_movies[movie_id] != api_data:
                    # Movie exists and has changes - update it
                    insert_movie_into_db(movie_details)
                    movies_updated += 1
                    print(f"↻ Updated existing movie: {movie_info}")
                else:
                    # No changes at all
                    movies_unchanged += 1
                    print(f"• Movie already exists (no changes): {movie_info}")

            # Break conditions
            if mode in ['first', 'random'] or current_page >= total_pages:
                break

            current_page += 1

        print("\n=== Summary ===")
        print(f"Pages processed: {current_page}/{total_pages}")
        print(f"Movies processed: {movies_processed}")
        print(f"New movies added: {new_movies_added}")
        print(f"Existing movies updated: {movies_updated}")
        print(f"Existing movies unchanged: {movies_unchanged}")

    except Exception as e:
        # Best-effort run: report the failure instead of raising.
        print(f"Error: {str(e)}")

### Update Database

In [11]:
# Create the notebook-level cursor that the helper functions read as a global.
cursor = conn.cursor()

# Example usage:
# NOTE: 'all' walks every trending page (capped at 500 by the function) and
# makes several API requests per movie, so a full run takes a long time.
fetch_trending_movies_enhanced(mode='all')  # 'all', 'first', or 'random'

Starting to fetch trending movies in all mode...


Processing page 1/500...
• Movie already exists (no changes): Mickey 17 (2025) [ID: 696506]
• Movie already exists (no changes): A Minecraft Movie (2025) [ID: 950387]
↻ Updated existing movie: Novocaine (2025) [ID: 1195506]
↻ Updated existing movie: Black Bag (2025) [ID: 1233575]
• Movie already exists (no changes): The Amateur (2025) [ID: 1087891]
↻ Updated existing movie: Novocaine (2025) [ID: 1195506]
↻ Updated existing movie: Black Bag (2025) [ID: 1233575]
• Movie already exists (no changes): The Amateur (2025) [ID: 1087891]
↻ Updated existing movie: A Working Man (2025) [ID: 1197306]
↻ Updated existing movie: A Working Man (2025) [ID: 1197306]


KeyboardInterrupt: 

### Update Keyposters
Fills in `keyposter_url` for every movie in the database that does not already have one; movies that already have a key poster are skipped.

In [12]:
def update_all_keyposters():
    """Fill in keyposter_url for every movie that does not have one yet.

    Movies with a stored keyposter_url are skipped. For the rest, the movie
    details are fetched and, when a poster path is present, the full poster
    URL is written to the Movies row. Progress and a summary are printed.
    Uses the notebook-level `cursor` and `conn`.
    """
    # Read IDs and current poster values in a single query instead of issuing
    # one extra SELECT per movie.
    cursor.execute("SELECT id, keyposter_url FROM Movies")
    movies = cursor.fetchall()

    total_movies = len(movies)
    updated_count = 0
    already_had_poster = 0

    print(f"Found {total_movies} movies to process...\n")

    for idx, (movie_id, current_poster) in enumerate(movies, 1):
        if current_poster:
            already_had_poster += 1
            continue

        # Fetch movie details from API
        movie_details = fetch_movie_details(movie_id)
        if not movie_details:
            print(f"⚠ Couldn't fetch details for movie ID {movie_id}")
            continue

        # Generate keyposter_url
        poster_path = movie_details.get('poster_path')
        keyposter_url = f"{IMAGE_BASE_URL}{poster_path}" if poster_path else None

        if keyposter_url:
            # Update the movie with the new keyposter_url
            cursor.execute("UPDATE Movies SET keyposter_url = ? WHERE id = ?", (keyposter_url, movie_id))
            updated_count += 1
            print(f"[{idx}/{total_movies}] ✓ Updated keyposter for: {movie_details['title']}")
        else:
            print(f"[{idx}/{total_movies}] ⚠ No poster path found for: {movie_details['title']}")

        # Commit periodically so an interrupted run keeps most of its progress
        if idx % 10 == 0:
            conn.commit()

    # Final commit
    conn.commit()

    print("\n=== Summary ===")
    print(f"Total movies processed: {total_movies}")
    print(f"Movies already had posters: {already_had_poster}")
    print(f"Movies updated with new posters: {updated_count}")
    # This remainder also includes movies whose details could not be fetched.
    print(f"Movies without posters: {total_movies - (updated_count + already_had_poster)}")

# Run the update
# A fresh cursor is bound to the global name `cursor`, which
# update_all_keyposters() reads for all of its queries.
cursor = conn.cursor()
update_all_keyposters()

Found 5130 movies to process...

[1/5130] ✓ Updated keyposter for: Four Rooms
[5/5130] ✓ Updated keyposter for: American Beauty
[7/5130] ✓ Updated keyposter for: Dancer in the Dark
[9/5130] ✓ Updated keyposter for: Metropolis
[5/5130] ✓ Updated keyposter for: American Beauty
[7/5130] ✓ Updated keyposter for: Dancer in the Dark
[9/5130] ✓ Updated keyposter for: Metropolis
[10/5130] ✓ Updated keyposter for: My Life Without Me
[13/5130] ✓ Updated keyposter for: Jarhead
[14/5130] ✓ Updated keyposter for: Apocalypse Now
[10/5130] ✓ Updated keyposter for: My Life Without Me
[13/5130] ✓ Updated keyposter for: Jarhead
[14/5130] ✓ Updated keyposter for: Apocalypse Now
[15/5130] ✓ Updated keyposter for: Unforgiven
[16/5130] ✓ Updated keyposter for: The Simpsons Movie
[17/5130] ✓ Updated keyposter for: Eternal Sunshine of the Spotless Mind
[15/5130] ✓ Updated keyposter for: Unforgiven
[16/5130] ✓ Updated keyposter for: The Simpsons Movie
[17/5130] ✓ Updated keyposter for: Eternal Sunshine of the 

### Update Key Videos
Fills in `keyvideo_url` (the main YouTube trailer) for every movie in the database that does not already have one; movies that already have a key video are skipped.

In [None]:
def update_all_keyvideos():
    """Fill in keyvideo_url for every movie that does not have one yet.

    Movies with a stored keyvideo_url are skipped. For the rest, the URL
    returned by fetch_movie_videos() is written to the Movies row when one
    is found. Progress and a summary are printed. Uses the notebook-level
    `cursor` and `conn`.
    """
    # Read IDs and current video values in a single query instead of issuing
    # one extra SELECT per movie.
    cursor.execute("SELECT id, keyvideo_url FROM Movies")
    movies = cursor.fetchall()

    total_movies = len(movies)
    updated_count = 0
    already_had_video = 0

    print(f"Found {total_movies} movies to process...\n")

    for idx, (movie_id, current_video) in enumerate(movies, 1):
        if current_video:
            already_had_video += 1
            continue

        # Use fetch_movie_videos to get the main trailer
        keyvideo_url = fetch_movie_videos(movie_id)

        if keyvideo_url:
            # Update the movie with the new keyvideo_url
            cursor.execute("UPDATE Movies SET keyvideo_url = ? WHERE id = ?", (keyvideo_url, movie_id))
            updated_count += 1
            print(f"[{idx}/{total_movies}] ✓ Updated key video for movie ID: {movie_id}")
        else:
            print(f"[{idx}/{total_movies}] ⚠ No trailer found for movie ID: {movie_id}")

        # Commit periodically so an interrupted run keeps most of its progress
        if idx % 10 == 0:
            conn.commit()

    # Final commit
    conn.commit()

    print("\n=== Summary ===")
    print(f"Total movies processed: {total_movies}")
    print(f"Movies already had videos: {already_had_video}")
    print(f"Movies updated with new videos: {updated_count}")
    # This remainder also includes movies whose video lookup failed.
    print(f"Movies without videos: {total_movies - (updated_count + already_had_video)}")

# Run the update
# A fresh cursor is bound to the global name `cursor`, which
# update_all_keyvideos() reads for all of its queries.
cursor = conn.cursor()
update_all_keyvideos()