# Neo4j
https://neo4j.com/sandbox/

# The Movie Database (TMDB)
https://www.themoviedb.org/


In [1]:
import requests
import shutil
import gzip
from datetime import date
import json
import getpass
from neo4j import GraphDatabase
import time
from datetime import date
from dotenv import dotenv_values

## Set up connection information

In [2]:
# Read the key/value pairs of the local .env file into a mapping; the cells
# below look up the API key and the Neo4j connection details in it.
config = dotenv_values(dotenv_path=".env")

In [3]:
# API key sent as the `api_key` query parameter of the TMDB requests below.
tmdb_api_key = config.get("MOVIES-API")
if not tmdb_api_key:
    # Stop here with a clear message instead of failing later inside the
    # import loop with an error that does not mention the configuration.
    raise ValueError('Missing "MOVIES-API" entry in .env (TMDB API key).')

In [4]:
# Password used together with `neo4j_user` to authenticate the Neo4j driver.
neo4j_password = config.get("NEO4J")
if not neo4j_password:
    # Stop here with a clear message instead of an authentication failure
    # when the driver is first used.
    raise ValueError('Missing "NEO4J" entry in .env (Neo4j password).')

In [5]:
# Database and user names are fixed here; the connection URL comes from .env.
neo4j_database_name = "neo4j"
neo4j_user = "neo4j"
bolt_url = config.get("NEO4J-URL")
if not bolt_url:
    # Without a URL the driver cannot be created; report the missing entry
    # by name rather than letting the driver fail on a None address.
    raise ValueError('Missing "NEO4J-URL" entry in .env (Neo4j connection URL).')

In [6]:
# Shared driver used by every Neo4j call in this notebook.
driver = GraphDatabase.driver(bolt_url, auth=(neo4j_user, neo4j_password))
# Constructing the driver does not by itself prove the server is reachable or
# that the credentials are accepted; check now so that a bad URL or password
# is reported in this cell rather than in the middle of the import.
driver.verify_connectivity()

## Create Neo4j constraints

In [7]:
# Make tmdbId the node key of Movie nodes so the MERGE in the import cannot
# create duplicates. IF NOT EXISTS keeps this cell safe to re-run.
# NOTE(review): NODE KEY constraints are documented as an Enterprise Edition
# feature - confirm the target database supports them.
with driver.session(database=neo4j_database_name) as session:
    # Consume the result while the session is still open so that an error
    # reported by the server is raised in this cell instead of possibly being
    # discarded when the session closes.
    session.run("CREATE CONSTRAINT movie_tmdb_id_node_key IF NOT EXISTS FOR (m:Movie) REQUIRE m.tmdbId IS NODE KEY").consume()

## Import movies

In [8]:
def call_discover_api(page, first_release_date):
    """Fetch one page of TMDB "discover movie" results.

    Requests movies whose primary release date lies between
    ``first_release_date`` and today (inclusive), sorted by ascending primary
    release date, excluding adult titles and video-only releases.

    Args:
        page: Page number to request.
        first_release_date: Lower bound for the primary release date, in the
            same ``YYYY-MM-DD`` form used for the upper bound.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status, so the
            caller does not try to read results from an error payload.
    """
    movie_url = "https://api.themoviedb.org/3/discover/movie"
    latest_release_date = date.today().strftime("%Y-%m-%d")
    params = {
        # Sent as lowercase strings: a Python bool would be serialised as
        # "True"/"False", which is not the form the API documents.
        "include_adult": "false",
        "include_video": "false",
        "language": "en-US",
        "page": page,
        "primary_release_date.gte": first_release_date,
        "primary_release_date.lte": latest_release_date,
        "sort_by": "primary_release_date.asc",
        "api_key": tmdb_api_key,
    }
    # A timeout keeps one stalled request from hanging a long-running import.
    api_response = requests.get(movie_url, params=params, timeout=30)
    api_response.raise_for_status()
    return api_response.json()

In [9]:
def send_to_neo4j(result):
    """Upload one page of movie results to Neo4j.

    Each entry of ``result['results']`` is reduced to its id, overview,
    release date and title, with ``id`` renamed to ``tmdbId`` and
    ``release_date`` to ``releaseDate``. Movies are merged on ``tmdbId``;
    the properties are only written when the node is created, so existing
    nodes are left untouched.

    Args:
        result: Decoded API response holding a ``results`` list of dicts.
    """
    neo4j_keys = ['id', 'overview', 'release_date', 'title']
    neo4j_names = {"id": "tmdbId", "release_date": "releaseDate"}
    # Fields absent from an entry are left out of its row instead of raising
    # KeyError; the node is then created without that property.
    neo4j_data = [
        {neo4j_names.get(key, key): movie[key] for key in neo4j_keys if key in movie}
        for movie in result['results']
    ]
    if not neo4j_data:
        # Nothing to write; skip the round trip to the database.
        return
    # Target the same database as the constraint cell rather than whatever
    # default database the server would pick for this user.
    driver.execute_query("""
    UNWIND $data AS row
    MERGE (m:Movie {tmdbId:row['tmdbId']})
    ON CREATE SET m += row
    """,
                        {"data": neo4j_data},
                        database_=neo4j_database_name)
    

In [10]:
def process_page(page, first_release_date):
    """Fetch one page of results and send it to Neo4j.

    Args:
        page: Page number to request from the discover API.
        first_release_date: Lower bound of the release-date window being
            fetched.

    Returns:
        A ``(movies_returned, max_date_processed)`` tuple: the number of
        movies on the page and the latest release date found on it. When the
        page is empty, or none of its movies carries a release date,
        ``max_date_processed`` is ``first_release_date``.
    """
    page_data = call_discover_api(page, first_release_date)
    results = page_data['results']
    movies_returned = len(results)
    if movies_returned == 0:
        return movies_returned, first_release_date

    send_to_neo4j(page_data)
    # Ignore missing or empty release dates: max() cannot compare None with
    # a string, and an empty date must not become the next window start.
    release_dates = [r["release_date"] for r in results if r.get("release_date")]
    max_date_processed = max(release_dates, default=first_release_date)
    return movies_returned, max_date_processed

In [11]:
def process_all_from_date(release_date, max_pages=500, delay_seconds=2):
    """Import every movie released from ``release_date`` up to today.

    Pages are fetched in ascending release-date order. Once ``max_pages``
    pages have been read for one starting date, the fetch starts over at
    page 1 from the latest release date seen so far. The loop ends when a
    window yields no later release date than its own start, or when the
    starting date reaches today.

    Args:
        release_date: First release date to import, as a ``YYYY-MM-DD``
            string (it is compared as text with today's date in that form).
        max_pages: Highest page number requested for one starting date.
        delay_seconds: Pause after each page request, in seconds.
    """
    current_release_date = release_date
    previous_date = None
    while (current_release_date < date.today().strftime("%Y-%m-%d")
           and current_release_date != previous_date):
        print(f"Getting movies starting at {current_release_date}")
        # Reset per window so a window's paging never depends on values left
        # over from the previous one.
        max_release_date = current_release_date
        movies_returned = 1
        page = 1
        while movies_returned > 0 and page <= max_pages:
            movies_returned, max_release_date = process_page(page, current_release_date)
            # Report before incrementing so the message names the page that
            # was actually just processed.
            if page % 5 == 0:
                print(f"Finished page {page} for date {current_release_date} through {max_release_date}")
            page += 1
            time.sleep(delay_seconds)
        previous_date = current_release_date
        current_release_date = max_release_date

In [12]:
# Run the import for every movie released on or after this date.
process_all_from_date(release_date="2023-10-14")

Getting movies starting at 2023-10-14
Finished page 5 for date 2023-10-14 through 2023-10-14
Finished page 10 for date 2023-10-14 through 2023-10-15
Finished page 15 for date 2023-10-14 through 2023-10-16
Finished page 20 for date 2023-10-14 through 2023-10-17
Finished page 25 for date 2023-10-14 through 2023-10-18
Finished page 30 for date 2023-10-14 through 2023-10-19
Finished page 35 for date 2023-10-14 through 2023-10-19
Finished page 40 for date 2023-10-14 through 2023-10-20
Finished page 45 for date 2023-10-14 through 2023-10-20
Finished page 50 for date 2023-10-14 through 2023-10-21
Finished page 55 for date 2023-10-14 through 2023-10-21
Finished page 60 for date 2023-10-14 through 2023-10-22
Finished page 65 for date 2023-10-14 through 2023-10-23
Finished page 70 for date 2023-10-14 through 2023-10-24
Finished page 75 for date 2023-10-14 through 2023-10-25
Finished page 80 for date 2023-10-14 through 2023-10-25
Finished page 85 for date 2023-10-14 through 2023-10-26
Finished pa