In [None]:
import requests
import pandas as pd

_TMDB_API_ROOT = "https://api.themoviedb.org/3"

# Column order of the returned dataframe; also used so that an empty result
# still has the expected columns.
_MOVIE_COLUMNS = [
    "Title",
    "Year",
    "Genres",
    "Rating",
    "Runtime",
    "Director",
    "Writer",
    "Actors",
    "Plot",
    "Language",
    "Country",
]


def get_movies_table(api_key, min_movies=30, session=None, timeout=10):
    """
    Gets popular English-language movies from TMDb's Discover endpoint and
    returns them as a dataframe sorted from highest to lowest rating.

    Discover pages are requested one at a time until at least ``min_movies``
    movies have been collected. Every movie on a fetched page is kept, so the
    result can hold more than ``min_movies`` rows. It holds fewer only when
    Discover returns an empty page or its reported last page is reached.

    For each movie the details and credits endpoints are queried to gather
    the title, release year, genres, average rating, runtime, director,
    writer, lead actor, plot, original language and origin country.

    Args:
        api_key: user's api key from the TMDb API
        min_movies: minimum number of movies to collect (default 30)
        session: optional object with a ``get(url, params=..., timeout=...)``
            method, e.g. a ``requests.Session``. When omitted, a
            ``requests.Session`` is created for the call and closed afterwards.
        timeout: seconds to wait for each HTTP request

    Returns:
        df: dataframe with the columns Title, Year, Genres, Rating, Runtime,
            Director, Writer, Actors, Plot, Language and Country

    Raises:
        Whatever ``raise_for_status()`` raises on the response for an HTTP
        error status (``requests.HTTPError`` for a requests session), plus any
        connection or timeout error raised by the session itself.
    """
    owns_session = session is None
    http = requests.Session() if owns_session else session
    auth_params = {"api_key": api_key}
    movie_list = []
    page = 1

    try:
        while len(movie_list) < min_movies:
            discover_params = {
                "api_key": api_key,
                "sort_by": "popularity.desc",
                "page": page,
                "with_original_language": "en",
            }
            data = _get_json(
                http, f"{_TMDB_API_ROOT}/discover/movie", discover_params, timeout
            )
            movies = data.get("results") or []
            if not movies:
                # nothing left to collect; looping again would never finish
                break

            for movie in movies:
                movie_id = movie.get("id")
                if movie_id is None:
                    # without an id the detail endpoints cannot be addressed
                    continue
                details = _get_json(
                    http, f"{_TMDB_API_ROOT}/movie/{movie_id}", auth_params, timeout
                )
                credit_data = _get_json(
                    http,
                    f"{_TMDB_API_ROOT}/movie/{movie_id}/credits",
                    auth_params,
                    timeout,
                )
                movie_list.append(_build_movie_row(details, credit_data))

            total_pages = data.get("total_pages")
            if total_pages is not None and page >= total_pages:
                break
            page += 1
    finally:
        # only close a session this function created itself
        if owns_session:
            http.close()

    # passing the columns keeps sort_values from failing on an empty result
    df = pd.DataFrame(movie_list, columns=_MOVIE_COLUMNS)
    return df.sort_values("Rating", ascending=False).reset_index(drop=True)


def _get_json(http, url, params, timeout):
    """Sends a GET request and returns the decoded JSON body, raising on HTTP errors."""
    response = http.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


def _first_name(members, key, wanted):
    """Returns the name of the first member whose ``key`` equals ``wanted``, or ""."""
    return next(
        (member.get("name") for member in members if member.get(key) == wanted),
        "",
    )


def _build_movie_row(details, credit_data):
    """Builds one dataframe row from a movie's details and credits payloads."""
    crew = credit_data.get("crew") or []
    cast = credit_data.get("cast") or []

    release_date = details.get("release_date") or ""
    genres = ", ".join(genre["name"] for genre in details.get("genres") or [])

    # when the country comes back as a list, flatten it to text like Genres
    country = details.get("origin_country", "")
    if isinstance(country, (list, tuple)):
        country = ", ".join(str(code) for code in country)

    return {
        "Title": details.get("title"),
        "Year": release_date.split("-")[0],
        "Genres": genres,
        "Rating": details.get("vote_average"),
        "Runtime": details.get("runtime"),
        "Director": _first_name(crew, "job", "Director"),
        "Writer": _first_name(crew, "job", "Writer"),
        # the lead actor is the cast member billed at order 0
        "Actors": _first_name(cast, "order", 0),
        "Plot": details.get("overview", ""),
        "Language": details.get("original_language", ""),
        "Country": country,
    }


In [None]:
import os

# The TMDb key is read from the environment so the secret is never stored in
# the notebook or in any submitted file. A key that has ever been committed
# to source should be regenerated in the TMDb account settings.
API_KEY = os.environ.get("TMDB_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the TMDB_API_KEY environment variable to your TMDb API key."
    )

movies_df = get_movies_table(API_KEY)
# the collected data is saved to a csv (included in the submission) so the
# results can be reviewed without needing an API key
movies_df.to_csv("movies_data.csv", index=False)
movies_df