In [1]:
from dotenv import load_dotenv

# Load the local .env file; as the cell's last expression, the call's return
# value is what the notebook displays.
load_dotenv(dotenv_path=".env")

True

In [3]:
import requests
import os 

# Connection settings read from the process environment. Each one is None
# when the corresponding variable is not set.
URL, USER_ID, API_KEY = (
    os.environ.get(variable) for variable in ("URL", "USER_ID", "API_KEY")
)

def get_amount(timeout: float = 30.0) -> int:
    """
    Gets the amount of played Movie items in user access.

    Sends a one-item query (Limit=1) to the server's Items endpoint and
    reads the ``TotalRecordCount`` field of the JSON response.

    Args:
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The integer amount; 0 when the response has no ``TotalRecordCount``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Passing the query as a dict lets requests URL-escape every value
    # (including the API key) instead of relying on hand-built strings.
    params = {
        "StartIndex": 0,
        "Limit": 1,
        "Recursive": "true",
        "IncludeItemTypes": "Movie",
        "api_key": API_KEY,
        "Filters": "IsPlayed",
        "Fields": "Budget,Genres,Overview,People,Revenue,Studios,Taglines,ProviderIds",
    }
    res = requests.get(
        f"{URL}/emby/Users/{USER_ID}/Items", params=params, timeout=timeout
    )
    res.raise_for_status()

    # Honour the declared int return type even when the field is missing, so
    # callers comparing against the result never receive None.
    return int(res.json().get("TotalRecordCount") or 0)

def get_chunk(start: int = 0, chunk: int = 20, timeout: float = 30.0) -> list:
    """
    Gets a chunk of played-movie information from Jellyfin.

    Args:
        start: Index of the first item to request (sent as ``StartIndex``).
        chunk: Maximum number of items to request (sent as ``Limit``).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of dictionaries with movie info, taken from the ``Items``
        field of the JSON response; an empty list when that field is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Passing the query as a dict lets requests URL-escape every value
    # (including the API key) instead of relying on hand-built strings.
    params = {
        "StartIndex": start,
        "Limit": chunk,
        "Recursive": "true",
        "IncludeItemTypes": "Movie",
        "api_key": API_KEY,
        "Filters": "IsPlayed",
        "Fields": "Budget,Genres,Overview,People,Revenue,Studios,Taglines,ProviderIds",
    }
    res = requests.get(
        f"{URL}/emby/Users/{USER_ID}/Items", params=params, timeout=timeout
    )
    res.raise_for_status()

    # Always hand back a list so callers can extend/iterate without a None check.
    return res.json().get("Items") or []

# Accumulates every movie record returned by the server.
all_movies = []

# Paging state: reported total, current offset, and page length.
size = get_amount()
start, chunk = 0, 20

# Request one page per iteration until the offset reaches the reported total.
while start < size:
    print(f'Getting movies {start} - {start + chunk} of {size}')
    all_movies += get_chunk(start=start, chunk=chunk)
    start += chunk

Getting movies 0 - 20 of 496
Getting movies 20 - 40 of 496
Getting movies 40 - 60 of 496
Getting movies 60 - 80 of 496
Getting movies 80 - 100 of 496
Getting movies 100 - 120 of 496
Getting movies 120 - 140 of 496
Getting movies 140 - 160 of 496
Getting movies 160 - 180 of 496
Getting movies 180 - 200 of 496
Getting movies 200 - 220 of 496
Getting movies 220 - 240 of 496
Getting movies 240 - 260 of 496
Getting movies 260 - 280 of 496
Getting movies 280 - 300 of 496
Getting movies 300 - 320 of 496
Getting movies 320 - 340 of 496
Getting movies 340 - 360 of 496
Getting movies 360 - 380 of 496
Getting movies 380 - 400 of 496
Getting movies 400 - 420 of 496
Getting movies 420 - 440 of 496
Getting movies 440 - 460 of 496
Getting movies 460 - 480 of 496
Getting movies 480 - 500 of 496


In [4]:
# Top-level keys of a movie record that filter_movie() retains; every key not
# listed here is dropped.
columns_to_keep = [
    "Name", "PremiereDate",
    "CriticRating", "OfficialRating",
    "Overview", "Taglines", "Genres",
    "CommunityRating", "RunTimeTicks", "ProductionYear",
    # Nested values: filter_movie() trims these further and replaces
    # "UserData" with a flat "IsFavorite" entry.
    "People", "Studios", "GenreItems", "UserData",
]

# Function to filter a single movie dictionary
def filter_movie(movie):
    """
    Build a trimmed copy of one movie dictionary.

    Only keys listed in ``columns_to_keep`` survive. Nested values are then
    reduced: each person keeps "Name", "Id", "Role" and "Type"; each studio
    and genre item keeps only "Name" (entries without a "Name" are dropped);
    "UserData" is replaced by a flat "IsFavorite" entry.

    The ``movie`` argument itself is not modified.

    Returns the new, trimmed dictionary.
    """
    person_fields = ["Name", "Id", "Role", "Type"]

    # Copy over only the whitelisted top-level keys.
    filtered = {}
    for key, value in movie.items():
        if key in columns_to_keep:
            filtered[key] = value

    # People: strip every person down to the selected fields.
    if "People" in filtered:
        slim_people = []
        for person in filtered["People"]:
            slim_people.append(
                {k: v for k, v in person.items() if k in person_fields}
            )
        filtered["People"] = slim_people

    # Studios and GenreItems share the same reduction: keep only 'Name'.
    for field in ("Studios", "GenreItems"):
        if field in filtered:
            filtered[field] = [
                {"Name": entry["Name"]}
                for entry in filtered[field]
                if "Name" in entry
            ]

    # UserData: keep only 'IsFavorite', lifted to the top level.
    if "UserData" in filtered:
        user_data = filtered.pop("UserData")
        filtered["IsFavorite"] = user_data.get("IsFavorite")

    return filtered

# Apply filter_movie to every fetched record, keeping the original order.
filtered_movies = list(map(filter_movie, all_movies))