In [38]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os
from datetime import datetime

# Load environment variables from .env file
load_dotenv()

start_year = '2024'
end_year = '2024'
order_by = 'date'
api_key_env_variable = 'NETFLIX_API_KEY'  # New environment variable for API key
limit = 100  # Example: Set the limit for each request

def extract_titles():
    all_data = []
    offset = 0

    # Continue fetching data until there are no more results left
    while True:
        url = f"https://api.apilayer.com/unogs/search/titles?start_year={start_year}&order_by={order_by}&end_year={end_year}&limit={limit}&offset={offset}"
        payload = {}
        headers = {"apikey": os.getenv(api_key_env_variable)}

        response = requests.request("GET", url, headers=headers, data=payload)
        data = response.json()

        # Check if there are no more results left
        if not data["results"]:
            break

        # Extract relevant fields from each result and append to all_data
        for result in data["results"]:
            all_data.append({
                "imdb_id": result["imdb_id"],
                "title": result["title"],
                "rating": result["rating"],
                "year": result["year"],
                "runtime": result["runtime"],
                "top250": result["top250"],
                "top250tv": result["top250tv"],
                "title_date": result["title_date"]
            })

        # Increment the offset for the next request
        offset += limit

    # Create a DataFrame from all_data
    df = pd.DataFrame(all_data)

    # Save the DataFrame as a new CSV file with timestamp
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    csv_file_name = f'Netflix2024_{timestamp}.csv'
    df.to_csv(csv_file_name, index=False)
    print(f"CSV file {csv_file_name} saved successfully.")

    # Display the DataFrame
    return df

# extract_titles()


In [39]:
extract_titles()

CSV file Netflix2024_20240307235501.csv saved successfully.


Unnamed: 0,imdb_id,title,rating,year,runtime,top250,top250tv,title_date
0,,Pokémon Horizons: The Series,,2024,0,0,0,2024-03-07
1,tt2910814,The Signal,6,2024,0,0,0,2024-03-07
2,tt8367814,The Gentlemen,7.8,2024,0,0,0,2024-03-07
3,,ARA San Juan: The Submarine that Disappeared,,2024,0,0,0,2024-03-07
4,tt28867594,Kultus Iblis,0,2024,5903,0,0,2024-03-07
...,...,...,...,...,...,...,...,...
134,,You Are What You Eat: A Twin Experiment,,2024,0,0,0,2024-01-01
135,tt0978808,Fool Me Once,7.3,2024,0,0,0,2024-01-01
136,,Bitconned,,2024,5647,0,0,2024-01-01
137,tt21742340,The Influencer,5.5,2024,0,0,0,2023-12-13


In [77]:
def extract_people():
    all_data = []
    offset = 500

    # Continue fetching data until there are no more results left
    while True:
        url = f"https://api.apilayer.com/unogs/search/people?person_type=Director"
        payload = {}
        headers = {"apikey": os.getenv(api_key_env_variable)}

        response = requests.request("GET", url, headers=headers, data=payload)
        data = response.json()

        # Check if there are no more results left
        if not data["results"]:
            break

        # Extract relevant fields from each result and append to all_data
        for result in data["results"]:
            all_data.append({
                "netflix_id": result["netflix_id"],
                "full_name": result["full_name"],
                "person_type": result["person_type"],
                "title": result["title"]
            })
        
        # Increment the offset for the next request
        offset += limit
        print(f'loaded{offset} data')

    # Create a DataFrame from all_data
    df = pd.DataFrame(all_data)

    # Save the DataFrame as a new CSV file with timestamp
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    csv_file_name = f'Netflix_people_{timestamp}.csv'
    df.to_csv(csv_file_name, index=False)
    print(f"CSV file {csv_file_name} saved successfully.")

    # Display the DataFrame
    return df



In [78]:
extract_people()

loaded600 data
loaded700 data
loaded800 data
loaded900 data




KeyboardInterrupt: 

In [79]:
import requests

person_type='Actor'

url = f"https://api.apilayer.com/unogs/search/people?person_type=Director"

payload = { }

headers= {
    "apikey": "LQpuQgeu4U6qvBAKZd4TcJeDHGDh4RFf"
}

response = requests.get(url, headers=headers, params=payload)

status_code = response.status_code
result = response.text

print(status_code)
print(result)


200
{
    "Object": {
        "total": 515128,
        "limit": 100,
        "offset": 0
    },
    "results": [
        {
            "netflix_id": 70305209,
            "full_name": " Alfred Molina",
            "person_type": "Actor",
            "title": "We&#39;ll Never Have Paris"
        },
        {
            "netflix_id": 70306661,
            "full_name": " Amanda Fuller",
            "person_type": "Actor",
            "title": "Starry Eyes"
        },
        {
            "netflix_id": 70307594,
            "full_name": " Corey Eid",
            "person_type": "Actor",
            "title": "Alien Abduction"
        },
        {
            "netflix_id": 80007406,
            "full_name": " Dan Stevens",
            "person_type": "Actor",
            "title": "A Walk Among the Tombstones"
        },
        {
            "netflix_id": 70307652,
            "full_name": " David Henrie",
            "person_type": "Actor",
            "title": "Little Boy"
        },
     

In [80]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

def load_people_data(titles_df):
    api_key_env_variable = 'NETFLIX_API_KEY'  # New environment variable for API key

    # Extract IMDb IDs from the titles DataFrame
    imdb_ids = titles_df['imdb_id'].tolist()

    all_people_data = []

    # Iterate through each IMDb ID and fetch people associated with the title
    for imdb_id in imdb_ids:
        url = f"https://api.apilayer.com/unogs/search/people"
        payload = {"imdb_id": imdb_id}
        headers = {"apikey": os.getenv(api_key_env_variable)}

        response = requests.get(url, headers=headers, params=payload)
        data = response.json()

        # Append people data to the list
        all_people_data.extend(data["results"])

    # Create a DataFrame from all_people_data
    people_df = pd.DataFrame(all_people_data)

    return people_df

# Example usage:
# Load titles data
titles_df = pd.read_csv('Netflix2024_20240307235501.csv')

# Load people's data based on the titles
people_df = load_people_data(titles_df)

# Save the DataFrame as a new CSV file
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
csv_file_name = f'Netflix_people_{timestamp}.csv'
people_df.to_csv(csv_file_name, index=False)
print(f"CSV file {csv_file_name} saved successfully.")


FileNotFoundError: [Errno 2] No such file or directory: 'Netflix2024_YYYYMMDDHHMMSS.csv'