In [1]:
import json
import requests
import os
import pandas as pd
import time
from title_processing import process_titles_data_batch_worker
from multiprocessing import Pool, Process, Manager

In [2]:
def request_netflix_titles(
        params,
        cookies,
        headers,
        genre_id = "34399",
        from_ix = 0,
        to_ix = 100,
        timeout = 30,
    ):
    """Request the A-Z title listing of one genre from Netflix's pathEvaluator endpoint.

    Two path expressions are posted for the same genre/index range: one asking
    for each item's ``itemSummary`` and one asking for selected fields of each
    item's ``reference``.

    Args:
        params: Query-string parameters, forwarded unchanged to ``requests.post``.
        cookies: Cookies for the request, forwarded unchanged.
        headers: HTTP headers for the request, forwarded unchanged.
        genre_id: Genre identifier; interpolated *unquoted* into the path
            expressions.
        from_ix: Value sent as the ``"from"`` bound of the requested range.
        to_ix: Value sent as the ``"to"`` bound of the requested range.
        timeout: Seconds to wait for the server before giving up (passed to
            ``requests.post``). Without a timeout a stalled connection would
            block the notebook cell indefinitely.

    Returns:
        The ``requests`` response object, whatever its status code. The status
        code is also printed.

    Raises:
        Whatever ``requests.post`` raises (connection errors, timeouts, ...);
        nothing is caught here.
    """
    # Both path expressions share the same genre / range prefix.
    az_slice = f'"genres",{genre_id},"az",{{"from":{from_ix},"to":{to_ix}}}'
    genre_item_summ_path = f'[{az_slice},"itemSummary"]'
    genre_item_ref_summ_path = (
        f'[{az_slice},"reference",'
        '["availability","episodeCount","inRemindMeList","queue","summary"]]'
    )
    data = {
        'path': [
            genre_item_summ_path,
            genre_item_ref_summ_path,
        ]
    }

    response = requests.post(
        'https://www.netflix.com/nq/website/memberapi/vc63e5850/pathEvaluator',
        params=params,
        cookies=cookies,
        headers=headers,
        data=data,
        timeout=timeout,
    )

    print(f"fetch status code --> {response.status_code}")
    return response



In [3]:
def process_titles_data_batch(genre_id, summary_data, batch_size=6, wait_time=15):
    """Process a genre's listing entries in batches, one worker process per batch.

    The entries found under ``summary_data["jsonGraph"]["genres"][genre_id]["az"]``
    are split into chunks of ``batch_size``. Each chunk is handed to
    ``process_titles_data_batch_worker`` in its own process together with a
    manager-backed shared list. The worker lives in ``title_processing`` and is
    not shown here; it is presumably responsible for appending its results to
    that list.

    Args:
        genre_id: Key of the genre inside ``summary_data["jsonGraph"]["genres"]``.
        summary_data: Decoded JSON response containing the listing.
        batch_size: Number of entries per worker process; must be at least 1.
        wait_time: Seconds to pause between launching consecutive workers.

    Returns:
        A plain list with every truthy item the workers put in the shared list
        (falsy entries such as ``None`` are dropped). Empty when the listing
        has no entries.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If ``summary_data`` lacks the expected nested keys.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    items = list(summary_data["jsonGraph"]["genres"][genre_id]['az'].values())
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    # Nothing to do: avoid starting a manager server process for an empty listing.
    if not batches:
        return []

    total_batches = len(batches)

    with Manager() as manager:
        result_list = manager.list()
        processes = []

        for i, batch in enumerate(batches, start=1):
            print(f"Processing Batch {i}/{total_batches}")
            process = Process(target=process_titles_data_batch_worker, args=(batch, result_list))
            processes.append(process)
            process.start()

            # Stagger worker start-up by wait_time seconds. No pause is needed
            # after the final batch because we go straight to joining.
            if i < total_batches:
                print(f"Time to take a break for {wait_time} seconds while work is being done in the background. Be patient!")
                time.sleep(wait_time)

        for process in processes:
            process.join()

        # Copy the results out while the manager is still alive.
        title_info_list = list(filter(None, result_list))

    return title_info_list


In [6]:
def main():
    """Fetch one slice of a genre listing, process it in batches and save it as CSV.

    The settings at the top of the function (genre, index range, batch size and
    pause between batches) drive the whole run. The Netflix cookie values are
    read from the ``NETFLIX_SECURE_ID`` / ``NETFLIX_ID`` environment variables
    when set, so real credentials need not be pasted into the notebook; the
    original placeholders are used otherwise.

    Returns:
        The resulting ``pandas.DataFrame`` when at least one record was
        produced (it is also written to
        ``csv_files/netflix_movies_from_<from_ix>_to_<to_ix>.csv``); ``None``
        when the request failed, the response was not JSON, or there was
        nothing to save.
    """
    genre_id = "34399"
    from_ix = 4001
    to_ix = 5000
    batch_size = 60
    wait_time = 36

    params = {
        'webp': 'true',
        'drmSystem': 'widevine',
        'isVolatileBillboardsEnabled': 'true',
        'routeAPIRequestsThroughFTL': 'false',
        'hasVideoMerchInBob': 'true',
        'hasVideoMerchInJaw': 'true',
        'falcor_server': '0.1.0',
        'withSize': 'true',
        'materialize': 'true',
        'original_path': '/shakti/mre/pathEvaluator',
    }

    cookies = {
        'SecureNetflixId': os.environ.get('NETFLIX_SECURE_ID', '<your_netflix_id>'),
        'NetflixId': os.environ.get('NETFLIX_ID', '<your_netflix_id>'),
        'profilesNewSession': '0'
    }

    headers = {
        'authority': 'www.netflix.com',
        'accept': '*/*',
        'accept-language': 'en-GB,en;q=0.7',
        'content-type': 'application/x-www-form-urlencoded',
    }

    # Forward genre_id explicitly: the processing step below looks the same
    # genre up in the response, so request and lookup must always agree.
    response = request_netflix_titles(
        params=params,
        cookies=cookies,
        headers=headers,
        genre_id=genre_id,
        from_ix=from_ix,
        to_ix=to_ix,
    )

    if response.status_code != 200:
        print("failed to get data. Input correct Netflix Id or check on with correct genre_id and index")
        return None

    try:
        summary_data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON cannot be processed.
        print("failed to decode the response as JSON. Input correct Netflix Id and try again")
        return None

    title_info_list = process_titles_data_batch(
        genre_id,
        summary_data,
        batch_size=batch_size,
        wait_time=wait_time,
    )
    df = pd.DataFrame.from_records(title_info_list)

    if df.empty:
        print("nothing to save...")
        return None

    csv_foldername = "csv_files"
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(csv_foldername, exist_ok=True)

    csv_filename = os.path.join(
        csv_foldername,
        f"netflix_movies_from_{from_ix}_to_{to_ix}.csv"
    )
    df.to_csv(csv_filename, index=False)
    print(f"CSV file: {csv_filename} saved successfully!")
    return df

In [7]:
%%time
# Run the full pipeline defined in main(): request the listing, process it in
# batches and write the CSV. Slow cell: the run logged below has 17 batches with
# a 36-second pause per batch, so expect roughly ten minutes.
main()

fetch status code --> 200
Processing Batch 1/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 2/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 3/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 4/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 5/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 6/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 7/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 8/17
Time to take a break for 36 seconds while work is being done in the background. Be patient!
Processing Batch 9/17
Time to take a break for 36 seconds whil