In [9]:
import json
import requests
import warnings
warnings.filterwarnings("ignore")


# Global constants for headers and config file path.
#
# HEADERS is the header set passed as `headers=` to the `requests` calls made
# by the fetch functions in this notebook.
HEADERS = {
    'authority': 'www.hotstar.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'eng',
    'referer': 'https://www.hotstar.com/in/browse/editorial?tray_id=8554&tray_title=Free%2B-%2BNewly%2BAdded&tray_type=editorial',
    'sec-ch-ua': '"Not A(Brand";v="99", "Brave";v="121", "Chromium";v="121"',
    'sec-ch-ua-mobile': '?0',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'sec-gpc': '1',
    'user-agent': 'HelloRosenta',
    'x-country-code': 'in',
    'x-hs-accept-language': 'eng',
    'x-hs-platform': 'web',
    # NOTE(review): a credential-like user token is committed in source. It
    # looks like a JWT and presumably expires -- confirm, and consider loading
    # it from an environment variable or a local untracked file instead.
    'x-hs-usertoken': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6IiIsImF1ZCI6InVtX2FjY2VzcyIsImV4cCI6MTcxNTQzMzc3NywiaWF0IjoxNzE1MzQ3Mzc3LCJpc3MiOiJUUyIsImp0aSI6IjllN2RlZTc0NzI3NzQ2MjA5MzhhZjI4MTNhODhiNzA4Iiwic3ViIjoie1wiaElkXCI6XCI5MTFlYWFlMDY2Yzc0N2JkOGUzZjA2NTc3ZTc5MjVkN1wiLFwicElkXCI6XCJhNGJjMWNkOWVlNTg0YjE4OGMyY2NiZjk2N2JkYWNjNVwiLFwibmFtZVwiOlwiWW91XCIsXCJpcFwiOlwiMTY1LjIyNS4xMjEuNTJcIixcImNvdW50cnlDb2RlXCI6XCJpblwiLFwiY3VzdG9tZXJUeXBlXCI6XCJudVwiLFwidHlwZVwiOlwiZ3Vlc3RcIixcImlzRW1haWxWZXJpZmllZFwiOmZhbHNlLFwiaXNQaG9uZVZlcmlmaWVkXCI6ZmFsc2UsXCJkZXZpY2VJZFwiOlwiN2ExY2IyLTcxOTQxYS01Yjk3ZmItNjIxNVwiLFwicHJvZmlsZVwiOlwiQURVTFRcIixcInZlcnNpb25cIjpcInYyXCIsXCJzdWJzY3JpcHRpb25zXCI6e1wiaW5cIjp7fX0sXCJpc3N1ZWRBdFwiOjE3MTUzNDczNzc5MDMsXCJtYXR1cml0eUxldmVsXCI6XCJBXCIsXCJkcGlkXCI6XCJhNGJjMWNkOWVlNTg0YjE4OGMyY2NiZjk2N2JkYWNjNVwiLFwic3RcIjoxLFwiZGF0YVwiOlwiQ2dRSUFCSUFDZ3dJQUNJSWtBSFExNGpMempFS0JBZ0FPZ0FLQkFnQU1nQUtsQUVJQUNxUEFRb0NDZ0FLQkFvQ0NBSUthUW9IQ0FFVkFBQUFRQklLQ2dOb2FXNGx2eGNBUHhJS0NnTjBZVzBsK2ZRS1BoSUtDZ04wWld3bHBlemtQUklLQ2dOaVpXNGxkaWVjUFJJS0NnTnRZV3dsTWN1VFBSSUtDZ050WVhJbGNDZEVQUklLQ2dObGJtY2wxZjA1UFJJS0NnTnJZVzRsU3pFclBBb0xDZ0lJQXhJRkNnTm9hVzRLQ3dvQ0NBUVNCUW9EYUdsdUNpNElBRUlxQ2loQ1pXRTFZakl5WldNd01tSTVOREkyTm1GbVpUZ3paamt5WldFeVl6ZzVOREJoVUVack1sbHRcIn0iLCJ2ZXJzaW9uIjoiMV8wIn0.0Li6AhHcm-O7b0uAk1XY6cA2ApPHOqVpH4XIjNcfJ_4',
    'x-request-id': '68674c-7bbd00-762e4b-43a054',
}

# Path of the JSON file that the main cell loads and hands to `process_trays`
# as the list of tray configurations.
CONFIG_FILE_PATH = "./config.json"

In [10]:
def start_page_items(tray_id: str, tray_type: str, timeout: float = 30.0) -> dict:
    """
    Fetches items from the start page for the given tray.

    Sends a POST to the `bff/v2/start` endpoint with a deeplink built from the
    tray type and id, then maps every item title to its poster image URL.

    Args:
    - tray_id: ID of the tray
    - tray_type: Type of the tray
    - timeout: Seconds to wait for the server before `requests` gives up
      (without a timeout a stalled connection would block the notebook forever)

    Returns:
    - Dictionary of content titles and their corresponding image URLs. Empty
      when the request did not return HTTP 200 or the payload carries no
      truthy "success" entry; a failure message is printed in both cases.

    Raises:
    - requests.exceptions.RequestException: on connection errors or timeout
    - KeyError / IndexError: when a successful payload does not have the
      expected nesting
    """
    json_data = {
        'deeplink_url': f'/in/browse/{tray_type}?tray_id={tray_id}',
        'app_launch_count': 100,
    }
    response = requests.post(
        'https://www.hotstar.com/api/internal/bff/v2/start',
        headers=HEADERS,
        json=json_data,
        verify=False,  # NOTE(review): TLS verification is disabled on purpose here -- confirm it is still needed
        timeout=timeout,
    )

    # Treat "non-200" and "200 without a success payload" the same way, so a
    # failed retrieval is never silently reported as an empty tray.
    response_data = response.json() if response.status_code == 200 else {}
    if not response_data.get('success'):
        print("***** Data retrieval was not successful! *****")
        return {}

    # NOTE(review): the tray items are read from the fifth widget wrapper
    # (index 4); this position is hard-coded -- verify against the API response.
    tray_widgets = response_data["success"]["page"]["spaces"]["tray"]["widget_wrappers"]
    items = tray_widgets[4]["widget"]["data"]["items"]

    content_vertical_img_urls = {}
    for widget_data in items:
        content_poster_data = widget_data["horizontal_content_card"]["data"]
        image_suffix = content_poster_data["image"]["src"]
        content_title = content_poster_data["expanded_content_poster"]["content_info"]["title"]
        content_vertical_img_urls[content_title] = f"https://img10.hotstar.com/image/upload/{image_suffix}"

    return content_vertical_img_urls


def get_tokenised_items(tray_id: str, tray_type: str, offset_start: int, offset_limit: int, token_data=None, ipv_mode: str = "sort", timeout: float = 30.0) -> dict:
    """
    Fetches tokenised items for the given tray.

    Args:
    - tray_id: ID of the tray
    - tray_type: Type of the tray
    - offset_start: Start offset, used only when no token is supplied
    - offset_limit: Limit value, used only when no token is supplied
    - token_data: Token for the request, either an already serialised string
      or a dict (which is JSON-encoded). When empty or None, a token is built
      from `offset_start`, `offset_limit` and `tray_id`.
    - ipv_mode: Mode for ipv
    - timeout: Seconds to wait for the server before `requests` gives up

    Returns:
    - Dictionary of content titles and their corresponding image URLs. Empty
      (with a printed failure message) when the response is not HTTP 200.

    Raises:
    - requests.exceptions.RequestException: on connection errors or timeout
    - Exception: any error raised while decoding or walking the payload is
      re-raised after the payload and the request params have been printed
    """
    if not token_data:
        token_data = {
            "offset": offset_start,
            "limit": offset_limit,
            "collectionId": tray_id,
        }
    # A dict must be serialised before it goes into the query string; passing
    # it raw would not send the JSON document the endpoint is given otherwise.
    if isinstance(token_data, dict):
        token_data = json.dumps(token_data)

    params = {
        'token': token_data,
        'tray_id': tray_id,
        'tray_type': tray_type,
        "ipv_mode": ipv_mode
    }

    response = requests.get(
        'https://www.hotstar.com/api/internal/bff/v2/pages/2032/spaces/7455/widgets/54233/items',
        params=params,
        headers=HEADERS,
        verify=False,  # NOTE(review): TLS verification is disabled on purpose here -- confirm it is still needed
        timeout=timeout,
    )
    if response.status_code != 200:
        print("***** Data retrieval was not successful! *****")
        return {}

    content_vertical_img_urls = {}
    # Pre-bind so the error handler can always report something, even when
    # decoding the body is the step that fails.
    response_data = None
    try:
        response_data = response.json()
        for widget_data in response_data["success"]["widget_wrapper"]["widget"]["data"]["items"]:
            content_poster_data = widget_data["vertical_content_poster"]["data"]
            image_suffix = content_poster_data["image"]["src"]
            content_title = content_poster_data["expanded_content_poster"]["content_info"]["title"]
            content_vertical_img_urls[content_title] = f"https://img10.hotstar.com/image/upload/{image_suffix}"
    except Exception:
        # Dump what was received plus the request params for debugging, then
        # let the original exception propagate with its traceback intact.
        print(response_data if response_data is not None else response.text)
        print(f"params: {params}")
        raise

    return content_vertical_img_urls


In [11]:
def process_tray(tray_config: dict) -> dict:
    """
    Collect the title -> image URL mapping for one tray.

    The configuration must carry "tray_title", "tray_id" and "tray_type".
    When "offset_limit" is present the tray is fetched through
    `get_tokenised_items` in "offset_iteration" rounds (default 1); otherwise
    the start page is read via `start_page_items` and every entry of the
    optional "tokens" list is fetched afterwards.

    Args:
    - tray_config: Configuration of the tray

    Returns:
    - Single-entry dictionary: tray title -> {content title: image URL}
    """
    title = tray_config["tray_title"]
    tray_ref = {
        "tray_id": tray_config["tray_id"],
        "tray_type": tray_config["tray_type"],
    }
    rounds = tray_config.get("offset_iteration", 1)
    limit_cap = tray_config.get("offset_limit", None)
    token_list = tray_config.get("tokens", [])

    print(f"\nProcessing Data for Bucket: {title}")
    collected = {}

    if limit_cap is None:
        # No offset limit configured: start page first, then each stored token.
        collected.update(start_page_items(**tray_ref))
        for token in token_list:
            token_items = get_tokenised_items(
                token_data=token,
                offset_start=0,
                offset_limit=10,
                **tray_ref,
            )
            collected.update(token_items)
    else:
        window_start = 0
        for round_no, _ in enumerate(range(rounds), start=1):
            # NOTE(review): the window end is derived from the number of titles
            # gathered so far, not from window_start -- confirm this is intended.
            window_end = len(collected) + int(limit_cap / rounds)
            print(f"*************** ix {round_no}: offset from {window_start} to {window_end}")
            window_items = get_tokenised_items(
                offset_start=window_start,
                offset_limit=window_end,
                **tray_ref,
            )
            collected.update(window_items)
            window_start = window_end

    print(f"Count of images for bucket for bucket: '{title}' -> {len(collected)}")
    return {title: collected}


def process_trays(tray_config_data_list: list) -> dict:
    """
    Run `process_tray` over every configuration and merge the results.

    A separator line of 150 '#' characters is printed after each tray.

    Args:
    - tray_config_data_list: List of tray configurations

    Returns:
    - Dictionary keyed by tray title; each value maps content titles to image
      URLs. A tray title that occurs twice keeps the later result.
    """
    separator = "#" * 150
    merged = {}
    for single_config in tray_config_data_list:
        merged.update(process_tray(single_config))
        print(separator)
    return merged


In [12]:

if __name__ == "__main__":
    # Load the list of tray configurations. The context manager closes the
    # config file right after parsing instead of leaving the handle open.
    with open(CONFIG_FILE_PATH) as config_file:
        tray_config_data_list = json.load(config_file)

    # Fetch every configured tray: {tray title: {content title: image URL}}.
    buckets_contents = process_trays(tray_config_data_list)

    # Persist the per-bucket mapping as JSON next to the notebook.
    bucket_content_img_url_file_path = "./buckets_contents_image_urls.json"
    with open(bucket_content_img_url_file_path, "w+") as f:
        json.dump(buckets_contents, f)
    print(f"Saved {bucket_content_img_url_file_path} Successfully!")



Processing Data for Bucket: Free - Newly Added
*************** ix 1: offset from 0 to 100
Count of images for bucket for bucket: 'Free - Newly Added' -> 37
######################################################################################################################################################

Processing Data for Bucket: Hand-Picked For Couples
*************** ix 1: offset from 0 to 100
Count of images for bucket for bucket: 'Hand-Picked For Couples' -> 20
######################################################################################################################################################

Processing Data for Bucket: Popular Shows
Count of images for bucket for bucket: 'Popular Shows' -> 160
######################################################################################################################################################

Processing Data for Bucket: Movies from the 2010s
*************** ix 1: offset from 0 to 100
Count of images for buck

In [22]:
# Flatten the per-bucket mappings into a single title -> image URL dictionary.
# A title that appears in several buckets keeps the URL of the last bucket.
all_bkt_vertical_links = {
    title: url
    for bucket_items in buckets_contents.values()
    for title, url in bucket_items.items()
}

# Write the flattened mapping to disk as JSON.
with open("all_bkt_vertical_links.json", "w+") as out_file:
    json.dump(all_bkt_vertical_links, out_file)
