In [1]:
import requests
import json
import random
import time
from pathlib import Path

In [2]:
def load_config(new=False):
    """Load the scraper configuration from a JSON file in the working directory.

    Args:
        new: When true, read 'config_extract_new_car.json'; otherwise read
            'config_extract.json'.

    Returns:
        The parsed JSON content of the selected config file.

    Raises:
        FileNotFoundError: If the selected config file does not exist. The
            message names the file that was actually looked up.
    """
    config_path = Path('config_extract_new_car.json' if new else 'config_extract.json')

    if not config_path.exists():
        # Report the path that was really checked; a fixed file name here
        # would point at the wrong file when new=True.
        raise FileNotFoundError(
            f"Config file not found. Ensure '{config_path}' is present and correctly configured."
        )

    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(config_path, encoding='utf-8') as f:
        return json.load(f)


In [3]:
def get_response_json(first_page, last_page, rescraping=False, new=False, timeout=30):
    """POST one request per page and store each page's 'items' as a JSON file.

    The URL, headers and payload template come from ``load_config(new=new)``.
    For every page in ``first_page..last_page`` (inclusive) the template is
    sent with ``pagination.page`` set to that page. A 200 response has its
    ``'items'`` entry written to ``page_<page>.json`` in the output directory;
    any other status code is reported and the page is skipped.

    Args:
        first_page: First page number to request (inclusive).
        last_page: Last page number to request (inclusive).
        rescraping: When true, the output directory name uses 'rescraping'
            instead of 'scraping'.
        new: When true, the new-car config is loaded and 'new_car' is
            included in the output directory name.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled connection raises instead of blocking forever.

    Returns:
        The string "script completed".

    Raises:
        FileNotFoundError: Propagated from ``load_config`` when the config
            file is missing.
        requests.exceptions.RequestException: Propagated from
            ``requests.post`` (including timeouts).
    """
    config = load_config(new=new)
    url = config["url"]
    headers = config["headers"]
    payload_template = config["payload_template"]

    rescrape_prefix = 're' if rescraping else ''
    new_car = 'new_car' if new else ''
    # NOTE(review): with new=False this yields '01__scraping_json_files'
    # (double underscore). The name is kept exactly as before; confirm that
    # this matches the directory layout you expect.
    output_dir = Path(f'./01_{new_car}_{rescrape_prefix}scraping_json_files')
    # Create the target directory up front so a missing folder does not
    # surface only after a request has already been made.
    output_dir.mkdir(parents=True, exist_ok=True)

    for page in range(first_page, last_page + 1):
        # Build a fresh payload with its own "pagination" dict. A shallow
        # copy of the template would share the nested dict and mutate it.
        payload = {
            **payload_template,
            "pagination": {**payload_template["pagination"], "page": page},
        }

        response = requests.post(
            url, headers=headers, data=json.dumps(payload), timeout=timeout
        )

        if response.status_code == 200:
            print(f"\nPage: {page} | Status code: {response.status_code}\n")
            # Decode before opening the file: opening in 'w' mode first would
            # leave an empty file behind if the body is not the expected JSON.
            items = response.json()['items']
            with open(output_dir / f'page_{page}.json', 'w') as fp:
                json.dump(items, fp)
        else:
            print(f"Page {page} had an issue. Status code: {response.status_code}")

        # Pause between consecutive requests (also after a failed page), but
        # not after the final one, where there is no next request to wait for.
        if page < last_page:
            delay = random.uniform(3, 5)
            print(f"Waiting {delay:.2f} seconds before the next request.")
            time.sleep(delay)

    return "script completed"

In [4]:
# get_response_json(1, 260,rescraping=False, new=True)


Page: 1 | Status code: 200

Waiting 3.65 seconds before the next request.

Page: 2 | Status code: 200

Waiting 3.33 seconds before the next request.

Page: 3 | Status code: 200

Waiting 4.12 seconds before the next request.

Page: 4 | Status code: 200

Waiting 4.67 seconds before the next request.

Page: 5 | Status code: 200

Waiting 3.59 seconds before the next request.

Page: 6 | Status code: 200

Waiting 4.91 seconds before the next request.

Page: 7 | Status code: 200

Waiting 4.33 seconds before the next request.

Page: 8 | Status code: 200

Waiting 3.40 seconds before the next request.

Page: 9 | Status code: 200

Waiting 3.02 seconds before the next request.

Page: 10 | Status code: 200

Waiting 4.89 seconds before the next request.

Page: 11 | Status code: 200

Waiting 4.78 seconds before the next request.

Page: 12 | Status code: 200

Waiting 4.31 seconds before the next request.

Page: 13 | Status code: 200

Waiting 3.17 seconds before the next request.

Page: 14 | Status co

'script completed'