In [None]:
# Establish some constants

from pathlib import Path

# xz-compressed file holding one JSON ledger object per line; the later cells
# read it to find where to resume and append newly fetched ledgers to it
DATA_FILE = Path("data/raw_data.jsonl.xz")

In [None]:
# Get current ledger

import json
import lzma
import requests

# Decide which ledger index to fetch first.
# If earlier results exist, resume one below the last recorded ledger;
# otherwise ask the API for its current ledger.
last_line = None
if DATA_FILE.exists():
    # The file is compressed, so the last record is found by iterating every line
    with lzma.open(DATA_FILE, 'r') as file:
        for line in file:
            # Ignore blank lines so a stray trailing newline cannot hide the last record
            if line.strip():
                last_line = line

if last_line is not None:
    obj = json.loads(last_line)
    current_ledger = obj['ledger_index'] - 1
else:
    # Missing or empty data file: start from whatever the API reports as current
    ledgers_reply = requests.get("https://api.xrpscan.com/api/v1/ledgers", timeout=30)
    # Fail loudly on an HTTP error instead of hitting a KeyError on the error payload
    ledgers_reply.raise_for_status()
    response = ledgers_reply.json()
    current_ledger = response['current_ledger']

print(f"Starting from ledger {current_ledger}")

In [None]:
from time import sleep

# Write to LZMA file

# Retry policy for failed requests; the counter applies to the ledger currently being fetched
MAX_RETRIES = 10
RETRY_DELAY_SECONDS = 10
# Longest a single request may block before it is treated as a failure
REQUEST_TIMEOUT_SECONDS = 30

# Opening the file for append fails if the parent directory is missing
DATA_FILE.parent.mkdir(parents=True, exist_ok=True)

with lzma.open(DATA_FILE, 'a') as file:
    retry_count = 0
    try:
        # Fetch ledgers in descending index order until interrupted or refused
        while True:
            try:
                ledger_response = requests.get(
                    f"https://api.xrpscan.com/api/v1/ledger/{current_ledger}?origin=mlabs.city",
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                code = ledger_response.status_code
                if code == 500:
                    # The server could not produce this ledger: skip it and move on
                    print(f"Internal server error when fetching ledger {current_ledger}")
                    current_ledger -= 1
                    retry_count = 0
                    continue
                if code != 200:
                    # Any other non-success status is treated as a refusal; stop fetching
                    print(f"Received response code of {code} for ledger {current_ledger}")
                    break

                ledger = ledger_response.json()

            # Network failures and undecodable bodies: wait, then retry the same ledger.
            # ValueError covers JSON decoding errors raised by older requests versions.
            except (requests.exceptions.RequestException, ValueError) as error:
                if retry_count >= MAX_RETRIES:
                    print("Error: connection refused too many times")
                    print(f"Last error: {error!r}")
                    break
                retry_count += 1
                sleep(RETRY_DELAY_SECONDS)
                continue

            # Record the ledger object; a single write keeps the record and its
            # newline together if the cell is interrupted
            file.write(json.dumps(ledger).encode() + b"\n")
            current_ledger -= 1
            # Only a fully recorded ledger clears the failure count, so repeated
            # decoding failures on one ledger still hit the retry limit
            retry_count = 0

    # On keyboard interrupt (including while sleeping between retries), stop gracefully;
    # the enclosing `with` still closes the file
    except KeyboardInterrupt:
        pass