In [1]:
import warnings
import polars as pl
import numpy as np
import requests
import json
from requests.adapters import HTTPAdapter, Retry
from pathlib import Path
from tqdm import tqdm
import os
import sys

In [2]:
# Retry policy for the shared session: only GET requests are retried, and only
# when the server answers 500, 502, 503 or 504 (total=10, backoff_factor=0.5).
retry = Retry(
    allowed_methods=['GET'],
    status_forcelist=[500, 502, 503, 504],
    backoff_factor=0.5,
    total=10,
)

# One session reused by every request below, so the retry policy and the
# Accept header apply to all https:// calls made through `s`.
s = requests.Session()
s.headers['Accept'] = "application/json;odata.metadata=none"
s.mount('https://', HTTPAdapter(max_retries=retry))

In [3]:
# Fetch the indicator catalogue and keep only the indicator codes; each code is
# later used as the last path segment of a per-indicator API URL.
indicators_response = s.get("https://ghoapi.azureedge.net/api/Indicator?$format=json", timeout=60)
# Fail here with a clear HTTP error instead of a confusing KeyError/JSON error
# further down when the server answers with an error status.
indicators_response.raise_for_status()
indicators = [row['IndicatorCode'] for row in indicators_response.json()['value']]

In [4]:
# Download the records of every indicator and cache them as
# data/<IndicatorCode>.parquet. Indicators that already have a parquet file are
# skipped, so re-running this cell resumes an interrupted download.
data_dir = Path('data')
data_dir.mkdir(parents=True, exist_ok=True)  # os.listdir would raise if the folder is missing

existing = os.listdir(data_dir)
cached = set(existing)  # set lookup instead of scanning the list once per indicator

max_attempts = 3
request_timeout = 60  # seconds; same limit as the indicator-list request
skipped = []  # (indicator, reason) pairs for responses that could not be used

for indicator in tqdm(indicators):
    if f"{indicator}.parquet" in cached:
        continue

    url = f"https://ghoapi.azureedge.net/api/{indicator}"

    # Retry transport-level failures a few times on top of the session's
    # status-code retries; give up loudly on the last attempt.
    for attempt in range(max_attempts):
        try:
            new_request = s.get(url, timeout=request_timeout)
            break
        except (
            requests.exceptions.ChunkedEncodingError,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ) as err:
            if attempt == max_attempts - 1:
                # Raise instead of sys.exit so the notebook shows a normal
                # traceback with the underlying network error attached.
                raise RuntimeError(f"failed on {indicator}") from err

    # An error status carries no indicator data; record it rather than
    # trying to parse the error body.
    if not new_request.ok:
        skipped.append((indicator, f"HTTP {new_request.status_code}"))
        continue

    try:
        extract = new_request.json()
    except ValueError:
        # ValueError covers both json.JSONDecodeError and the simplejson
        # variant that requests may raise depending on what is installed.
        skipped.append((indicator, "invalid JSON"))
        continue

    records = extract.get('value') if isinstance(extract, dict) else None
    if records is None:
        skipped.append((indicator, "no 'value' field"))
        continue
    if len(records) == 0:
        # Indicator exists but has no rows: nothing to store.
        continue

    # infer_schema_length=None makes polars scan every record before fixing the schema.
    new_df = pl.from_dicts(records, infer_schema_length=None)
    new_df.write_parquet(data_dir / f"{indicator}.parquet", compression='snappy')

# Surface what was skipped; inspect `skipped` for the per-indicator reasons.
if skipped:
    print(f"{len(skipped)} indicators skipped because of unusable responses (see `skipped`)")

100%|██████████| 3001/3001 [1:42:46<00:00,  2.05s/it]  
