In [2]:
import json
from pathlib import Path
import requests
from datetime import datetime
import logging

In [3]:
def get_and_store_radon_data(
    url: str = 'https://radon.nauka.gov.pl/opendata/polon/employees/?resultNumbers=100',
    output_file: str = 'data/employees.json'
):
    # Set up logging to track the data collection process
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    
    # Create the output directory if it doesn't exist
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    
    # Initialize our tracking variables
    maxCount = 1
    num_records = 0
    records = []
    token = None
    raw_url = url
    
    logger.info(f"Starting data collection at {datetime.now()}")
    
    # Collect data from the API
    while num_records < maxCount:
        current_url = f'{raw_url}&token={token}' if token else raw_url
        
        try:
            response = requests.get(current_url)
            response.raise_for_status()  # This will raise an exception for bad status codes
            
            data = response.json()
            records.extend(data['results'])  # extend is more efficient than append in a loop
            
            token = data['pagination']['token']
            maxCount = data['pagination']['maxCount']
            num_records += len(data['results'])
            
            logger.info(f"Collected {num_records} of {maxCount} records")
            
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching data: {str(e)}")
            logger.info(f"Last token: {token}")
            logger.info(f"Records collected so far: {len(records)}")
            return None
    
    # Store the collected data
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            # Store as an array of objects, which works well with YARRRML
            json.dump(records, f, indent=2, ensure_ascii=False)
        
        logger.info(f"Successfully stored {len(records)} records to {output_file}")
        
        # Return both the records and the file path for convenience
        return {
            'records': records,
            'file_path': output_file,
            'count': len(records)
        }
        
    except IOError as e:
        logger.error(f"Error storing data: {str(e)}")
        return None

In [9]:
get_and_store_radon_data(
    url='https://radon.nauka.gov.pl/opendata/polon/promotions/?resultNumbers=100',
    output_file='data/promotions.json'
)

INFO:__main__:Starting data collection at 2024-12-05 16:56:04.556879
INFO:__main__:Collected 100 of 19918 records
INFO:__main__:Collected 200 of 19918 records
INFO:__main__:Collected 300 of 19918 records
INFO:__main__:Collected 400 of 19918 records
INFO:__main__:Collected 500 of 19918 records
INFO:__main__:Collected 600 of 19918 records
INFO:__main__:Collected 700 of 19918 records
INFO:__main__:Collected 800 of 19918 records
INFO:__main__:Collected 900 of 19918 records
INFO:__main__:Collected 1000 of 19918 records
INFO:__main__:Collected 1100 of 19918 records
INFO:__main__:Collected 1200 of 19918 records
INFO:__main__:Collected 1300 of 19918 records
INFO:__main__:Collected 1400 of 19918 records
INFO:__main__:Collected 1500 of 19918 records
INFO:__main__:Collected 1600 of 19918 records
INFO:__main__:Collected 1700 of 19918 records
INFO:__main__:Collected 1800 of 19918 records
INFO:__main__:Collected 1900 of 19918 records
INFO:__main__:Collected 2000 of 19918 records
INFO:__main__:Collec

{'records': [{'application': None,
   'promotionData': {'degreeKind': 'w zakresie nauki',
    'degreeKindCode': 1,
    'initiationDate': '2023-09-26',
    'promotionStatus': 'nadano',
    'promotionStatusCode': 2,
    'grantingDate': '2024-05-28',
    'decisionNumber': '057/2023/2024/RNDP',
    'domain': {'domainCode': 'DZ0105N',
     'domainName': 'Dziedzina nauk społecznych'},
    'disciplines': [{'disciplineCode': 'DS010509N',
      'disciplineName': 'pedagogika'}],
    'grantingInstitution': {'institutionUuid': '9eec5244-9085-4de9-9ad9-a77b7ad693bc',
     'institutionName': 'Uniwersytet im. Adama Mickiewicza w Poznaniu'},
    'coLeadingInstitutions': [],
    'additionalNotes': None,
    'deprivationDegreeDate': None,
    'deprivationDegreeDecisionNumber': None,
    'deprivationDegreeKind': None,
    'deprivationTitleDate': None},
   'dissertationData': {'dissertationTitle': 'Profilaktyka lęku przed matematyką. Eksperyment pedagogiczny',
    'dissertationSubmissionDate': '2023-09-26