### Importing the libraries 

In [21]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from dotenv import load_dotenv
import os
import time

In [22]:
# Loading environment variables from .env file
load_dotenv()

# Getting the API key from environment variables
api_key = os.getenv('IATI_API_KEY')

# API endpoint
url = 'https://api.iatistandard.org/datastore/activity/iati'

# parameters 
params = {
    'q': '*:*',  # Query all records
    'rows': 5 ,   # Limit the number of results
    'fq': 'iati_identifier:"CH-4-1980001679"',    # Filter for specific identifier
}

# headers
headers = {
    'Accept': 'application/json',
    'Ocp-Apim-Subscription-Key': api_key 
}

# Making the GET request with parameters and headers
response = requests.get(url, params=params, headers=headers)

# response content for debugging
print(response.content)

if response.status_code == 200:
    try:
        # Parsing the XML response
        root = ET.fromstring(response.content)
        
        # Extracting data from XML
        activities = []
        for activity in root.findall('.//iati-activity'):
            activity_data = {
                'identifier': activity.find('iati-identifier').text if activity.find('iati-identifier') is not None else None,
                'title': activity.find('.//title/narrative').text if activity.find('.//title/narrative') is not None else None,
                'description': activity.find('.//description/narrative').text if activity.find('.//description/narrative') is not None else None,
                'status': activity.find('.//activity-status').attrib.get('code') if activity.find('.//activity-status') is not None else None,
                'start_date': activity.find('.//activity-date[@type="1"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="1"]') is not None else None,
                'end_date': activity.find('.//activity-date[@type="4"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="4"]') is not None else None,
                'reporting_org': activity.find('.//reporting-org/narrative').text if activity.find('.//reporting-org/narrative') is not None else None,
                'recipient_country': activity.find('.//recipient-country/narrative').text if activity.find('.//recipient-country/narrative') is not None else None,
                'sector': activity.find('.//sector/narrative').text if activity.find('.//sector/narrative') is not None else None,
                'budget': activity.find('.//budget/value').text if activity.find('.//budget/value') is not None else None,
                'transaction_type': activity.find('.//transaction/transaction-type').attrib.get('code') if activity.find('.//transaction/transaction-type') is not None else None,
                'transaction_date': activity.find('.//transaction/transaction-date').attrib.get('iso-date') if activity.find('.//transaction/transaction-date') is not None else None,
                'transaction_value': activity.find('.//transaction/value').text if activity.find('.//transaction/value') is not None else None,
                'transaction_description': activity.find('.//transaction/description/narrative').text if activity.find('.//transaction/description/narrative') is not None else None,
                'provider_org': activity.find('.//transaction/provider-org/narrative').text if activity.find('.//transaction/provider-org/narrative') is not None else None,
                'receiver_org': activity.find('.//transaction/receiver-org/narrative').text if activity.find('.//transaction/receiver-org/narrative') is not None else None,
                'finance_type': activity.find('.//transaction/finance-type').attrib.get('code') if activity.find('.//transaction/finance-type') is not None else None
            }
            activities.append(activity_data)
        
        # Converting to DF
        df = pd.DataFrame(activities)
        display(df)
    except ET.ParseError as e:
        print("Error parsing XML:", e)
else:
    print(f"Error: {response.status_code}")


b'{ "statusCode": 401, "message": "Access denied due to invalid subscription key. Make sure to provide a valid key for an active subscription." }'
Error: 401


pagination, since even if i delete row limitation on the code above I get max 10 rows.

In [23]:
# Load environment variables from .env file
load_dotenv()

# Get the API key from environment variables
api_key = os.getenv('IATI_API_KEY')

# API endpoint
url = 'https://api.iatistandard.org/datastore/activity/iati'

# Function to fetch data with pagination
def fetch_data(api_key, url, rows_per_page=100, max_pages=10):
    all_activities = []
    start = 0
    for page in range(max_pages):
        # Parameters with pagination
        params = {
            'q': '*:*',  # Query all records
            'rows': rows_per_page,  # Number of results per page
            'start': start,  # Starting point for each page
            'fq': [
                'iati_identifier:"CH-4-1980001679"', 
                #'reporting_org_ref:"CH-4"'  
            ]
        }

        # Headers
        headers = {
            'Accept': 'application/json',
            'Ocp-Apim-Subscription-Key': api_key 
        }

        # Making the GET request with parameters and headers
        response = requests.get(url, params=params, headers=headers)

        if response.status_code == 200:
            try:
                # Parsing the XML response
                root = ET.fromstring(response.content)

                # Extracting data from XML
                activities = []
                for activity in root.findall('.//iati-activity'):
                    activity_data = {
                        'identifier': activity.find('iati-identifier').text if activity.find('iati-identifier') is not None else None,
                        'title': activity.find('.//title/narrative').text if activity.find('.//title/narrative') is not None else None,
                        'description': activity.find('.//description/narrative').text if activity.find('.//description/narrative') is not None else None,
                        'status': activity.find('.//activity-status').attrib.get('code') if activity.find('.//activity-status') is not None else None,
                        'start_date': activity.find('.//activity-date[@type="1"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="1"]') is not None else None,
                        'end_date': activity.find('.//activity-date[@type="4"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="4"]') is not None else None,
                        'reporting_org': activity.find('.//reporting-org/narrative').text if activity.find('.//reporting-org/narrative') is not None else None,
                        'recipient_country': activity.find('.//recipient-country/narrative').text if activity.find('.//recipient-country/narrative') is not None else None,
                        'sector': activity.find('.//sector/narrative').text if activity.find('.//sector/narrative') is not None else None,
                        'budget': activity.find('.//budget/value').text if activity.find('.//budget/value') is not None else None,
                        'transaction_type': activity.find('.//transaction/transaction-type').attrib.get('code') if activity.find('.//transaction/transaction-type') is not None else None,
                        'transaction_date': activity.find('.//transaction/transaction-date').attrib.get('iso-date') if activity.find('.//transaction/transaction-date') is not None else None,
                        'transaction_value': activity.find('.//transaction/value').text if activity.find('.//transaction/value') is not None else None,
                        'transaction_description': activity.find('.//transaction/description/narrative').text if activity.find('.//transaction/description/narrative') is not None else None,
                        'provider_org': activity.find('.//transaction/provider-org/narrative').text if activity.find('.//transaction/provider-org/narrative') is not None else None,
                        'receiver_org': activity.find('.//transaction/receiver-org/narrative').text if activity.find('.//transaction/receiver-org/narrative') is not None else None,
                        'finance_type': activity.find('.//transaction/finance-type').attrib.get('code') if activity.find('.//transaction/finance-type') is not None else None
                    }
                    activities.append(activity_data)

                all_activities.extend(activities)

                # Increment the start for the next page
                start += rows_per_page

                # Break if no more results
                if len(activities) < rows_per_page:
                    break

            except ET.ParseError as e:
                print("Error parsing XML:", e)
                break
        else:
            print(f"Error: {response.status_code}")
            break

    return all_activities

# Fetching data
activities = fetch_data(api_key, url, rows_per_page=100, max_pages=10)

# Converting to DataFrame
df_ch4 = pd.DataFrame(activities)


Error: 401


In [24]:
load_dotenv()


api_key = os.getenv('IATI_API_KEY')

# API endpoint
url = 'https://api.iatistandard.org/datastore/activity/iati'

# Function to fetch data with pagination
def fetch_data(api_key, url, rows_per_page=100, max_pages=10):
    all_activities = []
    start = 0
    for page in range(max_pages):
        # Parameters with pagination
        params = {
            'q': '*:*',  
            'rows': rows_per_page,  
            'start': start, 
            # 'fq': [
            #     'iati_identifier:"CH-4-1980001679"',  
            #     'reporting_org_ref:"CH-4"'  
            # ]
        }

        # Headers
        headers = {
            'Accept': 'application/json',
            'Ocp-Apim-Subscription-Key': api_key 
        }

        # Making the GET request with parameters and headers
        response = requests.get(url, params=params, headers=headers)

        if response.status_code == 200:
            try:
                # Parsing the XML response
                root = ET.fromstring(response.content)

                # Extracting data from XML
                activities = []
                for activity in root.findall('.//iati-activity'):
                    activity_data = {
                        'identifier': activity.find('iati-identifier').text if activity.find('iati-identifier') is not None else None,
                        'title': activity.find('.//title/narrative').text if activity.find('.//title/narrative') is not None else None,
                        'description': activity.find('.//description/narrative').text if activity.find('.//description/narrative') is not None else None,
                        'status': activity.find('.//activity-status').attrib.get('code') if activity.find('.//activity-status') is not None else None,
                        'start_date': activity.find('.//activity-date[@type="1"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="1"]') is not None else None,
                        'end_date': activity.find('.//activity-date[@type="4"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="4"]') is not None else None,
                        'reporting_org': activity.find('.//reporting-org/narrative').text if activity.find('.//reporting-org/narrative') is not None else None,
                        'recipient_country': activity.find('.//recipient-country/narrative').text if activity.find('.//recipient-country/narrative') is not None else None,
                        'sector': activity.find('.//sector/narrative').text if activity.find('.//sector/narrative') is not None else None,
                        'budget': activity.find('.//budget/value').text if activity.find('.//budget/value') is not None else None,
                        'transaction_type': activity.find('.//transaction/transaction-type').attrib.get('code') if activity.find('.//transaction/transaction-type') is not None else None,
                        'transaction_date': activity.find('.//transaction/transaction-date').attrib.get('iso-date') if activity.find('.//transaction/transaction-date') is not None else None,
                        'transaction_value': activity.find('.//transaction/value').text if activity.find('.//transaction/value') is not None else None,
                        'transaction_description': activity.find('.//transaction/description/narrative').text if activity.find('.//transaction/description/narrative') is not None else None,
                        'provider_org': activity.find('.//transaction/provider-org/narrative').text if activity.find('.//transaction/provider-org/narrative') is not None else None,
                        'receiver_org': activity.find('.//transaction/receiver-org/narrative').text if activity.find('.//transaction/receiver-org/narrative') is not None else None,
                        'finance_type': activity.find('.//transaction/finance-type').attrib.get('code') if activity.find('.//transaction/finance-type') is not None else None
                    }
                    activities.append(activity_data)

                all_activities.extend(activities)

                # Increment the start for the next page
                start += rows_per_page

                # Break if no more results
                if len(activities) < rows_per_page:
                    break

            except ET.ParseError as e:
                print("Error parsing XML:", e)
                break
        else:
            print(f"Error: {response.status_code}")
            break

    return all_activities


activities = fetch_data(api_key, url, rows_per_page=100, max_pages=10)

# Converting to DataFrame
df = pd.DataFrame(activities)


Error: 401


### Adding delay

In [25]:
def fetch_data(api_key, url, rows_per_page=100, max_pages=10, delay=1):
    all_activities = []
    start = 0
    for page in range(max_pages):
        # Parameters with pagination and filter query
        params = {
            'q': '*:*',  
            'rows': rows_per_page, 
            'start': start,  
            'fq': [
                'reporting_org_ref:"CH-4"'  
            ]
        }

        # Headers
        headers = {
            'Accept': 'application/json',
            'Ocp-Apim-Subscription-Key': api_key 
        }

        while True:
            # Making the GET request with parameters and headers
            response = requests.get(url, params=params, headers=headers)

            if response.status_code == 200:
                try:
                    # Parsing the XML response
                    root = ET.fromstring(response.content)

                    # Extracting data from XML
                    activities = []
                    for activity in root.findall('.//iati-activity'):
                        activity_data = {
                            'identifier': activity.find('iati-identifier').text if activity.find('iati-identifier') is not None else None,
                            'title': activity.find('.//title/narrative').text if activity.find('.//title/narrative') is not None else None,
                            'description': activity.find('.//description/narrative').text if activity.find('.//description/narrative') is not None else None,
                            'status': activity.find('.//activity-status').attrib.get('code') if activity.find('.//activity-status') is not None else None,
                            'start_date': activity.find('.//activity-date[@type="1"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="1"]') is not None else None,
                            'end_date': activity.find('.//activity-date[@type="4"]').attrib.get('iso-date') if activity.find('.//activity-date[@type="4"]') is not None else None,
                            'reporting_org': activity.find('.//reporting-org/narrative').text if activity.find('.//reporting-org/narrative') is not None else None,
                            'recipient_country': activity.find('.//recipient-country/narrative').text if activity.find('.//recipient-country/narrative') is not None else None,
                            'sector': activity.find('.//sector/narrative').text if activity.find('.//sector/narrative') is not None else None,
                            'budget': activity.find('.//budget/value').text if activity.find('.//budget/value') is not None else None,
                            'transaction_type': activity.find('.//transaction/transaction-type').attrib.get('code') if activity.find('.//transaction/transaction-type') is not None else None,
                            'transaction_date': activity.find('.//transaction/transaction-date').attrib.get('iso-date') if activity.find('.//transaction/transaction-date') is not None else None,
                            'transaction_value': activity.find('.//transaction/value').text if activity.find('.//transaction/value') is not None else None,
                            'transaction_description': activity.find('.//transaction/description/narrative').text if activity.find('.//transaction/description/narrative') is not None else None,
                            'provider_org': activity.find('.//transaction/provider-org/narrative').text if activity.find('.//transaction/provider-org/narrative') is not None else None,
                            'receiver_org': activity.find('.//transaction/receiver-org/narrative').text if activity.find('.//transaction/receiver-org/narrative') is not None else None,
                            'finance_type': activity.find('.//transaction/finance-type').attrib.get('code') if activity.find('.//transaction/finance-type') is not None else None
                        }
                        activities.append(activity_data)

                    all_activities.extend(activities)

                    # Increment the start for the next page
                    start += rows_per_page

                    # Break if no more results
                    if len(activities) < rows_per_page:
                        break

                    # Break the while loop to move to the next page
                    break

                except ET.ParseError as e:
                    print("Error parsing XML:", e)
                    break
            elif response.status_code == 429:
                retry_after = int(response.headers.get("Retry-After", delay))
                print(f"Rate limited. Retrying after {retry_after} seconds...")
                time.sleep(retry_after)
            else:
                print(f"Error: {response.status_code}")
                break

    return all_activities

# Fetch data with the specified parameters
activities = fetch_data(api_key, url, rows_per_page=100, max_pages=30, delay=1)

# Converting to DataFrame
df = pd.DataFrame(activities)

# Display the DataFrame
print(df)


Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Empty DataFrame
Columns: []
Index: []
