In [11]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from dotenv import load_dotenv
import os
import time
from datetime import datetime
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
import concurrent.futures

In [12]:
load_dotenv()
api_key = os.getenv('IATI_API_KEY')

if not api_key:
    raise ValueError("API key not found. Please make sure it is set in the .env file or update it if necessary.")

### Transactions/Apache-Solr default

Note: Transactions in XML format is not accessible from Datastore API

In [13]:
def requests_retry_session(retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504), session=None):
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

def fetch_page(start):
    params = {
        'q': '(sector_code:(11250 OR 12240 OR 31110 OR 31120 OR 31130 OR 31140 OR 31150 OR 31161 OR 31162 OR 31163 OR 31164 OR 31165 OR 31166 OR 31181 OR 31182 OR 31191 OR 31192 OR 31193 OR 31194 OR 31195 OR 31210 OR 31220 OR 31261 OR 31281 OR 31282 OR 31291 OR 31310 OR 31320 OR 31381 OR 31382 OR 31391 OR 32161 OR 32162 OR 43040 OR 43071 OR 43072 OR 43073 OR 52010) OR sector_vocabulary:2 AND sector_code:(311 OR 312 OR 313)) OR (description_narrative:("food security" OR "food insecurity"))',
        'fl': 'iati_identifier,transaction_value,transaction_date_iso_date,sector_code,recipient_country_code',
        'fq': 'transaction_transaction_date_iso_date:[2021-01-01T00:00:00Z TO *]',  
        'rows': 1000,
        'start': start
    }
    headers = {'Ocp-Apim-Subscription-Key': api_key}
    
    for attempt in range(5):  
        try:
            response = requests_retry_session().get(base_url, headers=headers, params=params, timeout=30)
            response.raise_for_status()
            return response.json()['response']['docs']
        except requests.exceptions.HTTPError as e:
            if response.status_code == 429:
                wait = 2 ** attempt  # exponential backoff
                print(f"Rate limit hit. Waiting for {wait} seconds.")
                time.sleep(wait)
            else:
                print(f"HTTP error occurred: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")
        
    print(f"Failed to fetch data for start={start} after 5 attempts")
    return []

def get_total_results():
    params = {
        'q': '(sector_code:(11250 OR 12240 OR 31110 OR 31120 OR 31130 OR 31140 OR 31150 OR 31161 OR 31162 OR 31163 OR 31164 OR 31165 OR 31166 OR 31181 OR 31182 OR 31191 OR 31192 OR 31193 OR 31194 OR 31195 OR 31210 OR 31220 OR 31261 OR 31281 OR 31282 OR 31291 OR 31310 OR 31320 OR 31381 OR 31382 OR 31391 OR 32161 OR 32162 OR 43040 OR 43071 OR 43072 OR 43073 OR 52010) OR sector_vocabulary:2 AND sector_code:(311 OR 312 OR 313)) OR (description_narrative:("food security" OR "food insecurity"))',
        'rows': 0
    }
    headers = {'Ocp-Apim-Subscription-Key': api_key}
    response = requests_retry_session().get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()['response']['numFound']
    else:
        print(f"Error: {response.status_code}")
        return 0

# Update base URL for transaction collection
base_url = "https://api.iatistandard.org/datastore/transaction/select"

total_results = get_total_results()
all_transactions = []

print(f"Total results to fetch: {total_results}")

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: 
    futures = [executor.submit(fetch_page, i) for i in range(0, total_results, 1000)]
    for i, future in enumerate(concurrent.futures.as_completed(futures)):
        all_transactions.extend(future.result())
        print(f"Fetched page {i+1}/{len(futures)}")
        time.sleep(1) 

df_transactions = pd.DataFrame(all_transactions)

Total results to fetch: 256130
Fetched page 1/257
Fetched page 2/257
Fetched page 3/257
Fetched page 4/257
Fetched page 5/257
Fetched page 6/257
Fetched page 7/257
Fetched page 8/257
Fetched page 9/257
Fetched page 10/257
Fetched page 11/257
Fetched page 12/257
Fetched page 13/257
Fetched page 14/257
Fetched page 15/257
Fetched page 16/257
Fetched page 17/257
Fetched page 18/257
Fetched page 19/257
Fetched page 20/257
Fetched page 21/257
Fetched page 22/257
Fetched page 23/257
Fetched page 24/257
Fetched page 25/257
Fetched page 26/257
Fetched page 27/257
Fetched page 28/257
Fetched page 29/257
Fetched page 30/257
Fetched page 31/257
Fetched page 32/257
Fetched page 33/257
Fetched page 34/257
Fetched page 35/257
Fetched page 36/257
Fetched page 37/257
Fetched page 38/257
Fetched page 39/257
Fetched page 40/257
Fetched page 41/257
Fetched page 42/257
Fetched page 43/257
Fetched page 44/257
Fetched page 45/257
Fetched page 46/257
Fetched page 47/257
Fetched page 48/257
Fetched page 49/25

In [14]:
df_transactions 

Unnamed: 0,sector_code,iati_identifier,recipient_country_code,transaction_value
0,"[312, 31210, 31220, 31261, 31281, 31282, 31291...",NL-KVK-41155305-WL,"[BO, CO, CD, GH, ID, SR, VN, ET]",[4303957.0]
1,"[312, 31210, 31220, 31261, 31281, 31282, 31291...",NL-KVK-41155305-WL,"[BO, CO, CD, GH, ID, SR, VN, ET]",[4102797.0]
2,"[312, 31210, 31220, 31261, 31281, 31282, 31291...",NL-KVK-41155305-WL,"[BO, CO, CD, GH, ID, SR, VN, ET]",[2856056.0]
3,"[312, 31210, 31220, 31261, 31281, 31282, 31291...",NL-KVK-41155305-WL-CoInt,[NL],[442719.0]
4,"[312, 31210, 31220, 31261, 31281, 31282, 31291...",NL-KVK-41155305-WL-CoInt,[NL],[519175.0]
...,...,...,...,...
70806,,SE-0-SE-6-10462-10462A0106-H10565,,[938000.0]
70807,,SE-0-SE-6-10462-10462A0107-H,,[-5222495.0]
70808,,SE-0-SE-6-10462-10462A0107-H,,[-6496686.0]
70809,,SE-0-SE-6-10462-10462A0107-H,,[-136542.0]
