In [1]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from dotenv import load_dotenv
import os
import time
from datetime import datetime



In [2]:
# Load the variables declared in the .env file into the process environment.
load_dotenv()

# The key is read from the environment rather than hard-coded in the notebook;
# it is later sent as the 'Ocp-Apim-Subscription-Key' request header.
api_key = os.environ.get('IATI_API_KEY')

# Fail fast: an unset or empty key makes every later request pointless.
if api_key is None or api_key == "":
    raise ValueError("API key not found. Please make sure it is set in the .env file or update it if necessary.")

In [4]:
import concurrent.futures
import requests
import pandas as pd
import time
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return a requests session whose HTTP and HTTPS adapters retry failures.

    Args:
        retries: Used as the total, read and connect retry budget of the
            ``Retry`` policy.
        backoff_factor: Passed through to ``Retry`` as ``backoff_factor``.
        status_forcelist: HTTP status codes passed to ``Retry`` as
            ``status_forcelist``.
        session: Existing session to configure. When omitted (or falsy) a
            new ``requests.Session`` is created.

    Returns:
        The session that was passed in, or the newly created one, with one
        shared retrying adapter mounted for both URL schemes.
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    # The same adapter instance serves both schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)

    return session

def fetch_page(start, rows=1000, max_attempts=5):
    """Fetch one page of matching activities, retrying with exponential backoff.

    Sends a GET request to the module-level ``base_url`` authenticated with
    the module-level ``api_key``, and returns the ``docs`` list found under
    the ``response`` key of the JSON body.

    Args:
        start: Offset of the first result to return (sent as ``start``).
        rows: Page size (sent as ``rows``). Defaults to 1000.
        max_attempts: How many times to try before giving up. Defaults to 5.

    Returns:
        The list of documents for the page, or an empty list when every
        attempt failed. Failures are reported with ``print`` and never
        raised, so callers should compare the number of rows received with
        the expected total.
    """
    # NOTE(review): the first group mixes OR and AND without inner
    # parentheses; confirm the server's operator precedence yields the
    # intended filter.
    # NOTE(review): no explicit sort is sent, so paging with `start` relies on
    # the server returning results in a stable order across requests - confirm.
    params = {
        'q': '(sector_code:(11250 OR 12240 OR 31110 OR 31120 OR 31130 OR 31140 OR 31150 OR 31161 OR 31162 OR 31163 OR 31164 OR 31165 OR 31166 OR 31181 OR 31182 OR 31191 OR 31192 OR 31193 OR 31194 OR 31195 OR 31210 OR 31220 OR 31261 OR 31281 OR 31282 OR 31291 OR 31310 OR 31320 OR 31381 OR 31382 OR 31391 OR 32161 OR 32162 OR 43040 OR 43071 OR 43072 OR 43073 OR 52010) OR sector_vocabulary:2 AND sector_code:(311 OR 312 OR 313)) OR (title_narrative:("food security" OR "food insecurity") OR description_narrative:("food security" OR "food insecurity"))',
        'fl': 'iati_identifier,title_narrative,description_narrative,sector_code,transaction_value,transaction_date,recipient_country_code',
        'rows': rows,
        'start': start
    }
    headers = {'Ocp-Apim-Subscription-Key': api_key}

    # One session serves every attempt for this page and is closed on exit,
    # instead of creating (and leaking) a new session per attempt.
    with requests_retry_session() as session:
        for attempt in range(max_attempts):
            try:
                response = session.get(base_url, headers=headers, params=params, timeout=30)
                response.raise_for_status()
                return response.json()['response']['docs']
            except requests.exceptions.HTTPError as e:
                # Use the response attached to the exception rather than a
                # variable that might be left over from an earlier attempt.
                status = getattr(e.response, 'status_code', None)
                problem = "Rate limit hit." if status == 429 else f"HTTP error occurred: {e}"
            except Exception as e:
                problem = f"An error occurred: {e}"

            if attempt + 1 < max_attempts:
                # Back off before every retry, not only after a 429, so a
                # struggling server is not hit again immediately.
                wait = 2 ** attempt  # exponential backoff
                print(f"{problem} Waiting for {wait} seconds.")
                time.sleep(wait)
            else:
                # No retry follows the last attempt, so sleeping would only
                # delay the failure report.
                print(problem)

    print(f"Failed to fetch data for start={start} after {max_attempts} attempts")
    return []

def get_total_results():
    """Ask the API how many activities match the query, without fetching any.

    Sends the query with ``rows`` set to 0 to the module-level ``base_url``
    and reads ``numFound`` from the ``response`` object of the JSON body.

    Returns:
        The ``numFound`` value on an HTTP 200 response; otherwise prints the
        status code and returns 0.

    Raises:
        Exceptions raised by the underlying request (for example on a
        connection failure or timeout) are not caught here.
    """
    params = {
        'q': '(sector_code:(11250 OR 12240 OR 31110 OR 31120 OR 31130 OR 31140 OR 31150 OR 31161 OR 31162 OR 31163 OR 31164 OR 31165 OR 31166 OR 31181 OR 31182 OR 31191 OR 31192 OR 31193 OR 31194 OR 31195 OR 31210 OR 31220 OR 31261 OR 31281 OR 31282 OR 31291 OR 31310 OR 31320 OR 31381 OR 31382 OR 31391 OR 32161 OR 32162 OR 43040 OR 43071 OR 43072 OR 43073 OR 52010) OR sector_vocabulary:2 AND sector_code:(311 OR 312 OR 313)) OR (title_narrative:("food security" OR "food insecurity") OR description_narrative:("food security" OR "food insecurity"))',
        'rows': 0
    }
    headers = {'Ocp-Apim-Subscription-Key': api_key}

    # Close the session when done, and bound the wait with the same 30 s
    # timeout fetch_page uses so a stalled server cannot hang the notebook.
    with requests_retry_session() as session:
        response = session.get(base_url, headers=headers, params=params, timeout=30)
        if response.status_code == 200:
            return response.json()['response']['numFound']
        else:
            print(f"Error: {response.status_code}")
            return 0

# Endpoint queried by get_total_results() and fetch_page().
base_url = "https://api.iatistandard.org/datastore/activity/select"
total_results = get_total_results()
all_activities = []

print(f"Total results to fetch: {total_results}")

# max_workers is what limits concurrent requests: every page is queued up
# front and the workers pull from that queue independently of this thread.
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:  # Reduced number of workers
    futures = [executor.submit(fetch_page, i) for i in range(0, total_results, 1000)]
    # Collect in submission order (not completion order) so the rows of the
    # resulting DataFrame come out in page order on every run and the
    # progress message refers to the actual page number.
    for i, future in enumerate(futures):
        all_activities.extend(future.result())
        print(f"Fetched page {i+1}/{len(futures)}")

# fetch_page returns [] for a page it could not download, so a shortfall
# would otherwise go unnoticed.
if len(all_activities) != total_results:
    print(f"Warning: expected {total_results} activities but fetched {len(all_activities)}; some pages may have failed.")

df = pd.DataFrame(all_activities)
print(df.head())
print(f"Total activities fetched: {len(df)}")

Total results to fetch: 27724
Fetched page 1/28
Fetched page 2/28
Fetched page 3/28
Fetched page 4/28
Fetched page 5/28
Fetched page 6/28
Fetched page 7/28
Fetched page 8/28
Fetched page 9/28
Fetched page 10/28
Fetched page 11/28
Fetched page 12/28
Fetched page 13/28
Fetched page 14/28
Rate limit hit. Waiting for 1 seconds.
Fetched page 15/28
Fetched page 16/28
Fetched page 17/28
Fetched page 18/28
Fetched page 19/28
Fetched page 20/28
Fetched page 21/28
Fetched page 22/28
Fetched page 23/28
Fetched page 24/28
Fetched page 25/28
Fetched page 26/28
Fetched page 27/28
Fetched page 28/28
                    sector_code          iati_identifier  \
0                       [52010]           NL-1-PPR-24967   
1                         [313]  XM-DAC-41108-1100001659   
2  [31120, 31163, 31130, 43030]          GB-1-202522-102   
3  [31120, 31163, 31130, 43030]          GB-1-202522-103   
4                       [31110]      XM-DAC-41301-746566   

                                     title_narr

In [5]:
# Show the activities DataFrame as this cell's output.
df 

Unnamed: 0,sector_code,iati_identifier,title_narrative,transaction_value,description_narrative,recipient_country_code
0,[52010],NL-1-PPR-24967,[JAK Food Security in SCPP],"[280000.0, 280000.0, 308000.0, 190236.0, 81764...",[The project aims to achieve food security of ...,[ID]
1,[313],XM-DAC-41108-1100001659,[Fisheries: Inland Fisheries and Aquaculture P...,"[5490000.0, 115120.57, 124001.35, 199841.2, 13...",[The overall objective of PD-PAC is to bring a...,[CG]
2,"[31120, 31163, 31130, 43030]",GB-1-202522-102,[South Sudan Food Security and Livelihoods - P...,"[7315000.0, 500000.0, 480000.0, 3000000.0, 133...",[This activity (South Sudan Food Security and ...,[SS]
3,"[31120, 31163, 31130, 43030]",GB-1-202522-103,[South Sudan Food Security and Livelihoods - C...,"[5250000.0, 750000.0, 1200000.0, -480000.0, 20...",[This activity (South Sudan Food Security and ...,[SS]
4,[31110],XM-DAC-41301-746566,[Towards a Food Security Strategy in Libya],"[428000.0, 428000.0, 13735.16]",[Provide support to Libya Goverment to elabora...,[LY]
...,...,...,...,...,...,...
27719,,SE-0-SE-6-10462-10462A0106-H10562,[Syria crisis 2022 - Swedish Mission Council- ...,"[5000000.0, 5000000.0]",[The proposed programme by the Swedish Mission...,
27720,,SE-0-SE-6-10462-10462A0106-H10563,[Method/Capacity/Other Support 2022 - Swedish ...,"[662000.0, 662000.0]",[Den 14 mars 2017 tog Sidas humanitära enhet b...,
27721,,SE-0-SE-6-10462-10462A0106-H10565,[Method/Capacity/Other Support 2022 - Swedish ...,"[938000.0, 938000.0]",[The proposed programme by the Swedish Mission...,
27722,,SE-0-SE-6-10462-10462A0107-H,[Svenska missionsrådets (SMR) humanitära progr...,"[-11855723.0, -5222495.0, -6496686.0, -136542.0]",[The proposed programme by the Swedish Mission...,
