In [1]:
import os
import time
from datetime import datetime

import pandas as pd
import requests
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Read the IATI API key from the environment variables
api_key = os.getenv('IATI_API_KEY')
if not api_key:
    # Surface the misconfiguration here instead of as an opaque HTTP error
    # in a later cell.
    print("Warning: IATI_API_KEY is not set; API requests will likely be rejected.")




In [2]:
def fetch_data(api_key, start, rows, max_retries=5, retry_delay=5, timeout=30):
    """Fetch one page of IATI activities as raw XML bytes.

    Sends a GET request to the IATI Datastore activity endpoint with the
    query ``*:*`` and a filter on ``activity_date_iso_date`` from
    2021-01-01 onward, asking for an XML response.

    Args:
        api_key: Subscription key sent in the ``Ocp-Apim-Subscription-Key`` header.
        start: Offset of the first record to return (``start`` query parameter).
        rows: Number of records to return (``rows`` query parameter).
        max_retries: How many times to retry after an HTTP 429 before giving up.
        retry_delay: Seconds to sleep between retries after an HTTP 429.
        timeout: Seconds to wait for the server before ``requests`` raises.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (any other status, or the rate limit persisting past ``max_retries``).

    Raises:
        requests.exceptions.RequestException: Network-level failures (including
            timeouts) are not caught here and propagate to the caller.
    """
    url = 'https://api.iatistandard.org/datastore/activity/iati'
    params = {
        'q': '*:*',
        'fq': 'activity_date_iso_date:[2021-01-01T00:00:00Z TO *]',
        'rows': rows,
        'start': start
    }
    headers = {
        'Accept': 'application/xml',
        'Ocp-Apim-Subscription-Key': api_key
    }

    # A bounded loop instead of recursion: a persistent 429 can no longer
    # recurse without limit.
    for attempt in range(max_retries + 1):
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.content
        if response.status_code != 429:
            print(f"Failed to retrieve data: {response.status_code} - {response.text}")
            return None
        if attempt < max_retries:
            print("Rate limit exceeded. Waiting for a while before retrying...")
            time.sleep(retry_delay)

    print(f"Failed to retrieve data: rate limit still exceeded after {max_retries} retries.")
    return None

# Test fetch_data function
response_content = fetch_data(api_key, start=0, rows=10)
if response_content is not None:
    print(response_content[:500])  # Preview the first 500 bytes of the response
else:
    # fetch_data returns None on failure; slicing None would raise TypeError.
    print("No response content to preview.")


b'<?xml version="1.0" encoding="UTF-8"?><iati-activities generated-datetime="2024-08-08T19:54:07.654Z" version="2.03"><iati-activity xml:lang="en" default-currency="EUR" last-updated-datetime="2024-05-13T11:01:56.810Z"><iati-identifier>XI-IATI-EBRD-47110</iati-identifier><reporting-org ref="XI-IATI-EBRD" type="40"><narrative>European Bank for Reconstruction and Development</narrative></reporting-org><title><narrative>Erste Bank Hungary Equity Investment</narrative></title><description type="1"><na'


In [3]:
# Display the response fetched in the previous cell (the whole value, not a slice).
response_content

b'<?xml version="1.0" encoding="UTF-8"?><iati-activities generated-datetime="2024-08-08T19:54:07.654Z" version="2.03"><iati-activity xml:lang="en" default-currency="EUR" last-updated-datetime="2024-05-13T11:01:56.810Z"><iati-identifier>XI-IATI-EBRD-47110</iati-identifier><reporting-org ref="XI-IATI-EBRD" type="40"><narrative>European Bank for Reconstruction and Development</narrative></reporting-org><title><narrative>Erste Bank Hungary Equity Investment</narrative></title><description type="1"><narrative>EBRD is providing an equity investment in Erste Bank Hungary alongside the  State of Hungary. Erste Group Bank will remain the controlling shareholder.  The Project will contribute to strengthening of the capitalisation of the bank  and its strategy of increased engagement in supporting the real economy in  Hungary.</narrative></description><participating-org type="40" ref="XI-IATI-EBRD" role="1"><narrative>European Bank for Reconstruction and Development</narrative></participating-org

In [4]:
import xml.etree.ElementTree as ET

def parse_xml(response_content):
    """Parse an XML payload and return its root element.

    Args:
        response_content: XML document as ``bytes`` or ``str``.

    Returns:
        The root ``Element`` on success, or ``None`` when the payload is not
        well-formed XML (the parse error is printed).
    """
    try:
        document_root = ET.fromstring(response_content)
    except ET.ParseError as parse_error:
        print(f"XML ParseError: {parse_error}")
        return None
    else:
        print("XML parsed successfully.")
        return document_root

# Test parse_xml function
# Always bind `root` so later cells can test it even when there was no response.
root = parse_xml(response_content) if response_content else None
# Compare with None explicitly: an Element with no children is falsy, so a
# bare `if root:` would misreport a valid but empty document as a failure.
if root is not None:
    print("XML parsed and root element obtained.")
elif response_content:
    print("Failed to parse XML.")


XML parsed successfully.
XML parsed and root element obtained.


In [5]:
def print_xml_structure(root, depth=0):
    """Print the tag and attributes of every descendant of ``root``.

    Descendants are printed in document order, one per line, indented by two
    spaces per nesting level. ``root`` itself is not printed.

    Args:
        root: Element whose descendants are listed.
        depth: Indentation level applied to the direct children of ``root``.
    """
    # Explicit stack of (child iterator, level) pairs; walks the tree in the
    # same pre-order as a recursive descent.
    pending = [(iter(root), depth)]
    while pending:
        children, level = pending[-1]
        node = next(children, None)
        if node is None:
            pending.pop()
            continue
        print("  " * level + node.tag, node.attrib)
        pending.append((iter(node), level + 1))

# Test print_xml_structure function
# Explicit checks instead of `if root:` (Element truth-testing is deprecated);
# the length check keeps root[0] from raising IndexError on an empty document.
if root is not None and len(root) > 0:
    print_xml_structure(root[0])  # Print the structure of the first iati-activity element


iati-identifier {}
reporting-org {'ref': 'XI-IATI-EBRD', 'type': '40'}
  narrative {}
title {}
  narrative {}
description {'type': '1'}
  narrative {}
participating-org {'type': '40', 'ref': 'XI-IATI-EBRD', 'role': '1'}
  narrative {}
participating-org {'type': '70', 'role': '4'}
  narrative {}
activity-status {'code': '2'}
activity-date {'iso-date': '2016-06-20', 'type': '1'}
activity-date {'iso-date': '2016-06-20', 'type': '2'}
activity-date {'iso-date': '2023-12-12', 'type': '4'}
contact-info {'type': '1'}
  organisation {}
    narrative {}
  telephone {}
  email {}
  website {}
  mailing-address {}
    narrative {}
activity-scope {'code': '4'}
recipient-country {'code': 'HU', 'percentage': '100'}
location {}
  location-id {'vocabulary': 'G1', 'code': '719819'}
  name {}
    narrative {}
  point {'srsName': 'http://www.opengis.net/def/crs/EPSG/0/4326'}
    pos {}
  exactness {'code': '2'}
  location-class {'code': '1'}
sector {'code': '24030', 'percentage': '100'}
country-budget-ite

In [6]:
def filter_data_date(root, min_start_date='2021-01-01'):
    """Collect activities whose start date is on or after ``min_start_date``.

    For every ``iati-activity`` below ``root`` the function reads the
    identifier, the first title narrative, the first description narrative and
    the ``iso-date`` of the ``activity-date`` element with ``type="1"`` (if
    several exist, the last one wins). Note that a later cell defines a
    function with this same name.

    Args:
        root: Parsed XML element containing ``iati-activity`` descendants.
        min_start_date: Inclusive cutoff in ``YYYY-MM-DD`` format.

    Returns:
        A list of dicts with keys ``iati_identifier``, ``title``,
        ``description`` and ``start_date`` (the original date string).

    Raises:
        ValueError: If ``min_start_date`` is not a ``YYYY-MM-DD`` date.
    """
    def _text_of(parent, path):
        # Text of the first match, or None when the element is absent.
        node = parent.find(path)
        return node.text if node is not None else None

    cutoff = datetime.strptime(min_start_date, '%Y-%m-%d').date()
    data = []
    for activity in root.findall('.//iati-activity'):
        iati_identifier = _text_of(activity, 'iati-identifier')
        title = _text_of(activity, 'title/narrative')
        description = _text_of(activity, 'description/narrative')
        start_date = None

        for date_elem in activity.findall('activity-date'):
            # NOTE(review): in the IATI ActivityDateType codelist code '1' is
            # "Planned start" and '2' is "Actual start" - confirm which one
            # this filter is meant to use.
            if date_elem.attrib.get('type') == '1':
                start_date = date_elem.attrib.get('iso-date')
                print(f"Found start date: {start_date} for activity {iati_identifier}")

        if not start_date:
            continue

        # Compare real dates, not strings: lexicographically a malformed value
        # such as "unknown" sorts after "2021-01-01" and would slip through.
        # Only the first 10 characters are parsed so a trailing time or
        # timezone suffix does not invalidate an otherwise good date.
        try:
            parsed_start = datetime.strptime(start_date.strip()[:10], '%Y-%m-%d').date()
        except ValueError:
            print(f"Skipping activity {iati_identifier}: invalid start date {start_date!r}")
            continue

        if parsed_start >= cutoff:
            data.append({
                'iati_identifier': iati_identifier,
                'title': title,
                'description': description,
                'start_date': start_date,
            })
    return data

# Test filter_data_date function
# `is not None` rather than truthiness: an Element with no children is falsy.
if root is not None:
    filtered_data_date = filter_data_date(root)
    print(f"Filtered data by date contains {len(filtered_data_date)} activities.")
    if filtered_data_date:
        # No earlier cell defines a save_data helper, so write the CSV directly
        # with pandas instead of hitting a NameError on the first match.
        pd.DataFrame(filtered_data_date).to_csv('iati_activities_filtered_date.csv', index=False)
    else:
        print("No activities matched the date filtering criteria.")


Found start date: 2016-06-20 for activity XI-IATI-EBRD-47110
Found start date: 2015-03-31 for activity XI-IATI-EBRD-46147
Found start date: 2016-05-13 for activity XI-IATI-EBRD-47477
Found start date: 2016-05-26 for activity XI-IATI-EBRD-47396
Found start date: 2015-05-12 for activity XI-IATI-EBRD-46830
Found start date: 2016-11-10 for activity XI-IATI-EBRD-47398
Found start date: 2017-06-29 for activity XI-IATI-EBRD-48414
Found start date: 2015-12-18 for activity XI-IATI-EBRD-47537
Found start date: 2015-12-10 for activity XI-IATI-EBRD-46274
Found start date: 2015-12-18 for activity XI-IATI-EBRD-47388
Filtered data by date contains 0 activities.
No activities matched the date filtering criteria.


In [7]:
def filter_data_date(root, min_start_date='2021-01-01'):
    """Return activities whose validated start date is on or after ``min_start_date``.

    This definition shadows the function of the same name from the previous
    cell. For every ``iati-activity`` below ``root`` it reads the identifier,
    the first title narrative, the first description narrative and the
    ``iso-date`` of the ``activity-date`` element with ``type="1"`` (if several
    exist, the last one wins). Activities with a missing or unparseable date
    are skipped; unparseable dates are reported on stdout.

    Args:
        root: Parsed XML element containing ``iati-activity`` descendants.
        min_start_date: Inclusive cutoff in ``YYYY-MM-DD`` format.

    Returns:
        A list of dicts with keys ``iati_identifier``, ``title``,
        ``description`` and ``start_date`` (the original date string).

    Raises:
        ValueError: If ``min_start_date`` is not a ``YYYY-MM-DD`` date.
    """
    def _text_of(parent, path):
        # Text of the first match, or None when the element is absent.
        node = parent.find(path)
        return node.text if node is not None else None

    cutoff = datetime.strptime(min_start_date, '%Y-%m-%d').date()
    data = []
    for activity in root.findall('.//iati-activity'):
        iati_identifier = _text_of(activity, 'iati-identifier')
        title = _text_of(activity, 'title/narrative')
        description = _text_of(activity, 'description/narrative')
        start_date = None

        for date_elem in activity.findall('activity-date'):
            # NOTE(review): in the IATI ActivityDateType codelist code '1' is
            # "Planned start" and '2' is "Actual start" - confirm which one
            # this filter is meant to use.
            if date_elem.attrib.get('type') == '1':
                start_date = date_elem.attrib.get('iso-date')
                if start_date:
                    print(f"Found start date: {start_date} for activity {iati_identifier}")

        if not start_date:
            continue

        # Actually validate the date. A plain string comparison never raises,
        # so malformed values (e.g. "unknown") used to pass the filter silently.
        # Only the first 10 characters are parsed so a trailing time or
        # timezone suffix does not invalidate an otherwise good date.
        try:
            parsed_start = datetime.strptime(start_date.strip()[:10], '%Y-%m-%d').date()
        except ValueError as e:
            print(f"Error processing date {start_date} for activity {iati_identifier}: {e}")
            continue

        if parsed_start >= cutoff:
            data.append({
                'iati_identifier': iati_identifier,
                'title': title,
                'description': description,
                'start_date': start_date,
            })

    return data

# Test filter_data_date function
# `is not None` rather than truthiness: an Element with no children is falsy.
if root is not None:
    filtered_data_date = filter_data_date(root)
    print(f"Filtered data by date contains {len(filtered_data_date)} activities.")
    if filtered_data_date:
        # No earlier cell defines a save_data helper, so write the CSV directly
        # with pandas instead of hitting a NameError on the first match.
        pd.DataFrame(filtered_data_date).to_csv('iati_activities_filtered_date.csv', index=False)
    else:
        print("No activities matched the date filtering criteria.")


Found start date: 2016-06-20 for activity XI-IATI-EBRD-47110
Found start date: 2015-03-31 for activity XI-IATI-EBRD-46147
Found start date: 2016-05-13 for activity XI-IATI-EBRD-47477
Found start date: 2016-05-26 for activity XI-IATI-EBRD-47396
Found start date: 2015-05-12 for activity XI-IATI-EBRD-46830
Found start date: 2016-11-10 for activity XI-IATI-EBRD-47398
Found start date: 2017-06-29 for activity XI-IATI-EBRD-48414
Found start date: 2015-12-18 for activity XI-IATI-EBRD-47537
Found start date: 2015-12-10 for activity XI-IATI-EBRD-46274
Found start date: 2015-12-18 for activity XI-IATI-EBRD-47388
Filtered data by date contains 0 activities.
No activities matched the date filtering criteria.
