In [1]:
#-------------------------
# import packages
#-------------------------

import requests
import pycountry
import pandas as pd
import time  # for sleep/backoff

#--------------------------
# retrieve IMF trade data
#--------------------------

# root of the IMF SDMX JSON REST service; request keys are appended to it
base_url = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/'

# alpha-2 code of every entry in pycountry's country database
country_codes = [entry.alpha_2 for entry in pycountry.countries]

def get_imf_dots_data(reporter,
                      indicator='TXG_FOB_USD',
                      freq='Q',
                      startPeriod='1980',
                      endPeriod='2024',
                      max_retries=5,
                      backoff_factor=3):
    """
    Retrieve IMF DOTS data for a given reporter country, with basic retry logic.

    Each attempt has three steps that are guarded separately so that a
    failure is reported under the right label:

    1. the HTTP request (connection errors, timeouts, non-2xx status),
    2. decoding the response body as JSON,
    3. extracting ``CompactData -> DataSet -> Series`` from the decoded JSON.

    Failures in steps 1 and 2 are treated as transient and retried with a
    linearly growing sleep (``backoff_factor * attempt_number`` seconds).
    A failure in step 3 is not retried and yields ``None`` immediately.

    Parameters:
    -----------
    reporter : str
        ISO alpha-2 code for the reporter country (e.g., 'GB', 'US', 'CN', etc.)
    indicator : str
        IMF indicator code ('TMG_CIF_USD' for imports, 'TXG_FOB_USD' for exports)
    freq : str
        Frequency ('A' for annual, 'Q' for quarterly, 'M' for monthly)
    startPeriod : str
        Starting period (e.g., '1947')
    endPeriod : str
        Ending period (e.g., '2024')
    max_retries : int
        Maximum number of attempts; with 0 no request is made and None is returned
    backoff_factor : int or float
        Multiplier for the backoff (sleep time) between retries

    Returns:
    --------
    data : dict or list or None
        The ``Series`` element of the parsed JSON, or None if we can't
        retrieve valid data.
    """
    # construct the key - here, partner is omitted to retrieve all partners
    key = f'CompactData/DOT/{freq}.{reporter}.{indicator}'
    full_url = f'{base_url}{key}?startPeriod={startPeriod}&endPeriod={endPeriod}'

    for attempt in range(max_retries):
        print(f"Attempt {attempt+1} for {reporter}: {full_url}")

        # step 1: the request itself (network problems and non-2xx statuses)
        try:
            response = requests.get(full_url, headers={"Accept": "application/json"}, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # keep a reference: the name bound by `as` is cleared after the handler
            error = e
            retry_label = "Network error"
            final_message = f"Final network error (no more retries) for {reporter}: {e}"
        else:
            # step 2: decode the body; kept in its own try so an invalid body
            # (possibly rate limiting or a server error page) is reported as a
            # JSON problem even when the decode error also derives from
            # RequestException
            try:
                json_data = response.json()
            except ValueError as e:
                error = e
                retry_label = "JSON parse error"
                final_message = f"Final JSON parse error for {reporter}: {e}"
            else:
                # step 3: navigate to the series element; an unexpected shape
                # is not retried because the data most likely doesn't exist
                try:
                    return json_data['CompactData']['DataSet']['Series']
                except (KeyError, TypeError) as e:
                    # KeyError: an expected key is absent
                    # TypeError: an intermediate node is not a mapping (e.g. null)
                    print(f"JSON structure error (missing keys) for {reporter}: {e}")
                    return None

        # shared handling for the retryable failures of steps 1 and 2
        if attempt == max_retries - 1:
            print(final_message)
            return None

        wait_seconds = backoff_factor * (attempt + 1)
        print(f"{retry_label} for {reporter}, retrying in {wait_seconds} seconds: {error}")
        time.sleep(wait_seconds)

    # only reached when max_retries is 0 (the loop body never ran)
    return None

# reporter country codes to request; narrow this list to fetch a subset
reporters = country_codes

# reporter code -> raw series payload returned by get_imf_dots_data
data_dict = {}

for rep in reporters:
    print(f"\nFetching data for {rep}...")
    series_data = get_imf_dots_data(rep)
    if series_data is None:
        # nothing usable came back for this reporter; report it and move on
        print(f"No data returned (or could not retrieve) for {rep}.")
        continue
    data_dict[rep] = series_data

#--------------------------------
# format data as table and save
#--------------------------------

# column order of the output table; passed to the DataFrame explicitly so the
# columns exist even when no reporter returned any data
RECORD_COLUMNS = ['reporter', 'counterpart_area', 'time_period', 'value']


def _as_list(node):
    """Normalise a payload node to a list.

    The series and observation nodes are handled both as a list of dicts and
    as a single dict; a list is returned unchanged, a lone dict is wrapped in
    a one-element list, and any other value (including None) yields an empty
    list so it is skipped.
    """
    if isinstance(node, list):
        return node
    if isinstance(node, dict):
        return [node]
    return []


def _series_to_records(reporter, series):
    """Flatten one reporter's series payload into one row dict per observation."""
    rows = []
    for s in _as_list(series):
        cp = s.get('@COUNTERPART_AREA')
        for o in _as_list(s.get('Obs')):
            rows.append({
                'reporter': reporter,
                'counterpart_area': cp,
                'time_period': o.get('@TIME_PERIOD'),
                'value': o.get('@OBS_VALUE')
            })
    return rows


records = []

for rep, series in data_dict.items():
    records.extend(_series_to_records(rep, series))

df = pd.DataFrame(records, columns=RECORD_COLUMNS)
# unparseable period labels become NaT instead of raising
df['time_period'] = pd.to_datetime(df['time_period'], errors='coerce')

df.to_csv("../../data/raw/imf_exports_quarterly.csv", index=False)



Fetching data for AW...
Attempt 1 for AW: http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/DOT/Q.AW.TXG_FOB_USD?startPeriod=1980&endPeriod=2024

Fetching data for AF...
Attempt 1 for AF: http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/DOT/Q.AF.TXG_FOB_USD?startPeriod=1980&endPeriod=2024

Fetching data for AO...
Attempt 1 for AO: http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/DOT/Q.AO.TXG_FOB_USD?startPeriod=1980&endPeriod=2024

Fetching data for AI...
Attempt 1 for AI: http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/DOT/Q.AI.TXG_FOB_USD?startPeriod=1980&endPeriod=2024

Fetching data for AX...
Attempt 1 for AX: http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/DOT/Q.AX.TXG_FOB_USD?startPeriod=1980&endPeriod=2024
JSON structure error (missing keys) for AX: 'Series'
No data returned (or could not retrieve) for AX.

Fetching data for AL...
Attempt 1 for AL: http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/DOT/Q.AL.TXG_FOB_USD?

  df['time_period'] = pd.to_datetime(df['time_period'], errors='coerce')
