# Configuration and Data-Fetching

## 1. Imports

In [None]:
import pandas as pd
import wbdata
import requests
import yaml
import io, os
import logging
from pathlib import Path
from datetime import datetime
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

## 2. Logging and Configuration 

In [7]:
def setup_logging(log_path: Path):
	"""Initialize logging to both the console and a log file.

	Creates the directory that will hold the log file (including any missing
	intermediate directories) and configures the root logger at INFO level
	with one file handler and one stream handler.

	Args:
		log_path: Location of the log file to write.
	"""
	# parents=True so a nested location such as "output/logs/run.log" works
	# even when none of the intermediate directories exist yet.
	log_path.parent.mkdir(parents=True, exist_ok=True)
	# NOTE: logging.basicConfig does nothing when the root logger already has
	# handlers (e.g. when this is called a second time in the same process).
	logging.basicConfig(
		level=logging.INFO,
		format='%(asctime)s - %(levelname)s -%(message)s',
		handlers=[
			logging.FileHandler(log_path),
			logging.StreamHandler()
		]
	)



### Load configuration from YAML

In [8]:
# Location of the YAML configuration, relative to the working directory.
CONFIG_PATH = Path('../config.yaml')
try:
	# Explicit encoding so parsing does not depend on the platform's locale.
	with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
		config = yaml.safe_load(f)
except FileNotFoundError:
	print(f"FATAL: Configuration file not found at {CONFIG_PATH}. Please create it.")
	# Stop with a non-zero status: the bare exit() helper comes from the
	# `site` module and would report success (status 0) for a fatal error.
	raise SystemExit(1) from None


# Set up logging using the log-file path from the config
setup_logging(Path(config['paths']['log_file']))

## 2. Robust Data Fetching Function

### 2.1 Retry Session

In [9]:
def create_retry_session(
	retries: int = 3,
	backoff_factor: float = 0.3,
	status_forecelist: tuple = (500, 502, 503, 504)
) -> requests.Session:
	"""Build a requests session whose HTTP(S) adapter carries a retry policy.

	Args:
		retries: Used as the total, read and connect limit of the Retry policy.
		backoff_factor: Passed to Retry as ``backoff_factor``.
		status_forecelist: HTTP status codes passed to Retry as
			``status_forcelist``.

	Returns:
		A new Session with the same retrying adapter mounted for both the
		``http://`` and ``https://`` prefixes.
	"""
	retry_policy = Retry(
		total=retries,
		read=retries,
		connect=retries,
		backoff_factor=backoff_factor,
		status_forcelist=status_forecelist,
	)
	retrying_adapter = HTTPAdapter(max_retries=retry_policy)

	http_session = requests.Session()
	# One shared adapter serves both URL schemes.
	for scheme_prefix in ('http://', 'https://'):
		http_session.mount(scheme_prefix, retrying_adapter)
	return http_session


### 2.2 Data fetching function
Fetching and reshaping World Bank data with robust error handling.

In [10]:

def fetch_world_bank_data(cfg: dict) -> pd.DataFrame | None :
	logging.info("Fetching data from World Bank API....")
	indicators = cfg['world_bank_indicators']
	countries = cfg['countries']
	start, end = cfg['date_range']['start_year'], cfg['data_range']['end_year']
	date_range = (datetime(start, 1, 1), datetime(end - 1, 12, 31))

	try:
		df = wbdata.get_dataframe(indicators, country=countries, data_date=date_range, convert_date=False)
		df = df.reset_index()
		df.rename(columns={'country': 'country_name', 'date':'year'}, inplace=True)
		#	Rename indicator IDs to their friendly names from config
		df.renamee(columns={k: v for k, v in indicators.items()}, inplace=True)
		logging.info("World Bank Data Fetched successfully")
		return df
	except Exception as e:
		logging.error(f"An unexpected error occured while fetching World Bank Data : {e}", exc_info=True)
		return None

