In [7]:
import requests
import sqlite3
import pathlib
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional, Tuple
import pycountry
import os
import pathlib

In [8]:
# Three-letter country codes (they look like ISO 3166-1 alpha-3) that the
# download cell is allowed to process.
# NOTE(review): the set holds 194 codes and includes 'VAT' (Holy See), which is
# presumably a UN observer state rather than a member — confirm it is intended.
UN_MEMBERS = set(
    """
    AFG ALB DZA AND AGO ATG ARG ARM AUS AUT
    AZE BHS BHR BGD BRB BLR BEL BLZ BEN BTN
    BOL BIH BWA BRA BRN BGR BFA BDI CPV KHM
    CMR CAN CAF TCD CHL CHN COL COM COG COD
    CRI CIV HRV CUB CYP CZE DNK DJI DMA DOM
    ECU EGY SLV GNQ ERI EST SWZ ETH FJI FIN
    FRA GAB GMB GEO DEU GHA GRC GRD GTM GIN
    GNB GUY HTI HND HUN ISL IND IDN IRN IRQ
    IRL ISR ITA JAM JPN JOR KAZ KEN KIR PRK
    KOR KWT KGZ LAO LVA LBN LSO LBR LBY LIE
    LTU LUX MDG MWI MYS MDV MLI MLT MHL MRT
    MUS MEX FSM MDA MCO MNG MNE MAR MOZ MMR
    NAM NRU NPL NLD NZL NIC NER NGA MKD NOR
    OMN PAK PLW PAN PNG PRY PER PHL POL PRT
    QAT ROU RUS RWA KNA LCA VCT WSM SMR STP
    SAU SEN SRB SYC SLE SGP SVK SVN SLB SOM
    ZAF SSD ESP LKA SDN SUR SWE CHE SYR TJK
    TZA THA TLS TGO TON TTO TUN TUR TKM TUV
    UGA UKR ARE GBR USA URY UZB VUT VAT VEN
    VNM YEM ZMB ZWE
    """.split()
)

In [None]:
# Locate the project root: the nearest directory, starting at the current
# working directory and walking up through its parents, that contains
# notebooks/WorldBank_acquisition.ipynb.
marker = pathlib.Path('notebooks') / 'WorldBank_acquisition.ipynb'
start_dir = pathlib.Path.cwd()
project_root = next(
    (p for p in (start_dir, *start_dir.parents) if (p / marker).exists()),
    None,
)
if project_root is None:
    # Keep the original fallback (use the current directory) but make it
    # visible: a silent fallback puts data/ under whatever directory the
    # kernel happened to start in.
    project_root = start_dir
    print(f" Warning: {marker} not found in {start_dir} or its parents; "
          f"falling back to the current directory as project root")

# Every relative path used later in the notebook resolves against the root.
os.chdir(project_root)

DATABASE_PATH = str(project_root / "data" / "database" / "tb_data.db")
RAW_DATA_DIR = project_root / "data" / "raw"

print(f" Project: {project_root}")
print(f" Database: {DATABASE_PATH}")

✓ Project: /Users/joshua/datascienceproject/notebooks
✓ Database: /Users/joshua/datascienceproject/notebooks/data/database/tb_data.db


In [10]:
# Open the SQLite database (creating its parent directory when missing) and
# rebuild the country_data table from scratch on every run of this cell.
db_path = pathlib.Path(DATABASE_PATH)
db_path.parent.mkdir(parents=True, exist_ok=True)

# `conn` and `cursor` stay open on purpose: later cells reuse them.
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

drop_country_data_sql = "DROP TABLE IF EXISTS country_data"
create_country_data_sql = """
    CREATE TABLE IF NOT EXISTS country_data (
        id INTEGER PRIMARY KEY AUTOINCREMENT, 
        country_code TEXT, 
        gdp_value_US REAL,
        poverty_rate REAL,
        population REAL,
        FOREIGN KEY (country_code) REFERENCES country_metadata(country_code)
    )
"""

# Drop first, then create, so stale rows from a previous run never survive.
for schema_statement in (drop_country_data_sql, create_country_data_sql):
    cursor.execute(schema_statement)
conn.commit()

In [11]:
# Inputs for the threaded download: thread-pool size, the list that collects
# one tuple per processed country, and the country codes to submit.
max_workers = 30
results = []
un_member_codes = [*UN_MEMBERS]

def _mean_of_reported(observations):
    """Return the mean of the non-null ``value`` fields, or None if there are none.

    Years without a reported value are skipped rather than counted as zero,
    so a sparse series is not dragged towards 0.
    """
    reported = [
        obs['value']
        for obs in observations
        if isinstance(obs, dict) and obs.get('value') is not None
    ]
    return sum(reported) / len(reported) if reported else None


def process_country(country):
    """Fetch the 2020-2024 averages of three World Bank indicators for one country.

    Args:
        country: Three-letter country code. Codes that are not in
            ``UN_MEMBERS`` are skipped.

    Returns:
        ``(country, avg_GDP, avg_pov, avg_pop)`` where each average is the mean
        over the years that actually reported a value, or None when no year
        did or the request failed. Returns None (no tuple) when ``country`` is
        not in ``UN_MEMBERS``.
    """
    if country not in UN_MEMBERS:
        return None

    print(f'Processing: {country}')

    def fetch_indicator(indicator):
        """Return the mean reported value of ``indicator`` for ``country``, or None."""
        url = (
            f'https://api.worldbank.org/v2/country/{country}/indicator/{indicator}'
            '?format=json&date=2020:2024&per_page=10'
        )
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            json_data = response.json()

            # The code expects a two-element list whose second element holds
            # the observations; any other shape is treated as "no data".
            if not isinstance(json_data, list) or len(json_data) < 2 or not json_data[1]:
                return None
            return _mean_of_reported(json_data[1])
        except (requests.RequestException, ValueError, TypeError, KeyError) as exc:
            # Best effort: one failing indicator must not abort the whole
            # country, but the failure is reported instead of being swallowed.
            print(f'Failed to fetch {indicator} for {country}: {exc}')
            return None

    avg_GDP = fetch_indicator("NY.GDP.MKTP.CD")
    avg_pov = fetch_indicator("SI.POV.DDAY")
    avg_pop = fetch_indicator("SP.POP.TOTL")

    return (country, avg_GDP, avg_pov, avg_pop)

# Fan the per-country downloads out over a thread pool and keep every
# non-empty result as soon as its future completes.
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # Map each future back to its country code for error reporting.
    futures = dict(
        (executor.submit(process_country, code), code)
        for code in un_member_codes
    )

    for future in as_completed(futures):
        try:
            result = future.result()
        except Exception as e:
            country = futures[future]
            print(f'Error processing {country}: {e}')
        else:
            if result is not None:
                results.append(result)

# Write everything in one batch so the table is filled by a single commit.
if not results:
    print('No data to insert.')
else:
    print(f'Full insertion beginning. {len(results)} records to insert.')
    cursor.executemany(
        "INSERT INTO country_data(country_code, gdp_value_US, poverty_rate, population) VALUES (?, ?, ?, ?)",
        results,
    )
    conn.commit()
    print('Completed Insertion.')

Processing: ECU
Processing: STP
Processing: ZWE
Processing: MLT
Processing: SEN
Processing: ATG
Processing: ESP
Processing: TGO
Processing: VEN
Processing: CPV
Processing: TZA
Processing: VNM
Processing: AUT
Processing: HTI
Processing: BRN
Processing: AGO
Processing: KNA
Processing: IRQ
Processing: SVN
Processing: VUT
Processing: KEN
Processing: HRV
Processing: LTU
Processing: COM
Processing: BRB
Processing: THA
Processing: WSM
Processing: PRT
Processing: AFG
Processing: ZMB
Processing: GRC
Processing: PER
Processing: ISL
Processing: UGA
Processing: CZE
Processing: COG
Processing: BEN
Processing: ISR
Processing: TJK
Processing: DOM
Processing: SWZ
Processing: KHM
Processing: GNB
Processing: VCT
Processing: SDN
Processing: CAN
Processing: SRB
Processing: NAM
Processing: NIC
Processing: MDA
Processing: LBR
Processing: MRT
Processing: LIE
Processing: EGY
Processing: CIV
Processing: LCA
Processing: BOL
Processing: GHA
Processing: SYR
Processing: SYC
Processing: NZL
Processing: GNQ
Processi

In [13]:
# Runs the query only: no fetch call is made here, so the rows stay pending on
# `cursor` and the cell's displayed value is just the cursor object itself.
# NOTE(review): if the intent was to inspect the rows, a fetch (e.g.
# fetchmany/fetchall) is needed — confirm no later cell relies on this cursor state.
cursor.execute("SELECT * FROM country_data")

<sqlite3.Cursor at 0x103ed0ac0>