In [143]:
import pandas as pd
from pathlib import Path
import requests
import pandas as pd
import time
from typing import List, Dict, Optional
import json

In [None]:
class PIPDataDownloader:
    """
    Downloads poverty and inequality data from World Bank's Poverty and Inequality Platform (PIP) API.

    Every public method returns a pandas DataFrame. Request failures are
    reported with a printed message and an empty DataFrame rather than an
    exception, so a long multi-country download is not aborted by a single
    failing request.

    The ``unused_`` / ``_unused_`` prefixed method names remain available as
    aliases of the canonical names defined here.

    Args:
        base_url: Root URL of the PIP API.
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    # Keys copied from each country record into the country table.
    _COUNTRY_FIELDS = ('country_code', 'country_name', 'region_name', 'income_group')

    # Pause (seconds) after each country, used as simple rate limiting.
    REQUEST_PAUSE_SECONDS = 0.1

    def __init__(self, base_url: str = "https://api.worldbank.org/pip/v1", timeout: float = 30.0):
        self.base_url = base_url
        self.timeout = timeout
        self.session = requests.Session()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _get_json(self, endpoint: str, params: Dict):
        """GET ``{base_url}/{endpoint}`` and return the decoded JSON body.

        Raises:
            requests.RequestException: on connection errors, timeouts and
                HTTP error statuses (via ``raise_for_status``).
        """
        response = self.session.get(
            f"{self.base_url}/{endpoint}", params=params, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    @classmethod
    def _country_record(cls, item: Dict) -> Dict:
        """Keep only the country metadata fields of one API record."""
        return {field: item.get(field) for field in cls._COUNTRY_FIELDS}

    def _resolve_country_codes(self, country_codes):
        """Return ``country_codes``, or every code known to the API when it is None.

        Returns None (after printing a message) when the country list cannot
        be obtained.
        """
        if country_codes is not None:
            return country_codes
        countries_df = self.get_countries()
        if countries_df.empty:
            print("Could not get country list")
            return None
        return countries_df['country_code'].dropna().tolist()

    def _download_poverty_data(self, country_codes, poverty_lines, fill_gaps: bool,
                               data_type: Optional[str] = None,
                               progress_every: int = 1) -> pd.DataFrame:
        """Fetch every country / poverty-line combination and stack the results.

        Args:
            country_codes: Country codes to request, one request per code and line.
            poverty_lines: Poverty lines to request for each country.
            fill_gaps: Passed through to ``get_poverty_data``.
            data_type: When given, stored in a ``data_type`` column of every row.
            progress_every: Print a progress line for every n-th country.
        """
        frames = []
        total = len(country_codes)

        for i, country_code in enumerate(country_codes):
            if i % progress_every == 0:
                print(f"Processing country {i+1}/{total}: {country_code}")

            for poverty_line in poverty_lines:
                poverty_data = self.get_poverty_data(
                    country_code=country_code,
                    poverty_line=poverty_line,
                    fill_gaps=fill_gaps
                )
                if poverty_data.empty:
                    continue
                poverty_data['poverty_line'] = poverty_line
                if data_type is not None:
                    poverty_data['data_type'] = data_type
                frames.append(poverty_data)

            # Rate limiting
            time.sleep(self.REQUEST_PAUSE_SECONDS)

        if frames:
            return pd.concat(frames, ignore_index=True)
        return pd.DataFrame()

    # ------------------------------------------------------------------
    # Country metadata
    # ------------------------------------------------------------------
    def get_countries(self) -> pd.DataFrame:
        """Get list of all available countries.

        Queries the ``aux`` endpoint; if that request fails, falls back to
        deriving the list from a sample poverty query.

        Returns:
            DataFrame with columns country_code, country_name, region_name
            and income_group (empty when no countries are found).
        """
        try:
            data = self._get_json("aux", {"format": "json"})
        except requests.RequestException as e:
            print(f"Error fetching countries from aux endpoint: {e}")
            # Fallback: get countries from a sample query
            return self._get_countries_fallback()

        # The aux endpoint might return country info in different formats
        if isinstance(data, dict):
            # Look for country data in common locations
            country_data = data.get('country', data.get('countries', []))
            if not country_data and 'pip_countries' in data:
                country_data = data['pip_countries']
        elif isinstance(data, list):
            # Data is directly a list of countries
            country_data = data
        else:
            country_data = []

        return pd.DataFrame(
            [self._country_record(country) for country in country_data
             if isinstance(country, dict)]
        )

    def _get_countries_fallback(self) -> pd.DataFrame:
        """Fallback method to get countries from sample poverty data.

        Returns:
            One row per distinct country_code found in the response, in order
            of first appearance; empty on request failure.
        """
        params = {
            "format": "json",
            "poverty_line": 2.15,
            "fill_gaps": "false"  # Get only survey years
        }

        try:
            data = self._get_json("pip", params)
        except requests.RequestException as e:
            print(f"Error in fallback country retrieval: {e}")
            return pd.DataFrame()

        # dict keeps insertion order, so it doubles as the "seen" set
        countries_by_code = {}
        if isinstance(data, list):
            for item in data:
                if not isinstance(item, dict):
                    continue
                country_code = item.get('country_code')
                if country_code and country_code not in countries_by_code:
                    countries_by_code[country_code] = self._country_record(item)

        return pd.DataFrame(list(countries_by_code.values()))

    # ------------------------------------------------------------------
    # Poverty data
    # ------------------------------------------------------------------
    def get_survey_data_only(self,
                        country_codes: Optional[List[str]] = None,
                        poverty_lines: List[float] = [2.15,]) -> pd.DataFrame:
        """
        Get only survey-based data (no interpolation/extrapolation)

        Args:
            country_codes: List of country codes (None for all)
            poverty_lines: List of poverty lines to retrieve

        Returns:
            Stacked results with a ``poverty_line`` column, or an empty
            DataFrame when nothing could be downloaded.
        """
        country_codes = self._resolve_country_codes(country_codes)
        if country_codes is None:
            return pd.DataFrame()

        print(f"Downloading survey data for {len(country_codes)} countries...")

        return self._download_poverty_data(
            country_codes, poverty_lines, fill_gaps=False, progress_every=10
        )

    def get_complete_data(self,
                        country_codes: Optional[List[str]] = None,
                        poverty_lines: List[float] = [2.15, 3.65, 6.85],
                        include_interpolated: bool = True) -> pd.DataFrame:
        """
        Get complete poverty data with option to include interpolated estimates

        Args:
            country_codes: List of country codes (None for all)
            poverty_lines: List of poverty lines to retrieve
            include_interpolated: Whether to include gap-filled estimates

        Returns:
            Stacked results with ``poverty_line`` and ``data_type`` columns, or
            an empty DataFrame when nothing could be downloaded. ``data_type``
            labels the whole request ('interpolated' or 'survey_only'), not
            individual rows.
        """
        country_codes = self._resolve_country_codes(country_codes)
        if country_codes is None:
            return pd.DataFrame()

        print(f"Downloading complete data for {len(country_codes)} countries...")
        print(f"Include interpolated data: {include_interpolated}")

        return self._download_poverty_data(
            country_codes,
            poverty_lines,
            fill_gaps=include_interpolated,
            data_type='interpolated' if include_interpolated else 'survey_only'
        )

    def get_poverty_data(self,
                        country_code: Optional[str] = None,
                        poverty_line: float = 2.15,
                        year: Optional[int] = None,
                        fill_gaps: bool = False,
                        ppp_version: str = "2017") -> pd.DataFrame:
        """
        Get poverty data for specified parameters

        Args:
            country_code: ISO3 country code (None for all countries)
            poverty_line: Poverty line in PPP USD
            year: Specific year (None for all available years)
            fill_gaps: Whether to include interpolated/extrapolated estimates
            ppp_version: PPP version (2017 or 2011)

        Returns:
            One row per record in the response; empty when the request fails
            or the response is not a JSON list.
        """
        params = {
            "format": "json",
            "poverty_line": poverty_line,
            "fill_gaps": "true" if fill_gaps else "false",
            "ppp_version": ppp_version
        }

        if country_code:
            params["country"] = country_code
        if year is not None:
            params["year"] = year

        try:
            data = self._get_json("pip", params)
        except requests.RequestException as e:
            print(f"Error fetching poverty data: {e}")
            return pd.DataFrame()

        if isinstance(data, list):
            return pd.DataFrame(data)

        print(f"Unexpected response format: {type(data)}")
        return pd.DataFrame()

    def get_country_time_series(self, country_code: str, poverty_line: float = 2.15) -> pd.DataFrame:
        """Get all available data for a specific country.

        Returns:
            Survey rows (``data_type == 'survey'``) followed by gap-filled rows
            (``data_type == 'interpolated'``); empty when both requests
            return nothing.
        """
        survey_data = self.get_poverty_data(
            country_code=country_code,
            poverty_line=poverty_line,
            fill_gaps=False
        )

        interpolated_data = self.get_poverty_data(
            country_code=country_code,
            poverty_line=poverty_line,
            fill_gaps=True
        )

        # Mark data types
        if not survey_data.empty:
            survey_data['data_type'] = 'survey'
        if not interpolated_data.empty:
            interpolated_data['data_type'] = 'interpolated'

        # Combine and return
        data_frames = [df for df in [survey_data, interpolated_data] if not df.empty]
        if data_frames:
            return pd.concat(data_frames, ignore_index=True)
        return pd.DataFrame()

    # Legacy names kept so existing callers of the prefixed methods keep working.
    _unused_get_countries = get_countries
    _unused_get_countries_fallback = _get_countries_fallback
    _unused_get_survey_data_only = get_survey_data_only
    unused_get_country_time_series = get_country_time_series


In [168]:
import pandas as pd
import numpy as np
from typing import Tuple, Optional

def _parse_year(value) -> Optional[int]:
    """Return ``value`` as an integer year, or None when it is missing or not numeric.

    Accepts numbers as well as strings such as '2020' or '2020.0' (the form a
    year takes when the table was read with ``dtype=str``).
    """
    if pd.isna(value):
        return None
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def add_poverty_rates(country_list: pd.DataFrame, 
                     pip_data: pd.DataFrame,
                     poverty_line: float = 2.15) -> pd.DataFrame:
    """
    Add poverty rates to country_list using pip_data with interpolation logic.

    For each row the rate is taken, in order of preference, from the survey
    of exactly that year, from linear interpolation between the surrounding
    surveys, or from the nearest survey year (see ``get_poverty_estimate``).

    Args:
        country_list: DataFrame with columns 'Country', 'Year', 'Country Code'
        pip_data: DataFrame with PIP poverty data (non-interpolated); needs the
            columns 'poverty_line', 'country_code', 'reporting_year', 'headcount'
        poverty_line: Poverty line to use (default 2.15)

    Returns:
        Copy of country_list with added 'poverty_rate' and 'estimate_type'
        columns. Rows without a usable year get 'no year specified'; rows whose
        country is absent from pip_data get 'no data available'. Both keep a
        NaN poverty_rate.
    """

    # Filter pip_data for the specified poverty line
    pip_filtered = pip_data[pip_data['poverty_line'] == poverty_line]

    # Work on a copy so the caller's frame is left untouched
    result = country_list.copy()
    result['poverty_rate'] = np.nan
    result['estimate_type'] = ''

    # Sort each country's surveys once up front instead of once per row
    pip_by_country = {
        code: frame.sort_values('reporting_year')
        for code, frame in pip_filtered.groupby('country_code')
    }

    for idx, row in result.iterrows():
        poverty_rate = np.nan
        target_year = _parse_year(row['Year'])
        country_data = pip_by_country.get(row['Country Code'])

        if target_year is None:
            estimate_type = 'no year specified'
        elif country_data is None:
            estimate_type = 'no data available'
        else:
            exact_match = country_data[country_data['reporting_year'] == target_year]
            if not exact_match.empty:
                poverty_rate = exact_match.iloc[0]['headcount']
                estimate_type = 'exact year'
            else:
                # No exact match - need to interpolate or use nearest
                poverty_rate, estimate_type = get_poverty_estimate(country_data, target_year)
                if poverty_rate is None:
                    poverty_rate = np.nan

        result.loc[idx, 'poverty_rate'] = poverty_rate
        result.loc[idx, 'estimate_type'] = estimate_type

    return result

def get_poverty_estimate(country_data: pd.DataFrame, target_year: int) -> Tuple[Optional[float], str]:
    """
    Get poverty estimate for a target year using interpolation or nearest year logic.

    Surveys with a missing headcount are ignored. When surveys exist on both
    sides of ``target_year`` the rate is linearly interpolated between the
    closest one before and the closest one after; otherwise the rate of the
    nearest survey year is returned unchanged (no extrapolation).

    Args:
        country_data: PIP data for a single country with 'reporting_year' and
            'headcount' columns
        target_year: Year for which to estimate poverty rate

    Returns:
        Tuple of (poverty_rate, estimate_type); poverty_rate is None when
        there is no usable survey.
    """

    if country_data.empty:
        return None, 'no data available'

    # Compare and look up on the same integer-cast years so the result does
    # not depend on the dtype the year column happens to have.
    target_year = int(target_year)
    years = country_data['reporting_year'].astype(int).to_numpy()
    rates = country_data['headcount'].to_numpy()

    # A survey without a headcount cannot contribute to an estimate
    has_rate = ~pd.isna(rates)
    years, rates = years[has_rate], rates[has_rate]
    if len(years) == 0:
        return None, 'no data available'

    is_before = years < target_year
    is_after = years > target_year

    # Case 1: Can interpolate (have years both before and after)
    if is_before.any() and is_after.any():
        year_before = years[is_before].max()
        year_after = years[is_after].min()

        # First survey listed for each bracketing year
        rate_before = rates[years == year_before][0]
        rate_after = rates[years == year_after][0]

        # Linear interpolation
        weight = (target_year - year_before) / (year_after - year_before)
        interpolated_rate = rate_before + weight * (rate_after - rate_before)

        return interpolated_rate, f'interpolated using {int(year_before)} and {int(year_after)}'

    # Case 2: Only extrapolation possible - use nearest year instead
    nearest_idx = np.argmin(np.abs(years - target_year))
    return rates[nearest_idx], f'from nearest year: {int(years[nearest_idx])}'

def analyze_estimate_quality(result_df: pd.DataFrame) -> pd.DataFrame:
    """
    Analyze the quality and types of estimates in the result.

    The free-text 'estimate_type' values are first collapsed into categories,
    so every "interpolated using X and Y" row counts towards a single
    'interpolated (all)' line and every "from nearest year: Z" row towards a
    single 'nearest year (all)' line. Unrecognised labels are left out.

    Args:
        result_df: DataFrame returned by add_poverty_rates (needs the
            'estimate_type' and 'poverty_rate' columns)

    Returns:
        Summary DataFrame with one row per category, most frequent first, and
        the columns estimate_type, count, percentage (of all rows in
        result_df), avg_poverty_rate and notes.
    """
    columns = ['estimate_type', 'count', 'percentage', 'avg_poverty_rate', 'notes']

    notes_by_category = {
        'exact year': 'Direct survey data',
        'interpolated (all)': 'Linear interpolation between surveys',
        'nearest year (all)': 'Nearest survey year (no extrapolation)',
        'no data available': 'Country not in PIP dataset',
        'no year specified': 'Year missing in country list',
    }
    # Categories that never carry a poverty rate
    categories_without_rate = ('no data available', 'no year specified')

    def categorize(label):
        """Map one estimate_type label to its summary category (None if unknown)."""
        if not isinstance(label, str):
            return None
        if label == 'exact year':
            return 'exact year'
        if 'interpolated' in label:
            return 'interpolated (all)'
        if 'nearest year' in label:
            return 'nearest year (all)'
        if label in categories_without_rate:
            return label
        return None

    categories = result_df['estimate_type'].map(categorize)
    total_rows = len(result_df)

    summary_data = []
    # value_counts skips the None category and orders by frequency
    for category, count in categories.value_counts().items():
        if category in categories_without_rate:
            avg_rate = np.nan
        else:
            avg_rate = result_df.loc[categories == category, 'poverty_rate'].mean()
        summary_data.append({
            'estimate_type': category,
            'count': int(count),
            'percentage': count / total_rows * 100,
            'avg_poverty_rate': avg_rate,
            'notes': notes_by_category[category]
        })

    return pd.DataFrame(summary_data, columns=columns)


In [187]:
# World Bank indicator export; used below only as a country-name -> country-code lookup.
wb_non_pip_data = pd.read_csv(
    filepath_or_buffer='/Users/leo/Documents/gpl/eop/data/world_bank_extreme_poverty_rates/API_11_DS2_en_csv_v2_3119.csv',
)

# Countries (and reference years) of interest; every column is kept as text.
country_list = pd.read_csv(
    filepath_or_buffer='/Users/leo/Documents/gpl/eop/data/world_bank_pip/relevant_country_list.csv',
    dtype=str,
)

# One row per distinct (name, code) pair.
country_name_to_code = wb_non_pip_data.loc[:, ['Country Name', 'Country Code']].drop_duplicates()

# Disabled: alternative input from a manual PIP download.
if False:
    wb_pip_data = pd.read_csv(
        filepath_or_buffer='/Users/leo/Documents/gpl/eop/data/world_bank_pip/cp-download.csv',
    )
    wb_pip_data = wb_pip_data.merge(
        country_name_to_code,
        how='left',
        left_on='country_code',
        right_on='Country Code',
    ).drop(columns='Country Code')


# Attach the country code to each listed country; names without a match keep a missing code.
country_list = country_list.merge(
    country_name_to_code,
    how='left',
    left_on='Country',
    right_on='Country Name',
).drop(columns='Country Name')

In [None]:
# 'Country Code' comes from a left merge, so unmatched country names carry a
# missing code, and a country listed more than once would be downloaded
# repeatedly. Request each known code exactly once, in order of appearance.
countries = country_list['Country Code'].dropna().unique().tolist()
downloader = PIPDataDownloader()

# Survey-year estimates only (no gap filling) at the 2.15 poverty line.
pip_data = downloader.get_complete_data(
    country_codes=countries,
    poverty_lines=[2.15],
    include_interpolated=False
)

if not pip_data.empty:
    print(f"Complete data: {len(pip_data)} records")    

Downloading complete data for 48 countries...
Include interpolated data: False
Processing country 1/48: NGA
Processing country 2/48: COD
Processing country 3/48: IND
Processing country 4/48: TZA
Processing country 5/48: MOZ
Processing country 6/48: MDG
Processing country 7/48: UGA
Processing country 8/48: ETH
Processing country 9/48: MWI
Processing country 10/48: VEN
Processing country 11/48: NER
Processing country 12/48: ZAF
Processing country 13/48: SDN
Processing country 14/48: SSD
Processing country 15/48: KEN
Processing country 16/48: SOM
Error fetching poverty data: 404 Client Error: Not Found for url: https://api.worldbank.org/pip/v1/pip?format=json&poverty_line=2.15&fill_gaps=false&ppp_version=2017&country=SOM
Processing country 17/48: MEX
Processing country 18/48: BGD
Processing country 19/48: PAK
Processing country 20/48: ZWE
Processing country 21/48: IDN
Processing country 22/48: GHA
Processing country 23/48: TCD
Processing country 24/48: COL
Processing country 25/48: PHL
Pr

  combined_df = pd.concat(all_data, ignore_index=True)


In [None]:
# Persist the downloaded rows (without the index) so they can be reloaded from disk.
pip_data.to_csv(
    path_or_buf='/Users/leo/Documents/gpl/eop/data/world_bank_pip/selected_countries_from_api.csv',
    index=False,
)

In [184]:
# Reload the previously saved API download from disk.
pip_data = pd.read_csv(
    filepath_or_buffer='/Users/leo/Documents/gpl/eop/data/world_bank_pip/selected_countries_from_api.csv',
)

In [188]:
# Attach a poverty rate (and how it was derived) to every listed country
result = add_poverty_rates(country_list, pip_data, poverty_line=2.15)

print("\nResult with poverty rates:")
display(result)

# Export only the country name, the rate and the estimate description
result.to_csv(
    path_or_buf='/Users/leo/Documents/gpl/eop/data/world_bank_pip/relevant_countries_with_pip_poverty_rates.csv',
    columns=['Country', 'poverty_rate', 'estimate_type'],
    index=False,
)

# Analyze estimate quality
# quality_analysis = analyze_estimate_quality(result)
# print("\nEstimate quality analysis:")
# print(quality_analysis)



Result with poverty rates:


Unnamed: 0,Country,Year,Country Code,poverty_rate,estimate_type
0,Malawi,2020.0,MWI,0.7006,from nearest year: 2019
1,India,2022.0,IND,0.0235,exact year
2,"Congo, Dem. Rep.",2016.0,COD,0.74315,interpolated using 2012 and 2020
3,Nigeria,2018.0,NGA,0.3086,exact year
4,Bangladesh,2016.0,BGD,0.1347,exact year
5,Ethiopia,2021.0,ETH,0.3196,exact year
6,Togo,2018.0,TGO,0.2843,exact year
7,Tanzania,2020.0,TZA,0.4495,from nearest year: 2018
8,Uganda,2019.0,UGA,0.4212,exact year
9,Albania,2012.0,ALB,0.0062,exact year


# Archive

In [130]:
downloader = PIPDataDownloader()
# The class defines the time-series method under the name
# `unused_get_country_time_series`; calling `get_country_time_series`
# raises AttributeError on a fresh kernel.
ethiopia_data = downloader.unused_get_country_time_series("ETH", poverty_line=2.15)
if not ethiopia_data.empty:
    print(f"Ethiopia data: {len(ethiopia_data)} records")
    print(f"Years: {ethiopia_data['reporting_year'].min()} - {ethiopia_data['reporting_year'].max()}")
    print(f"Data types: {ethiopia_data['data_type'].unique()}")
    print(ethiopia_data[['reporting_year', 'headcount', 'data_type']])

# Same country through the bulk entry point, survey years only.
complete_data = downloader.get_complete_data(
    country_codes=['ETH'],
    poverty_lines=[2.15],
    include_interpolated=False
)


  return pd.concat(data_frames, ignore_index=True)


Ethiopia data: 52 records
Years: 1981 - 2025
Data types: ['survey' 'interpolated']
    reporting_year  headcount     data_type
0             1981     0.7126        survey
1             1995     0.6921        survey
2             1999     0.5803        survey
3             2004     0.3458        survey
4             2010     0.3075        survey
5             2015     0.2698        survey
6             2021     0.3196        survey
7             1981     0.6170  interpolated
8             1982     0.6127  interpolated
9             1983     0.6127  interpolated
10            1984     0.6534  interpolated
11            1985     0.6731  interpolated
12            1986     0.6405  interpolated
13            1987     0.6280  interpolated
14            1988     0.6435  interpolated
15            1989     0.6529  interpolated
16            1990     0.6785  interpolated
17            1991     0.7237  interpolated
18            1992     0.7316  interpolated
19            1993     0.7185  interp

In [None]:
# Disabled example kept for reference; flip the condition to run it.
# Method names below match the ones PIPDataDownloader actually defines.
if False:

    """Example usage demonstrating different approaches"""

    downloader = PIPDataDownloader()

    print("=== Testing single country (Ethiopia) ===")
    ethiopia_data = downloader.unused_get_country_time_series("ETH", poverty_line=2.15)
    if not ethiopia_data.empty:
        print(f"Ethiopia data: {len(ethiopia_data)} records")
        print(f"Years: {ethiopia_data['reporting_year'].min()} - {ethiopia_data['reporting_year'].max()}")
        print(f"Data types: {ethiopia_data['data_type'].unique()}")
        print(ethiopia_data[['reporting_year', 'headcount', 'data_type']].head(10))

    print("\n=== Getting survey data only (no interpolation) ===")
    # Test with a few countries first
    test_countries = ["BRA", "IND", "USA", "CHN", "NGA"]
    survey_data = downloader._unused_get_survey_data_only(
        country_codes=test_countries,
        poverty_lines=[2.15]
    )

    if not survey_data.empty:
        print(f"Survey data: {len(survey_data)} records")
        print(f"Countries: {survey_data['country_name'].nunique()}")
        print(f"Year range: {survey_data['reporting_year'].min()} - {survey_data['reporting_year'].max()}")
        
        # Save survey data
        survey_data.to_csv("pip_survey_data_only.csv", index=False)
        print("Survey data saved to pip_survey_data_only.csv")

    print("\n=== Getting complete data with interpolation ===")
    complete_data = downloader.get_complete_data(
        country_codes=test_countries,
        poverty_lines=[2.15],
        include_interpolated=True
    )

    if not complete_data.empty:
        print(f"Complete data: {len(complete_data)} records")
        complete_data.to_csv("pip_complete_data.csv", index=False)
        print("Complete data saved to pip_complete_data.csv")
        
        # Compare survey vs interpolated counts
        data_type_counts = complete_data['data_type'].value_counts()
        print(f"Data type breakdown: {data_type_counts.to_dict()}")



In [95]:
# List the columns of `poverty_data`.
# NOTE(review): `poverty_data` is not assigned at notebook top level in any visible
# cell (it only appears as a local inside PIPDataDownloader methods) — this cell
# presumably relied on leftover kernel state; confirm its source before re-running.
poverty_data.columns

Index(['region_name', 'region_code', 'country_name', 'country_code',
       'reporting_year', 'reporting_level', 'survey_acronym',
       'survey_coverage', 'survey_year', 'welfare_type',
       'survey_comparability', 'comparable_spell', 'poverty_line', 'headcount',
       'poverty_gap', 'poverty_severity', 'watts', 'mean', 'median', 'mld',
       'gini', 'polarization', 'decile1', 'decile2', 'decile3', 'decile4',
       'decile5', 'decile6', 'decile7', 'decile8', 'decile9', 'decile10',
       'cpi', 'ppp', 'reporting_pop', 'reporting_gdp', 'reporting_pce',
       'is_interpolated', 'distribution_type', 'estimation_type', 'spl', 'spr',
       'pg', 'estimate_type'],
      dtype='object')

In [96]:
# Year, interpolation flag and headcount for the Ethiopia rows of `poverty_data`.
poverty_data.loc[
    poverty_data['country_name'] == 'Ethiopia',
    ['reporting_year', 'is_interpolated', 'headcount'],
]

Unnamed: 0,reporting_year,is_interpolated,headcount
2160,1981,True,0.6725
2161,1982,True,0.6678
2162,1983,True,0.6677
2163,1984,True,0.7073
2164,1985,True,0.7233
2165,1986,True,0.6957
2166,1987,True,0.6829
2167,1988,True,0.6977
2168,1989,True,0.707
2169,1990,True,0.7261


In [97]:
# Ethiopia poverty-rate file, loaded for comparison with the API figures.
eth_poverty_data = pd.read_csv(
    filepath_or_buffer='/Users/leo/Documents/gpl/eop/data/world_bank_pip/ETH_PovertyRate_20250401_2017_01_02_PROD_2025-06-11.csv',
)

In [None]:
# Display the Ethiopia poverty-rate table loaded from CSV in the previous cell.
eth_poverty_data

Unnamed: 0,Years,Poverty rate (%),Population living in poverty (Million),Poverty Line
0,1995,69.21,40.15,2.15
1,1999,58.03,38.53,2.15
2,2004,34.58,26.7,2.15
3,2010,30.75,28.24,2.15
4,2015,26.98,28.41,2.15
5,2021,31.96,39.04,2.15


In [None]:
# For every row of country_list, look up that country's value for the row's year in
# `wb_rates` and write it into two new country_list columns, linearly interpolating
# across the 1960-2024 year columns when the value for that year is missing.
# NOTE(review): `wb_rates` is not assigned in any visible cell — presumably a wide
# table with a 'Country Name' column plus one column per year (named as strings);
# confirm where it comes from before re-running.
for _, row in country_list.iterrows():
    # Case-insensitive match on the country name.
    country_data = wb_rates[wb_rates['Country Name'].str.lower() == row['Country'].lower()]
    # NOTE(review): country_list is read with dtype=str, so int() receives a string here
    # and would raise ValueError for a value such as '2020.0' — verify the Year format.
    year = int(row['Year']) if not pd.isna(row['Year']) else None
    if len(country_data) > 0 and year and str(year) in country_data.columns:
        # Value of the first matching row for that year.
        poverty_rate = country_data[str(year)].values[0]
        if pd.isna(poverty_rate):
            # Interpolate missing values
            # Year columns become the index after the transpose; naming the single
            # resulting column 'Value' only works when exactly one row matched.
            numeric_data = country_data.loc[:, '1960':'2024'].T
            numeric_data.columns = ['Value']
            numeric_data = numeric_data.dropna().reset_index()
            numeric_data['index'] = numeric_data['index'].astype(int)
            # Re-expand to every year 1960-2024, fill the gaps linearly, read off `year`.
            interpolated_value = numeric_data.set_index('index').reindex(range(1960, 2025)).interpolate(method='linear').loc[year, 'Value']
            poverty_rate = interpolated_value if not pd.isna(interpolated_value) else None
        # Writes to every country_list row that shares this country name.
        country_list.loc[country_list['Country'] == row['Country'], 'interpolated poverty Rate'] = poverty_rate
        country_list.loc[country_list['Country'] == row['Country'], 'Year of interpolated poverty rate'] = str(year)
    else:
        # Reached when the country is not found, the Year is missing, or there is no
        # column for that year; the message does not distinguish between these.
        poverty_rate = None
        print(f'No data found for {row["Country"]}')

No data found for Indonesia
No data found for Chad
No data found for Vietnam


In [None]:
# NOTE(review): this cell repeats the previous cell's loop line for line; re-running it
# recomputes and overwrites the same two country_list columns.
# For every row of country_list, look up that country's value for the row's year in
# `wb_rates` (not assigned in any visible cell — confirm its source) and store it,
# linearly interpolating across the 1960-2024 year columns when the value is missing.
for _, row in country_list.iterrows():
    # Case-insensitive match on the country name.
    country_data = wb_rates[wb_rates['Country Name'].str.lower() == row['Country'].lower()]
    # NOTE(review): Year is a string here (country_list is read with dtype=str); int()
    # would raise ValueError for a value such as '2020.0' — verify the Year format.
    year = int(row['Year']) if not pd.isna(row['Year']) else None
    if len(country_data) > 0 and year and str(year) in country_data.columns:
        # Value of the first matching row for that year.
        poverty_rate = country_data[str(year)].values[0]
        if pd.isna(poverty_rate):
            # Interpolate missing values
            # Assigning a single column name only works when exactly one row matched.
            numeric_data = country_data.loc[:, '1960':'2024'].T
            numeric_data.columns = ['Value']
            numeric_data = numeric_data.dropna().reset_index()
            numeric_data['index'] = numeric_data['index'].astype(int)
            # Re-expand to every year 1960-2024, fill the gaps linearly, read off `year`.
            interpolated_value = numeric_data.set_index('index').reindex(range(1960, 2025)).interpolate(method='linear').loc[year, 'Value']
            poverty_rate = interpolated_value if not pd.isna(interpolated_value) else None
        # Writes to every country_list row that shares this country name.
        country_list.loc[country_list['Country'] == row['Country'], 'interpolated poverty Rate'] = poverty_rate
        country_list.loc[country_list['Country'] == row['Country'], 'Year of interpolated poverty rate'] = str(year)
    else:
        # Also reached when the Year is missing or has no matching column.
        poverty_rate = None
        print(f'No data found for {row["Country"]}')

In [None]:
# Write the country list, including the poverty-rate columns added above, to disk.
country_list.to_csv(
    path_or_buf='/Users/leo/Documents/gpl/eop/data/world_bank_extreme_poverty_rates/relevant_country_list_with_poverty_values.csv',
    index=False,
)