In [1]:
# Libraries
import pandas as pd
import numpy as np
import requests

In [2]:
# Function to get the data needed for the analysis from the API

def get_spi_data(timeout=30):
    """Download the World Bank SPI.INDEX indicator for all countries.

    The API answers with a JSON list whose second element holds the data
    entries and whose first element is a metadata dict. The metadata reports
    how many pages exist, so every page is requested in turn; otherwise the
    result would be silently truncated whenever the API holds more rows than
    ``per_page``.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per API entry with the columns ``Country`` (country name),
        ``Year`` (the ``date`` field exactly as returned by the API) and
        ``SPI.INDEX`` (the indicator value, missing when the API has none).
        The three columns are present even when no entry was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    ValueError
        If the response does not have the expected ``[metadata, entries]``
        layout (for example an API error message).
    """
    api_url = "https://api.worldbank.org/v2/country/all/indicator/SPI.INDEX"

    # Parameters shared by every request
    base_params = {
        'format': 'json',     # We want the response in json format
        'per_page': '10000'   # Large page size to keep the number of requests low
    }
    columns = ['Country', 'Year', 'SPI.INDEX']

    records = []
    page = 1
    total_pages = 1  # Updated from the metadata of each response

    while page <= total_pages:
        # Explicit timeout so a stalled connection cannot hang the notebook
        response = requests.get(
            api_url,
            params={**base_params, 'page': page},
            timeout=timeout,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page
        response.raise_for_status()
        data = response.json()

        # A valid answer is [metadata, entries]; anything else is an error payload
        if not (isinstance(data, list) and len(data) > 1 and isinstance(data[1], list)):
            raise ValueError("Data not found")

        metadata, entries = data[0], data[1]
        if isinstance(metadata, dict):
            try:
                total_pages = int(metadata.get('pages', total_pages))
            except (TypeError, ValueError):
                pass  # Keep the last known page count if the field is unusable

        for entry in entries:
            if (isinstance(entry, dict)
                    and 'country' in entry and 'date' in entry and 'value' in entry):
                # Keep only the fields needed for the analysis
                records.append({
                    'Country': entry['country']['value'],  # Country name
                    'Year': entry['date'],                 # Year
                    'SPI.INDEX': entry['value']            # SPI index value
                })

        page += 1

    # Passing the column names guarantees the expected schema even with no records
    performance_indicator = pd.DataFrame(records, columns=columns)

    return performance_indicator

# Fetch the SPI data from the API and keep it as a dataframe
performance_indicator = get_spi_data()

# Show the first 10 rows of the dataframe
performance_indicator.head(n=10)

Unnamed: 0,Country,Year,SPI.INDEX
0,Afghanistan,2022,58.014167
1,Afghanistan,2021,58.014167
2,Afghanistan,2020,54.396667
3,Afghanistan,2019,49.75625
4,Afghanistan,2018,49.845
5,Afghanistan,2017,42.577917
6,Afghanistan,2016,37.222917
7,Afghanistan,2015,
8,Afghanistan,2014,
9,Afghanistan,2013,


In [3]:
# Give the raw indicator column a more readable name
performance_indicator = performance_indicator.rename(columns={'SPI.INDEX': 'Performance'})

In [4]:
# Tally the missing values in each column
missing_values_count = performance_indicator.isnull().sum()

print(missing_values_count)

Country           0
Year              0
Performance    2886
dtype: int64


In [5]:
# Sort by Country (ascending) and, within each country, by Year (descending)
performance_indicator = performance_indicator.sort_values(
    by=['Country', 'Year'], ascending=[True, False]
)

# Replace missing values with the nearest available value of the SAME country.
# Both fills run inside each group: calling .bfill() on the result of
# groupby(...).ffill() would operate on the whole column and copy values from
# the next country into a country whose most recent years are missing.
# With the descending year order, ffill copies the next more recent value into
# a gap and bfill then covers gaps that have no more recent value.
# A country with no value at all keeps its missing values.
performance_indicator['Performance'] = (
    performance_indicator
    .groupby('Country')['Performance']
    .transform(lambda scores: scores.ffill().bfill())
)

# Displaying the first 10 rows of the dataframe
performance_indicator.head(10)

Unnamed: 0,Country,Year,Performance
0,Afghanistan,2022,58.014167
1,Afghanistan,2021,58.014167
2,Afghanistan,2020,54.396667
3,Afghanistan,2019,49.75625
4,Afghanistan,2018,49.845
5,Afghanistan,2017,42.577917
6,Afghanistan,2016,37.222917
7,Afghanistan,2015,37.222917
8,Afghanistan,2014,37.222917
9,Afghanistan,2013,37.222917


In [6]:
# Upper-case every country name
performance_indicator = performance_indicator.assign(
    Country=performance_indicator['Country'].str.upper()
)

# Show the first 5 rows (the default for head)
performance_indicator.head()

Unnamed: 0,Country,Year,Performance
0,AFGHANISTAN,2022,58.014167
1,AFGHANISTAN,2021,58.014167
2,AFGHANISTAN,2020,54.396667
3,AFGHANISTAN,2019,49.75625
4,AFGHANISTAN,2018,49.845


In [7]:
# Save the cleaned dataframe to disk; the row index is written as the first,
# unnamed column because to_csv keeps the index by default
performance_indicator.to_csv(path_or_buf='performance_indicator.csv')