In [1]:
import pandas as pd, requests

In [2]:
# World Bank API endpoint for indicator SL.UEM.TOTL.ZS (unemployment, total, % of total labor force),
# restricted to the period 2015-2020 and returned in JSON format.
# The "{}" placeholder for the country code is left unfilled on purpose, so that a
# 'for loop' later on in the code can insert each country code with str.format().
url = (
    "https://api.worldbank.org/v2/country/{}"
    "/indicator/SL.UEM.TOTL.ZS"
    "?date=2015:2020&format=json"
)

In [3]:
# Three-letter country codes to query, in order: the US, China, Germany, India, Japan, Peru,
# the Philippines, Turkey, Vietnam, Switzerland and Nigeria.
# I.e., the countries that we scraped from Yahoo Finance.
scrapedCountries = [
    "USA", "CHN", "DEU", "IND", "JPN", "PER",
    "PHL", "TUR", "VNM", "CHE", "NGA",
]

In [4]:
# Build one API URL per country code in scrapedCountries, request it, and store the parsed
# JSON response in `unclean`, keyed by country code.
unclean = {}
for i in scrapedCountries:
    iterationUrl = url.format(i)
    # A timeout stops one stalled connection from hanging the whole notebook run.
    response = requests.get(iterationUrl, timeout=30)
    # Fail here on an HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()
    payload = response.json()
    # The later cells index payload[1][j], i.e. they expect [metadata, [yearly records...]].
    # NOTE(review): the API appears to signal a bad request or an empty result with a shorter
    # list or a null second item - confirm against the World Bank API documentation.
    # Checking here gives a clear error instead of an IndexError/TypeError further down.
    if not isinstance(payload, list) or len(payload) < 2 or not payload[1]:
        raise ValueError(
            "Unexpected World Bank API response for {!r}: {!r}".format(i, payload)
        )
    unclean[i] = payload
unclean

{'CHE': [{'lastupdated': '2021-12-16',
   'page': 1,
   'pages': 1,
   'per_page': 50,
   'sourceid': '2',
   'sourcename': 'World Development Indicators',
   'total': 6},
  [{'country': {'id': 'CH', 'value': 'Switzerland'},
    'countryiso3code': 'CHE',
    'date': '2020',
    'decimal': 1,
    'indicator': {'id': 'SL.UEM.TOTL.ZS',
     'value': 'Unemployment, total (% of total labor force) (modeled ILO estimate)'},
    'obs_status': '',
    'unit': '',
    'value': 4.94},
   {'country': {'id': 'CH', 'value': 'Switzerland'},
    'countryiso3code': 'CHE',
    'date': '2019',
    'decimal': 1,
    'indicator': {'id': 'SL.UEM.TOTL.ZS',
     'value': 'Unemployment, total (% of total labor force) (modeled ILO estimate)'},
    'obs_status': '',
    'unit': '',
    'value': 4.39},
   {'country': {'id': 'CH', 'value': 'Switzerland'},
    'countryiso3code': 'CHE',
    'date': '2018',
    'decimal': 1,
    'indicator': {'id': 'SL.UEM.TOTL.ZS',
     'value': 'Unemployment, total (% of total labo

In [5]:
# Each country's response lists the unemployment rate 6 times (once per year in 2015-2020 inclusive),
# newest first: position 0 is 2020, position 1 is 2019 ... position 5 is 2015.
# These positions 0 to 5 inclusive drive the secondary (inner) loop of the extraction cells.
years = list(range(6))

In [6]:
# We are extracting panel data: one datapoint per country code and year (hence 11x6 = 66 datapoints).
# Each response is a two-item list: index 0 holds the paging metadata and index 1 holds the
# list of yearly records, so a rate is found at unclean[code][1][position]['value'].
# The country loop is the outer one, so the rates come out grouped by country, newest year first.
unempRates = [
    unclean[code][1][position]['value']
    for code in scrapedCountries
    for position in years
]
unempRates

[8.31,
 3.67,
 3.9,
 4.36,
 4.87,
 5.28,
 5,
 4.6,
 4.3,
 4.4,
 4.5,
 4.6,
 4.31,
 3.14,
 3.38,
 3.75,
 4.12,
 4.62,
 7.11,
 5.27,
 5.33,
 5.41,
 5.51,
 5.56,
 2.97,
 2.4,
 2.4,
 2.8,
 3.1,
 3.4,
 6.24,
 3.03,
 3.18,
 3.35,
 3.38,
 2.92,
 3.36,
 2.24,
 2.34,
 2.55,
 2.69,
 3.07,
 13.92,
 13.67,
 10.89,
 10.82,
 10.84,
 10.24,
 2.27,
 2.04,
 1.16,
 1.87,
 1.85,
 1.85,
 4.94,
 4.39,
 4.71,
 4.8,
 4.92,
 4.8,
 9.01,
 8.53,
 8.45,
 8.39,
 7.06,
 4.31]

In [7]:
# Same country-by-year traversal as for the unemployment rates, but collecting the country name.
# The name sits one level deeper than the rate: each yearly record has a nested 'country'
# object, and the name is that object's own 'value' key.
# One name is collected per yearly record, so each country appears once per year.
dataCountries = [
    unclean[code][1][position]['country']['value']
    for code in scrapedCountries
    for position in years
]
dataCountries

['United States',
 'United States',
 'United States',
 'United States',
 'United States',
 'United States',
 'China',
 'China',
 'China',
 'China',
 'China',
 'China',
 'Germany',
 'Germany',
 'Germany',
 'Germany',
 'Germany',
 'Germany',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'Japan',
 'Japan',
 'Japan',
 'Japan',
 'Japan',
 'Japan',
 'Peru',
 'Peru',
 'Peru',
 'Peru',
 'Peru',
 'Peru',
 'Philippines',
 'Philippines',
 'Philippines',
 'Philippines',
 'Philippines',
 'Philippines',
 'Turkey',
 'Turkey',
 'Turkey',
 'Turkey',
 'Turkey',
 'Turkey',
 'Vietnam',
 'Vietnam',
 'Vietnam',
 'Vietnam',
 'Vietnam',
 'Vietnam',
 'Switzerland',
 'Switzerland',
 'Switzerland',
 'Switzerland',
 'Switzerland',
 'Switzerland',
 'Nigeria',
 'Nigeria',
 'Nigeria',
 'Nigeria',
 'Nigeria',
 'Nigeria']

In [8]:
# Same country-by-year traversal as for the unemployment rates, this time reading each
# yearly record's 'date' key instead of its 'value' key.
# The years arrive as strings (e.g. '2020') and are kept as such.
myDates = [
    unclean[code][1][position]['date']
    for code in scrapedCountries
    for position in years
]
myDates

['2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015',
 '2020',
 '2019',
 '2018',
 '2017',
 '2016',
 '2015']

In [9]:
# Combine the three lists into one dataframe.
# Despite its name, inflationDF holds the unemployment rates: they form its single data
# column, and the country names form its row index.
inflationDF = pd.DataFrame(data=unempRates, index=dataCountries)
# The year strings go into a one-column frame on the default 0..n-1 index.
yearsDF = pd.DataFrame(data=myDates)
# reset_index() moves the country names into an 'index' column and restores a 0..n-1 index,
# so the join pairs each rate with its year by row position.
# Both frames have a column labelled 0; rsuffix='lol' disambiguates the right-hand (year)
# column, which shows up as '0lol'.
bestDF = (
    inflationDF
    .reset_index()
    .join(yearsDF, rsuffix='lol')
)
bestDF

Unnamed: 0,index,0,0lol
0,United States,8.31,2020
1,United States,3.67,2019
2,United States,3.90,2018
3,United States,4.36,2017
4,United States,4.87,2016
...,...,...,...
61,Nigeria,8.53,2019
62,Nigeria,8.45,2018
63,Nigeria,8.39,2017
64,Nigeria,7.06,2016


In [10]:
# Rename every column to something clearer, in one call (the dataframe is modified in place).
# NOTE(review): the rate column is matched by the string '0', not the integer 0. The rendered
# output shows this rename taking effect - presumably the suffix handling in the earlier join
# turned the overlapping label into a string; confirm before changing how bestDF is built.
bestDF.rename(
    columns={
        'index': 'Country_name',
        '0': 'Unemployment_rate',
        '0lol': 'Year',
    },
    inplace=True,
)
bestDF

Unnamed: 0,Country_name,Unemployment_rate,Year
0,United States,8.31,2020
1,United States,3.67,2019
2,United States,3.90,2018
3,United States,4.36,2017
4,United States,4.87,2016
...,...,...,...
61,Nigeria,8.53,2019
62,Nigeria,8.45,2018
63,Nigeria,8.39,2017
64,Nigeria,7.06,2016


In [11]:
# Save the finished dataframe as a CSV file; the path is relative, so it lands in the current working directory.
# NOTE(review): no `index` argument is passed, so pandas' default applies, which should also
# write the 0..n-1 row index as an unnamed first column - confirm that this is wanted.
bestDF.to_csv(path_or_buf='worldBankUnemploymentAPI.csv')