## Importing all packages needed for the notebook (scraping with requests/BeautifulSoup and data analysis)

In [8]:
import json
from bs4 import BeautifulSoup
import requests
import wbdata
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pycountry
 
from scipy import stats



## Using requests and BeautifulSoup to retrieve and parse additional data from the OECD website

In [9]:

# OECD SDMX REST query: dataset GIDDB2014, series key ending in AIC.FC_2,
# restricted to the year 2014 via startTime/endTime.
OECD_URL = 'https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/GIDDB2014/ALL.AUS+AUT+BEL+CAN+CHL+CZE+DNK+EST+FIN+FRA+DEU+GRC+HUN+ISL+IRL+ISR+ITA+JPN+KOR+LVA+LTU+LUX+MEX+NLD+NZL+NOR+POL+PRT+SVK+SVN+ESP+SWE+CHE+TUR+GBR+USA+NMEC+AFG+ALB+DZA+AGO+ARG+ARM+AZE+BHR+BGD+BLR+BEN+BTN+BOL+BIH+BWA+BRA+BGR+BFA+BDI+KHM+CMR+CAF+TCD+CHN+COL+COG+COD+CRI+CIV+HRV+CUB+CYP+DOM+ECU+EGY+SLV+GNQ+ERI+ETH+FJI+MKD+GAB+GMB+GEO+GHA+GTM+GIN+GNB+HTI+HND+HKG+IND+IDN+IRN+IRQ+JAM+JOR+KAZ+KEN+PRK+KWT+KGZ+LAO+LBN+LSO+LBR+LBY+MDG+MWI+MYS+MLI+MRT+MUS+MDA+MNG+MAR+MOZ+MMR+NAM+NPL+NIC+NER+NGA+PSE+OMN+PAK+PAN+PNG+PRY+PER+PHL+QAT+ROU+RUS+RWA+SAU+SEN+SRB+SLE+SGP+SOM+ZAF+LKA+SDN+SWZ+SYR+TJK+TZA+THA+TLS+TGO+TTO+TUN+TKM+UGA+UKR+ARE+URY+UZB+VEN+VNM+YEM+ZMB+ZWE.AIC.FC_2/all?startTime=2014&endTime=2014'

# Fail fast: without a timeout the cell can hang indefinitely, and without the
# status check an HTTP error page would be parsed as if it were data, leaving
# the extraction cells below with empty lists.
response = requests.get(OECD_URL, timeout=60)
response.raise_for_status()
data = response.content

# NOTE(review): the "lxml" parser is kept as in the original because the
# following cells look tags up by lower-case name ("value", "time", "obsvalue");
# switching to an XML parser would presumably change tag naming -- confirm
# before changing it.
soup = BeautifulSoup(data, "lxml")

### Using list comprehensions and `soup.find_all` to create the lists needed to build a DataFrame

In [10]:

def _value_attr(tag):
    """Return the first entry of the tag's 'value' attribute list."""
    return tag.get_attribute_list('value')[0]


# Collect each tag family from the parsed response first...
location_tags = soup.find_all("value", attrs={"concept": "LOCATION"})
time_tags = soup.find_all("time")
variable_tags = soup.find_all("value", attrs={"concept": "VAR"})
observation_tags = soup.find_all("obsvalue", attrs={"value": True})

# ...then turn them into the four parallel lists used to build the DataFrame.
list_country = [_value_attr(tag) for tag in location_tags]
list_date = [tag.text for tag in time_tags]
list_variable = [_value_attr(tag) for tag in variable_tags]
list_value = [_value_attr(tag) for tag in observation_tags]



### Make a new list of countries from the old list 'list_country', converting the ISO three-letter (alpha-3) country codes to ISO two-letter (alpha-2) country codes

In [11]:
def _alpha3_to_alpha2(alpha3_code):
    """Translate an ISO alpha-3 country code into its alpha-2 code via pycountry.

    Args:
        alpha3_code: three-letter code as scraped into ``list_country``.

    Returns:
        The ``alpha_2`` attribute of the matching pycountry record.

    Raises:
        ValueError: if pycountry has no record for ``alpha3_code``.
    """
    country = pycountry.countries.get(alpha_3=alpha3_code)
    # A failed lookup yields None; name the offending code instead of letting
    # `.alpha_2` fail with an opaque AttributeError. (The request URL also asks
    # for 'NMEC', which does not look like a country code.)
    if country is None:
        raise ValueError(
            "Unknown ISO alpha-3 country code: {!r}".format(alpha3_code)
        )
    return country.alpha_2


new_countrylist = [_alpha3_to_alpha2(code) for code in list_country]
new_countrylist

['AU',
 'AT',
 'BE',
 'CA',
 'CZ',
 'DK',
 'FI',
 'FR',
 'DE',
 'GR',
 'HU',
 'IS',
 'IE',
 'IT',
 'JP',
 'KR',
 'LU',
 'MX',
 'NL',
 'NZ',
 'NO',
 'PL',
 'PT',
 'SK',
 'ES',
 'SE',
 'CH',
 'TR',
 'GB',
 'US',
 'AF',
 'AL',
 'DZ',
 'AO',
 'AR',
 'AM',
 'AZ',
 'BH',
 'BD',
 'BY',
 'BJ',
 'BT',
 'BO',
 'BA',
 'BW',
 'BR',
 'BG',
 'BF',
 'BI',
 'KH',
 'CM',
 'CF',
 'TD',
 'CL',
 'CN',
 'CO',
 'CG',
 'CR',
 'CI',
 'HR',
 'CU',
 'CY',
 'KP',
 'CD',
 'DO',
 'EC',
 'EG',
 'SV',
 'GQ',
 'ER',
 'EE',
 'ET',
 'FJ',
 'GA',
 'GM',
 'GE',
 'GH',
 'GT',
 'GN',
 'GW',
 'HT',
 'HN',
 'HK',
 'IN',
 'ID',
 'IR',
 'IQ',
 'IL',
 'JM',
 'JO',
 'KZ',
 'KE',
 'KW',
 'KG',
 'LA',
 'LV',
 'LB',
 'LS',
 'LR',
 'LY',
 'LT',
 'MK',
 'MG',
 'MW',
 'MY',
 'ML',
 'MR',
 'MU',
 'MD',
 'MN',
 'MA',
 'MZ',
 'MM',
 'NA',
 'NP',
 'NI',
 'NE',
 'NG',
 'PS',
 'OM',
 'PK',
 'PA',
 'PG',
 'PY',
 'PE',
 'PH',
 'QA',
 'RO',
 'RU',
 'RW',
 'SA',
 'SN',
 'SL',
 'SG',
 'SI',
 'SO',
 'ZA',
 'LK',
 'SD',
 'SZ',
 'SY',
 'TJ',
 'TZ',

### Creating the 'first' DataFrame

In [12]:
# One column per scraped list; 'value' still holds the raw strings here.
oecd_columns = {
    'country': new_countrylist,
    'date': list_date,
    'indicator': list_variable,
    'value': list_value,
}
OECD_df = pd.DataFrame(oecd_columns)
OECD_df

Unnamed: 0,country,date,indicator,value
0,AU,2014,FC_2,0.01
1,AT,2014,FC_2,0
2,BE,2014,FC_2,0.01
3,CA,2014,FC_2,0.03
4,CZ,2014,FC_2,0
5,DK,2014,FC_2,0
6,FI,2014,FC_2,0.01
7,FR,2014,FC_2,0.01
8,DE,2014,FC_2,0
9,GR,2014,FC_2,0.04


### Converting the column 'value' to float (expressed in percent) and replacing the code in the column 'indicator' with a readable label

In [13]:
# Readable label stored in the 'indicator' column in place of the raw code.
INDICATOR_LABEL = 'Percentage of women married between 15-19 years of age'

# Convert the values to type 'float' and multiply with 100 (to show in
# percentage units).
# The percentages are derived from the raw scraped strings in `list_value`
# rather than from OECD_df['value'], so re-running this cell gives the same
# result instead of multiplying by 100 again on every run.
# Rounding happens after scaling (4 decimals of a percent == 6 decimals of the
# original fraction) to strip float noise such as 0.07 * 100 == 7.000000000000001.
value_pct = (
    pd.Series(list_value, index=OECD_df.index)
    .astype(float)
    .mul(100)
    .round(4)
)
OECD_df = OECD_df.assign(value=value_pct, indicator=INDICATOR_LABEL)
OECD_df

Unnamed: 0,country,date,indicator,value
0,AU,2014,Percentage of women married between 15-19 year...,1.0
1,AT,2014,Percentage of women married between 15-19 year...,0.0
2,BE,2014,Percentage of women married between 15-19 year...,1.0
3,CA,2014,Percentage of women married between 15-19 year...,3.0
4,CZ,2014,Percentage of women married between 15-19 year...,0.0
5,DK,2014,Percentage of women married between 15-19 year...,0.0
6,FI,2014,Percentage of women married between 15-19 year...,1.0
7,FR,2014,Percentage of women married between 15-19 year...,1.0
8,DE,2014,Percentage of women married between 15-19 year...,0.0
9,GR,2014,Percentage of women married between 15-19 year...,4.0
