# Import Libraries

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Extraction

In [2]:
# Source page for the scrape: the English Wikipedia article on Nigeria's economy.
url = 'https://en.wikipedia.org/wiki/Economy_of_Nigeria'

In [3]:
# Fetch the page. The timeout stops the cell from hanging forever on a stalled
# connection (requests waits indefinitely by default), and raise_for_status()
# turns an HTTP error (4xx/5xx) into an exception here instead of letting an
# error page be parsed further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [4]:
# Display the response object to check the HTTP status (200 means success).
response

<Response [200]>

In [5]:
# Parse the downloaded HTML text with Python's built-in 'html.parser' backend.
soup_obj = BeautifulSoup(response.text, 'html.parser')

In [6]:
# Collect every <table> element on the page and show how many were found.
tables = soup_obj.find_all('table')
len(tables)

17

In [7]:
# Pick the second table on the page (index 1); its <th> cells, shown in the
# next cell, are the yearly GDP / inflation / debt headers.
# NOTE(review): the index is positional, so it will select a different table
# if the page layout changes — re-check the headers after each fresh scrape.
table = tables[1]

In [8]:
# Inspect the raw <th> cells to see how the column titles are marked up.
table.find_all('th')

[<th>Year
 </th>,
 <th>GDP
 <p><small>(in Bil. US$PPP)</small>
 </p>
 </th>,
 <th>GDP per capita
 <p><small>(in US$ PPP)</small>
 </p>
 </th>,
 <th>GDP
 <p><small>(in Bil. US$nominal)</small>
 </p>
 </th>,
 <th>GDP per capita
 <p><small>(in US$ nominal)</small>
 </p>
 </th>,
 <th>GDP growth
 <p><small>(real)</small>
 </p>
 </th>,
 <th>Inflation rate
 <p><small>(in Percent)</small>
 </p>
 </th>,
 <th>Unemployment
 <p><small>(in Percent)</small>
 </p>
 </th>,
 <th>Government debt
 <p><small>(in % of GDP)</small>
 </p>
 </th>]

In [9]:
column_headers = table.find_all('th')

# Each <th> holds a title plus a <small> unit note on its own line: trim the
# outer whitespace, join the lines together, and turn non-breaking spaces into
# ordinary ones so the names are easy to type later.
headers = [
    th_cell.text.strip().replace('\n', '').replace('\xa0', ' ')
    for th_cell in column_headers
]

headers

['Year',
 'GDP(in Bil. US$PPP)',
 'GDP per capita(in US$ PPP)',
 'GDP(in Bil. US$nominal)',
 'GDP per capita(in US$ nominal)',
 'GDP growth(real)',
 'Inflation rate(in Percent)',
 'Unemployment(in Percent)',
 'Government debt(in % of GDP)']

In [10]:
# Create an empty frame that carries only the scraped column names, then
# display it to confirm the header row looks right.
df = pd.DataFrame(columns = headers)
df

Unnamed: 0,Year,GDP(in Bil. US$PPP),GDP per capita(in US$ PPP),GDP(in Bil. US$nominal),GDP per capita(in US$ nominal),GDP growth(real),Inflation rate(in Percent),Unemployment(in Percent),Government debt(in % of GDP)


In [11]:
# Collect the text of every data row first and build the frame in one call.
# Appending with df.loc[len(df)] inside the loop copies data on every row and,
# because it extends whatever df already holds, duplicates every row when the
# cell is run a second time. Rebuilding df here makes the cell idempotent.
rows = table.find_all('tr')[1:]  # skip the first <tr> (presumably the header row)

records = []
for row in rows:
    single_row_data = [cell.text.strip() for cell in row.find_all('td')]
    if not single_row_data:
        # A <tr> without any <td> cells carries no data; skip it rather than
        # failing on a row whose length cannot match the headers.
        continue
    records.append(single_row_data)

# A row whose cell count differs from len(headers) still raises ValueError
# here, so a change in the table layout is not silently ignored.
df = pd.DataFrame(records, columns=headers)

In [12]:
# Preview the first rows to confirm the scraped cells landed in the right columns.
df.head()

Unnamed: 0,Year,GDP(in Bil. US$PPP),GDP per capita(in US$ PPP),GDP(in Bil. US$nominal),GDP per capita(in US$ nominal),GDP growth(real),Inflation rate(in Percent),Unemployment(in Percent),Government debt(in % of GDP)
0,1990,174.0,1827.1,62.2,652.9,,,,71.7%
1,1991,178.9,1831.2,60.1,615.7,-0.6%,,4.1%,75.0%
2,1992,186.9,1866.4,52.3,521.9,2.2%,,4.1%,70.2%
3,1993,194.4,1892.6,56.8,553.1,1.6%,,4.1%,71.0%
4,1994,199.0,1890.3,80.1,761.0,0.3%,,4.1%,55.9%


# Transformation

In [13]:
# Check the column types: straight after scraping every column is text (object).
df.dtypes

Year                              object
GDP(in Bil. US$PPP)               object
GDP per capita(in US$ PPP)        object
GDP(in Bil. US$nominal)           object
GDP per capita(in US$ nominal)    object
GDP growth(real)                  object
Inflation rate(in Percent)        object
Unemployment(in Percent)          object
Government debt(in % of GDP)      object
dtype: object

In [15]:
# Strip formatting characters so the values can later be parsed as numbers:
# '%' from the percentage columns and ',' (thousands separator) from the
# amount columns.
percent_columns = [
    'GDP growth(real)',
    # Previously skipped, although its header says the values are in percent;
    # handled here like the other percentage columns.
    'Inflation rate(in Percent)',
    'Unemployment(in Percent)',
    'Government debt(in % of GDP)',
]
amount_columns = [
    'GDP(in Bil. US$PPP)',
    'GDP per capita(in US$ PPP)',
    'GDP(in Bil. US$nominal)',
    'GDP per capita(in US$ nominal)',
]

# regex=False makes the replacement a literal match regardless of the pandas
# version's default for the `regex` argument.
for column in percent_columns:
    df[column] = df[column].str.replace('%', '', regex=False)

for column in amount_columns:
    df[column] = df[column].str.replace(',', '', regex=False)



In [16]:
# Preview again to verify the '%' signs are gone from the cleaned columns.
df.head()

Unnamed: 0,Year,GDP(in Bil. US$PPP),GDP per capita(in US$ PPP),GDP(in Bil. US$nominal),GDP per capita(in US$ nominal),GDP growth(real),Inflation rate(in Percent),Unemployment(in Percent),Government debt(in % of GDP)
0,1990,174.0,1827.1,62.2,652.9,,,,71.7
1,1991,178.9,1831.2,60.1,615.7,-0.6,,4.1,75.0
2,1992,186.9,1866.4,52.3,521.9,2.2,,4.1,70.2
3,1993,194.4,1892.6,56.8,553.1,1.6,,4.1,71.0
4,1994,199.0,1890.3,80.1,761.0,0.3,,4.1,55.9


In [18]:
# Target dtype for each column, applied one column at a time below.
# NOTE(review): the three percentage columns are stored as pandas 'string'
# rather than float — presumably because some cells are blank and cannot be
# cast to float directly; confirm whether numeric values are wanted here.
column_dtypes = {
    'Year': 'int',
    'GDP(in Bil. US$PPP)': 'float',
    'GDP per capita(in US$ PPP)': 'float',
    'GDP(in Bil. US$nominal)': 'float',
    'GDP per capita(in US$ nominal)': 'float',
    'GDP growth(real)': 'string',
    'Inflation rate(in Percent)': 'string',
    'Unemployment(in Percent)': 'string',
    'Government debt(in % of GDP)': 'float',
}

for column_name, target_dtype in column_dtypes.items():
    df[column_name] = df[column_name].astype(target_dtype)

In [19]:
# Confirm that the casts above took effect.
df.dtypes

Year                                int32
GDP(in Bil. US$PPP)               float64
GDP per capita(in US$ PPP)        float64
GDP(in Bil. US$nominal)           float64
GDP per capita(in US$ nominal)    float64
GDP growth(real)                   string
Inflation rate(in Percent)         string
Unemployment(in Percent)           string
Government debt(in % of GDP)      float64
dtype: object

# Load Process

In [21]:
# Write the cleaned frame to a CSV file (relative path, so it lands in the
# current working directory); index=False leaves the row index out of the file.
df.to_csv('Webscrapped_Nigeria_Economy.csv', index = False)