# Web Scraping
This notebook scrapes https://covid19.ncdc.gov.ng to build a per-state COVID-19 report for Nigeria. For each state it collects: States Affected, No. of Cases (Lab Confirmed), No. of Cases (on admission), No. Discharged, and No. of Deaths.

In [1]:
#import libraries
import numpy as np
import pandas as pd
import csv
from bs4 import BeautifulSoup
import requests

In [2]:
#define class and functions
class WebScraper:
    """Scrape the first HTML table on a page and dump its rows to a CSV file.

    Rows are buffered in ``self.results`` as lists of cell strings: the
    header row (``<th>`` cells) first, followed by one list per data row
    (``<td>`` cells). Cell text is stored exactly as it appears in the HTML,
    so surrounding whitespace and line breaks are preserved.
    """

    def __init__(self):
        # Per-instance buffer. A class-level list would be shared by every
        # WebScraper object, so rows from one scraper would leak into another.
        self.results = []

    def fetch(self, url, timeout=30):
        """Download ``url`` and return the ``requests`` response object.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled connection blocks forever.
        """
        return requests.get(url, timeout=timeout)

    def parse(self, html):
        """Append the header and data rows of the first table in ``html``.

        Args:
            html: Markup of the page as a string.

        Raises:
            ValueError: If the markup contains no ``<table>`` element, or the
                table has no ``<tr>`` rows.
        """
        content = BeautifulSoup(html, 'html.parser')
        covid_data = content.find('table')
        if covid_data is None:
            raise ValueError('No <table> element found in the supplied HTML')

        rows = covid_data.find_all('tr')
        if not rows:
            raise ValueError('The table contains no <tr> rows')

        # The first row is taken as the header row.
        self.results.append([header.text for header in rows[0].find_all('th')])

        for row in rows:
            cells = row.find_all('td')
            # Rows without <td> cells (such as the header row) are skipped.
            if cells:
                self.results.append([data.text for data in cells])

    def to_csv(self, path='covid19_data.csv'):
        """Write the buffered rows to ``path`` as CSV.

        Args:
            path: Destination file; defaults to ``covid19_data.csv`` in the
                current working directory.
        """
        # newline='' lets the csv module control line endings itself, which
        # avoids doubled carriage returns on Windows and keeps line breaks
        # embedded in cell text intact. UTF-8 keeps the file independent of
        # the platform's locale encoding.
        with open(path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerows(self.results)

    def run(self, url='https://covid19.ncdc.gov.ng/'):
        """Fetch ``url``, parse its first table and save it with ``to_csv``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            ValueError: If the page has no usable table (see ``parse``).
        """
        response = self.fetch(url)
        # Fail loudly on an error page instead of trying to parse it.
        response.raise_for_status()
        self.parse(response.text)
        self.to_csv()

# Run the scrape only when this code executes as the top-level program;
# the guard keeps it from firing if the code is imported as a module.
if __name__ == '__main__':
    scraper = WebScraper()
    # run() downloads the page, parses the table and writes covid19_data.csv.
    scraper.run()

In [3]:
# Load the CSV written by the scraper back into a DataFrame.
df = pd.read_csv(filepath_or_buffer='covid19_data.csv')

In [4]:
# Show the raw scraped table; cell text still carries the "\r\n" line breaks
# visible in the output below.
df

Unnamed: 0,States Affected,No. of Cases (Lab Confirmed),No. of Cases (on admission),No. Discharged,No. of Deaths
0,\r\nLagos\r\n,"9,482\r\n","7,881\r\n","1,475\r\n",126
1,\r\nFCT\r\n,"1,656\r\n","1,110\r\n",514\r\n,32
2,\r\nKano\r\n,"1,191\r\n",347\r\n,793\r\n,51
3,\r\nOyo\r\n,"1,188\r\n",842\r\n,337\r\n,9
4,\r\nRivers\r\n,936\r\n,427\r\n,473\r\n,36
5,\r\nEdo\r\n,851\r\n,562\r\n,257\r\n,32
6,\r\nOgun\r\n,733\r\n,263\r\n,453\r\n,17
7,\r\nDelta\r\n,715\r\n,534\r\n,159\r\n,22
8,\r\nKaduna\r\n,668\r\n,346\r\n,312\r\n,10
9,\r\nBauchi\r\n,493\r\n,103\r\n,378\r\n,12


## Data Cleaning 

In [5]:
# Step 1: remove the "\r\n" line breaks that surround the scraped cell text.
stripped = df.replace(to_replace="\r\n", value="", regex=True)

# Step 2: every column except the state name holds a count. Drop the
# thousands separators ("9,482" -> "9482") and convert to a numeric dtype so
# the values can be summed, sorted and plotted instead of staying as strings.
count_columns = stripped.columns.drop('States Affected')
df_cleaned = stripped.assign(**{
    column: pd.to_numeric(
        stripped[column].astype(str).str.replace(',', '', regex=False).str.strip()
    )
    for column in count_columns
})
df_cleaned

Unnamed: 0,States Affected,No. of Cases (Lab Confirmed),No. of Cases (on admission),No. Discharged,No. of Deaths
0,Lagos,9482,7881,1475,126
1,FCT,1656,1110,514,32
2,Kano,1191,347,793,51
3,Oyo,1188,842,337,9
4,Rivers,936,427,473,36
5,Edo,851,562,257,32
6,Ogun,733,263,453,17
7,Delta,715,534,159,22
8,Kaduna,668,346,312,10
9,Bauchi,493,103,378,12


In [6]:
# List the column labels of the cleaned table.
df_cleaned.columns

Index(['States Affected', 'No. of Cases (Lab Confirmed)',
       'No. of Cases (on admission)', 'No. Discharged', 'No. of Deaths'],
      dtype='object')

In [7]:
# Count the entries in the 'States Affected' column (one per table row).
state_column = df_cleaned['States Affected']
num_of_states = len(state_column)
print(f'There are {num_of_states} states')

There are 36 states


In [8]:
# Save the cleaned table as UTF-8 CSV, leaving out the DataFrame index.
df_cleaned.to_csv(
    path_or_buf='covid19_data_cleaned.csv',
    encoding='utf-8',
    index=False,
)