In [1]:
# Required packages
import requests 
from bs4 import BeautifulSoup 
import pandas as pd
import re

# Url for the global covid19 data
URL = "https://www.worldometers.info/coronavirus/"
r = requests.get(URL)

# Getting the html source code from the above request "r"
soup = BeautifulSoup(r.content, 'html5lib')
# Retriving the table element with attirbute 'id' = 'main_table_countries_today'
table = soup.find('table', attrs = {'id': 'main_table_countries_today'})


In [2]:
heads = []
# Getting all the table header elements and listing them in to a list.
for th in table.findAll('th'):
    heads.append(th.text)

In [3]:
# Heading of the table
heads

['Country,Other',
 'TotalCases',
 'NewCases',
 'TotalDeaths',
 'NewDeaths',
 'TotalRecovered',
 'ActiveCases',
 'Serious,Critical',
 'Tot\xa0Cases/1M pop',
 'Deaths/1M pop',
 'TotalTests',
 'Tests/\n1M pop\n ',
 'Continent']

In [4]:
# Creating empty data frame with heads as column names
# df = pd.DataFrame(columns = heads)

In [5]:
# Creating empty dictionary with heads as keys

data = {key:[] for key in heads}
data

{'Country,Other': [],
 'TotalCases': [],
 'NewCases': [],
 'TotalDeaths': [],
 'NewDeaths': [],
 'TotalRecovered': [],
 'ActiveCases': [],
 'Serious,Critical': [],
 'Tot\xa0Cases/1M pop': [],
 'Deaths/1M pop': [],
 'TotalTests': [],
 'Tests/\n1M pop\n ': [],
 'Continent': []}

In [6]:
# Getting tbody elments with its childs
for tbody in table.findAll('tbody'):
    # Getting all tr elements from tbody element.
    for tr in tbody.findAll('tr', style=''):
        # Getting text content from tr>td elements and splitting it by new line (\n) character
        row = tr.text.split('\n')
        # Removing the spaces present in the first and last positions of the list
        row = row[1:-1]
        
        # Storing the above row values into the data dictionary 
        for k in range(len(heads)):
            if k in range(1,12):
                # Using Regex to retrive only numbers. 
                data[heads[k]].append(''.join(re.split('[^0-9]*',  row[k])))
            else:
                data[heads[k]].append(row[k])

In [7]:
# Creating a dataframe with above created data dictionary
df = pd.DataFrame(data)
df.head()

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/\n1M pop\n,Continent
0,World,2408352,1777.0,165107,76.0,629101,1614144,54215,309,212,,,All
1,USA,764265,,40565,,71012,652688,13566,2309,123,3861596.0,11666.0,North America
2,Spain,198674,,20453,,77357,100864,7371,4249,437,930230.0,19896.0,Europe
3,Italy,178972,,23660,,47055,108257,2635,2960,391,1356541.0,22436.0,Europe
4,France,152894,,19718,,36578,96598,5744,2342,302,463662.0,7103.0,Europe


In [8]:
# Creating a csv file with the above dataset for further analysis
df.to_csv('./global.csv', index=False)