<a href="https://colab.research.google.com/github/rudycav/Web-Scraping-CoronaVirus-Cases/blob/master/Project/%20CoronaVirus_Cases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [575]:
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
from pandas import DataFrame
import pandas as pd
from datetime import datetime
import numpy as np
import re
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


def webscrape(url = 'https://www.worldometers.info/coronavirus/'):
    """Download a page and return its country tables as a single DataFrame.

    Every ``<table>`` carrying the CSS class ``main_table_countries`` is read.
    The first ``<tr>`` of each table supplies the column names and every
    following row becomes one record. Records from all matching tables are
    appended to the same frame, in document order, so the result can contain
    more than one table's worth of rows. Cell text is kept as raw strings;
    no numeric conversion happens here.

    Parameters
    ----------
    url : str
        Address of the page to fetch.

    Returns
    -------
    pandas.DataFrame
        One row per table row. The columns ``"1stcase"`` and
        ``"Serious,Critical"`` are renamed to ``"FirstCase"`` and
        ``"Critical"`` when present.

    Raises
    ------
    requests.exceptions.RequestException
        If the request times out, cannot connect, or the server answers with
        an HTTP error status.
    """
    request_headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    # The headers must go in by keyword: the second positional argument of
    # requests.get is `params`, which would put the User-Agent in the query
    # string instead of the request headers.
    response = requests.get(url, headers=request_headers, timeout=30)
    # Fail loudly on 4xx/5xx rather than parsing an error page into an empty frame.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    records = []
    for table in soup.find_all('table', class_='main_table_countries'):
        table_rows = table.find_all('tr')
        if not table_rows:
            # A table without rows has no header to read; nothing to collect.
            continue
        column_names = [cell.text for cell in table_rows[0].find_all('th')]
        for row in table_rows[1:]:
            values = [cell.text for cell in row.find_all(['th', 'td'])]
            if values:
                # zip stops at the shorter sequence, so a row with more cells
                # than the header row cannot raise IndexError.
                records.append(dict(zip(column_names, values)))

    return pd.DataFrame(records).rename(columns={"1stcase": "FirstCase", "Serious,Critical": "Critical"})


# Fetch the landing page here so this cell works on a fresh kernel: the
# `title_numbers` list built inside webscrape() is local to that function and
# is never visible at notebook level.
summary_page = requests.get(
    'https://www.worldometers.info/coronavirus/',
    headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'},
    timeout=30,
)
summary_page.raise_for_status()
title_numbers = BeautifulSoup(summary_page.content, 'lxml').find_all(['h1', 'span'])

date = datetime.now()
date_time = date.strftime("%m/%d/%Y %H:%M:%S")

print('COVID-19 CORONAVIRUS PANDEMIC')
print('As of', date_time)
print()
# NOTE(review): positions 5, 7 and 9 are tied to the current page layout and
# will pick up the wrong elements if the site adds or removes an <h1>/<span>
# ahead of the counters -- confirm against the live page.
print('Cases : ', title_numbers[5].text)
print('Deaths : ', title_numbers[7].text)
print('Recovered : ', title_numbers[9].text)

COVID-19 CORONAVIRUS PANDEMIC
As of 10/06/2022 04:02:50

Cases :  624,918,177 
Deaths :  6,555,411
Recovered :  604,737,533


In [578]:
# Fetch the page and build the raw table; every cell is still a string at this point.
df = webscrape()

def punctuation_removal(df):
    """Convert one column of scraped text to integers, leaving text columns untouched.

    Meant to be passed to ``DataFrame.apply``, so despite its name ``df`` is a
    single column (a ``pandas.Series``).

    The literal ``N/A`` and thousands separators (``,``) are removed, and
    surrounding whitespace is stripped. Blank or missing cells become ``0``.
    The remaining text is parsed as a float and then cast to ``int``. A
    leading ``+`` needs no special handling because float parsing accepts it.

    Parameters
    ----------
    df : pandas.Series
        Column to clean.

    Returns
    -------
    pandas.Series
        An integer column when every cell could be parsed; otherwise the
        original column, unchanged (for example a column of country names, or
        a column that is not text to begin with).
    """
    try:
        text = (
            df.str.replace('N/A', '', regex=False)
              .str.replace(',', '', regex=False)
              .str.strip()
        )
        # Blank and missing cells both count as zero.
        is_blank = text.isna() | (text == '')
        return text.mask(is_blank, '0').astype(float).astype(int)
    except (AttributeError, TypeError, ValueError):
        # AttributeError: the column is not text, so `.str` is unavailable.
        # ValueError/TypeError: at least one cell is not a number.
        # In both cases the column is deliberately left as it was.
        return df

# Run punctuation_removal on each column in turn (DataFrame.apply passes every column as a Series).
df = df.apply(punctuation_removal)


In [580]:
# Replace every newline character inside the country names with a single space.
# regex=True makes the pattern match within each string rather than whole cells.
df['Country,Other'] = df['Country,Other'].replace(r'\n',' ', regex=True) 

In [591]:
# Check which columns were converted to integers and which stayed as text (object).
df.dtypes

#                       int64
Country,Other          object
TotalCases              int64
NewCases                int64
TotalDeaths             int64
NewDeaths               int64
TotalRecovered          int64
NewRecovered            int64
ActiveCases             int64
Critical                int64
Tot Cases/1M pop        int64
Deaths/1M pop           int64
TotalTests              int64
Tests/\n1M pop\n        int64
Population              int64
Continent              object
1 Caseevery X ppl       int64
1 Deathevery X ppl      int64
1 Testevery X ppl       int64
New Cases/1M pop        int64
New Deaths/1M pop       int64
Active Cases/1M pop     int64
dtype: object

In [582]:
# Preview the first rows; in the recorded output these are continent-level totals, not countries.
df.head()

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,Critical,...,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,0,North America,116665220,0,1542172,0,111708347,2623,3414701,7937,...,0,0,0,North America,0,0,0,0,0,0
1,0,Asia,190450390,28906,1481258,29,184659504,38468,4309628,10651,...,0,0,0,Asia,0,0,0,0,0,0
2,0,Europe,228582926,0,1922945,0,221716969,58547,4943012,9959,...,0,0,0,Europe,0,0,0,0,0,0
3,0,South America,64157375,0,1330347,0,62453298,862,373730,10214,...,0,0,0,South America,0,0,0,0,0,0
4,0,Oceania,12411438,1443,21034,15,12248217,0,142187,97,...,0,0,0,Australia/Oceania,0,0,0,0,0,0


In [583]:
# Drop the first 8 and the last 8 rows by position.
# NOTE(review): the leading rows look like continent/world aggregates ('#' is 0
# in the preview), which is presumably why they are cut -- confirm the same holds
# for the trailing 8. The recorded output also shows the index reaching 728 while
# '#' only reaches 229, which suggests df holds rows from several tables; a
# positional slice would then leave aggregate rows and repeated countries in the
# middle of df2. TODO confirm.
df2 = df.iloc[8:-8]

In [584]:
# Display the trimmed frame (pandas elides the middle rows and columns in the rendered output).
df2

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,Critical,...,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
8,1,USA,98411240,0,1086685,0,95432346,0,1892209,2733,...,1119730103,3344422,334805269,North America,3,308,0,0,0,5652
9,2,India,44604463,0,528745,0,44043436,0,32282,698,...,895958696,636953,1406631776,Asia,32,2660,2,0,0,23
10,3,France,35639699,0,155319,0,34676080,0,808300,869,...,271490188,4139547,65584518,Europe,2,422,0,0,0,12325
11,4,Brazil,34743598,0,686640,0,33898481,0,158477,8318,...,63776166,296146,215353593,South America,6,314,3,0,0,736
12,5,Germany,33652255,0,150289,0,32512500,39100,989466,1406,...,122332384,1458359,83883596,Europe,2,558,1,0,0,11796
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,226,Niue,80,0,0,0,80,0,0,0,...,0,0,1622,Australia/Oceania,20,0,0,0,0,0
726,227,Vatican City,29,0,0,0,29,0,0,0,...,0,0,799,Europe,28,0,0,0,0,0
727,228,Tuvalu,20,0,0,0,0,0,20,0,...,0,0,12066,Australia/Oceania,603,0,0,0,0,1658
728,229,Western Sahara,10,0,1,0,9,0,0,0,...,0,0,626161,Africa,62616,626161,0,0,0,0
