In [1]:
# libraries for Web Scrapping
import requests
from bs4 import BeautifulSoup

# library for advance string manipulation
import string

# library for data manipulation
import pandas as pd

# library for advance mathematical operations
import numpy as np

# Scrape Table

In [2]:
# importing the datetime library to obtain the current date and time
from datetime import datetime

# function that returns the date and the time
def get_time_of_parsing():
    # datetime object containing current date and time
    now = datetime.now()
    
    # mm-dd-YY H-M-S
    date_string = now.strftime("%b-%d-%Y %H-%M-%S")
    return(date_string)

In [3]:
# running the get_time_of_parsing function to get the date and time at the time of accessing the webpage details
date_string = get_time_of_parsing()

page = requests.get("https://www.worldometers.info/coronavirus/")

# parsing the page using html parser. There are other parsers like lxml
soup = BeautifulSoup(page.content, 'html.parser')

table_wrapper = soup.find(class_='main_table_countries_div')
table = table_wrapper.find_all('table', class_="table table-bordered table-hover main_table_countries")

In [4]:
stat_table = table[0]

In [5]:
# creating an empty list to contain columns
columns = []

# the column names are persent inside the 'th' tag
for cols in stat_table.find_all('th'):
    # cleaning the text of the column names by replacing the unwanted characters by ''
    columns.append(cols.text.replace('\n', '').replace('\xa0', ''))

# cleaning the name of the first column 'Country, others'. Considering only the word 'Country' as the column name
columns[0] = columns[0].split(',')[0]
columns

['Country',
 'TotalCases',
 'NewCases',
 'TotalDeaths',
 'NewDeaths',
 'TotalRecovered',
 'ActiveCases',
 'Serious,Critical',
 'TotCases/1M pop',
 'Deaths/1M pop',
 'TotalTests',
 'Tests/1M pop',
 'Continent']

# Writing the data to a text file 'covid.txt'

In [6]:
# opening the text file in writing mode
with open('covid.txt', 'w') as r:
    # iterating the list items for 'tr' tag. We are taking the rows from 9 because the rows from 0 to 8 are unnecessary
    for row in stat_table.find_all('tr')[9:]:
        # iterate the cell value of each row. The cell value is present inside the 'td' tag
        for cell in row.find_all('td'):
            # write the cell to the text file
            r.write(cell.text)
            # leave a tab after each cell
            r.write('\t')
        # go to the newline after writing a row
        r.write('\n')

# Read covid.txt as a dataframe

In [7]:
# reading the covid.txt file as a dataframe
# header = None: will not consider the first row as the columns
# sep = '\t': parse the cells separated by a tab
# names=columns: assigns the column name
# # index_col=False: will not consider the first column as the row index
covid = pd.read_csv('covid.txt', 
                    encoding='latin-1', 
                    header=None, 
                    sep='\t', 
                    names=columns, 
                    index_col=False
                   )

In [8]:
# accesing the row 221 and saving that on a variable world_report. This contains the details of total cases in the world
world_report = covid.iloc[221]
world_report

Country                Total:
TotalCases          3,523,355
NewCases              +41,984
TotalDeaths           246,394
NewDeaths              +1,731
TotalRecovered      1,141,898
ActiveCases         2,135,063
Serious,Critical       50,524
TotCases/1M pop         452.0
Deaths/1M pop            31.6
TotalTests                NaN
Tests/1M pop              NaN
Continent                 All
Name: 221, dtype: object

In [9]:
# creating a separate dataset that contains the details of total corona cases for each continent
covid_cases_per_continent = covid[214:220].iloc[:, [1,2,3,4,5,6,7,12]]
covid_cases_per_continent

Unnamed: 0,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Continent
214,1269234,10632,74427,647,216273,978534,17725,North America
215,1441999,17613,140078,668,540145,761776,17406,Europe
216,556103,10293,19532,193,292407,244164,5366,Asia
217,202643,2893,10456,213,70270,121917,9862,South America
218,8390,22,115,2,7165,1110,31,Australia/Oceania
219,44265,531,1771,8,14993,27501,130,Africa


In [10]:
# replacing the null values with 0
covid.replace(np.nan, 0, inplace = True)
covid

Unnamed: 0,Country,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",TotCases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,Continent
0,USA,1168985,+8211,67954,+510,174017,927014,16478,3532,205,6976379,21077,North America
1,Spain,247122,+1533,25264,+164,148558,73300,2386,5285,540,1528833,32699,Europe
2,Italy,210717,+1389,28884,+174,81654,100179,1501,3485,478,2153772,35622,Europe
3,UK,182260,0,28131,0,0,153785,1559,2685,414,1129907,16644,Europe
4,France,168396,0,24760,0,50562,93074,3827,2580,379,1100228,16856,Europe
...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,Total:,202643,+2893,10456,+213,70270,121917,9862,0,0,0,0,South America
218,Total:,8390,+22,115,+2,7165,1110,31,0,0,0,0,Australia/Oceania
219,Total:,44265,+531,1771,+8,14993,27501,130,0,0,0,0,Africa
220,Total:,721,0,15,0,645,61,4,0,0,0,0,0


In [11]:
# ignoring the last rows that contains the details of total corona cases for each continent
# this will contain the details of total corona cases for each country 
covid = covid[0:214]
covid

Unnamed: 0,Country,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",TotCases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,Continent
0,USA,1168985,+8211,67954,+510,174017,927014,16478,3532,205,6976379,21077,North America
1,Spain,247122,+1533,25264,+164,148558,73300,2386,5285,540,1528833,32699,Europe
2,Italy,210717,+1389,28884,+174,81654,100179,1501,3485,478,2153772,35622,Europe
3,UK,182260,0,28131,0,0,153785,1559,2685,414,1129907,16644,Europe
4,France,168396,0,24760,0,50562,93074,3827,2580,379,1100228,16856,Europe
...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,Western Sahara,6,0,,0,5,1,0,10,0,0,0,Africa
210,Anguilla,3,0,,0,3,0,0,200,0,0,0,North America
211,Comoros,3,0,,0,0,3,0,3,0,0,0,Africa
212,Saint Pierre Miquelon,1,0,,0,0,1,0,173,0,0,0,North America


In [12]:
# replacing the dataframe indices with the Country name
covid.index = covid.Country

# deleting the column 'Country'
del covid['Country']

In [13]:
covid

Unnamed: 0_level_0,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",TotCases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
USA,1168985,+8211,67954,+510,174017,927014,16478,3532,205,6976379,21077,North America
Spain,247122,+1533,25264,+164,148558,73300,2386,5285,540,1528833,32699,Europe
Italy,210717,+1389,28884,+174,81654,100179,1501,3485,478,2153772,35622,Europe
UK,182260,0,28131,0,0,153785,1559,2685,414,1129907,16644,Europe
France,168396,0,24760,0,50562,93074,3827,2580,379,1100228,16856,Europe
...,...,...,...,...,...,...,...,...,...,...,...,...
Western Sahara,6,0,,0,5,1,0,10,0,0,0,Africa
Anguilla,3,0,,0,3,0,0,200,0,0,0,North America
Comoros,3,0,,0,0,3,0,3,0,0,0,Africa
Saint Pierre Miquelon,1,0,,0,0,1,0,173,0,0,0,North America


# Corona Live Update Email Alerts

In [14]:
# list of the countries for which we want the details to be sent as an email alert
countries = ['India', 'USA', 'Spain', 'Italy', 'China']


html = """\
<html>
<head>

</head>
<body>

<h2>Corona Live Cases Update</h2>
<h3>Updated Date and Time: {}</h3>

<table>
  <tr>
    <th>World Report</th>
    <th>Counts</th>
  </tr>
  <tr>
    <td>Total Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Recovered</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Active Cases</td>
    <td>{}</td>
  </tr>
</table>

<br><br>

<table>
  <tr>
    <th>India</th>
    <th>Cases</th>
  </tr>
  <tr>
    <td>Total Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Recovered</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Active Cases</td>
    <td>{}</td>
  </tr>
</table>

<br><br>

<table>
  <tr>
    <th>USA</th>
    <th>Cases</th>
  </tr>
  <tr>
    <td>Total Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Recovered</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Active Cases</td>
    <td>{}</td>
  </tr>
</table>

<br><br>

<table>
  <tr>
    <th>Spain</th>
    <th>Cases</th>
  </tr>
  <tr>
    <td>Total Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Recovered</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Active Cases</td>
    <td>{}</td>
  </tr>
</table>

<br><br>

<table>
  <tr>
    <th>Italy</th>
    <th>Cases</th>
  </tr>
  <tr>
    <td>Total Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Recovered</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Active Cases</td>
    <td>{}</td>
  </tr>
</table>

<br><br>

<table>
  <tr>
    <th>China</th>
    <th>Cases</th>
  </tr>
  <tr>
    <td>Total Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Cases</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>New Deaths</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Total Recovered</td>
    <td>{}</td>
  </tr>
  <tr>
    <td>Active Cases</td>
    <td>{}</td>
  </tr>
</table>

</body>
</html>
""".format(date_string, world_report[1], world_report[2], world_report[3], world_report[4], world_report[5], world_report[6],
          covid.loc[countries[0]][0], covid.loc[countries[0]][1], covid.loc[countries[0]][2], covid.loc[countries[0]][3],
          covid.loc[countries[0]][4], covid.loc[countries[0]][5],
          covid.loc[countries[1]][0], covid.loc[countries[1]][1], covid.loc[countries[1]][2], covid.loc[countries[1]][3],
          covid.loc[countries[1]][4], covid.loc[countries[1]][5],
          covid.loc[countries[2]][0], covid.loc[countries[2]][1], covid.loc[countries[2]][2], covid.loc[countries[2]][3],
          covid.loc[countries[2]][4], covid.loc[countries[2]][5],
          covid.loc[countries[3]][0], covid.loc[countries[3]][1], covid.loc[countries[3]][2], covid.loc[countries[3]][3],
          covid.loc[countries[3]][4], covid.loc[countries[3]][5],
          covid.loc[countries[4]][0], covid.loc[countries[4]][1], covid.loc[countries[4]][2], covid.loc[countries[4]][3],
          covid.loc[countries[4]][4], covid.loc[countries[4]][5])

# Send Email

In [15]:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# senders address
fromaddr = "Senders Email Address"

# receivers address
toaddr = "Receivers Email Address"

# Create a multipart message and set headers
msg = MIMEMultipart()

# senders address
msg['From'] = fromaddr 
# receivers address
msg['To'] = toaddr
# subject of the email
msg['Subject'] = "Covid 2020 Updates"

part1 = MIMEText(html, 'html')
msg.attach(part1)

s = smtplib.SMTP('smtp.gmail.com', 587)
s.ehlo()
s.starttls()
s.login(fromaddr, "Senders Password")

# convert message to string
text = msg.as_string()

s.sendmail(fromaddr, toaddr, text)
s.quit()

(221, b'2.0.0 closing connection t21sm3149381pgu.39 - gsmtp')