In [634]:
#Importing the required libraries.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json

# DATA SCRAPING

In [679]:
#Scraping World Data
def world_data(url):
    """Scrape the country-wise COVID-19 table from the page at ``url``.

    The page is expected to contain a ``<table id="main_table_countries_today">``
    whose body rows hold one country each. Thousands separators are removed,
    anything that is not a number (e.g. ``'N/A'`` or an empty cell) becomes 0,
    and the rows are ordered by total cases, largest first.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Returns
    -------
    pandas.DataFrame
        Columns ``Countries``, ``Total Cases``, ``Active Cases``,
        ``Total Recovered``, ``Total Deceased``, ``Serious`` and
        ``Cases Per Million``. The numeric columns are float64 and the index
        runs from 1 to ``len(df)``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the expected table is not present in the page.
    """
    # Output column -> position of the matching <td> inside each table row.
    # Insertion order is the column order of the returned frame.
    cell_index = {
        'Countries': 1,
        'Total Cases': 2,
        'Active Cases': 8,
        'Total Recovered': 6,
        'Total Deceased': 4,
        'Serious': 9,
        'Cases Per Million': 10,
    }
    numeric_cols = [name for name in cell_index if name != 'Countries']

    # The first rows of the table body are skipped.
    # NOTE(review): presumably these are world/continent aggregate rows - confirm
    # against the live page if the layout changes.
    first_country_row = 8

    # A timeout keeps the notebook from hanging forever on a stalled connection,
    # and raise_for_status avoids parsing an error page as if it were data.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')

    table = soup.find("table", id="main_table_countries_today")
    if table is None or table.tbody is None:
        raise ValueError("table 'main_table_countries_today' not found at " + str(url))

    cells_needed = max(cell_index.values()) + 1
    records = []
    for row in table.tbody.find_all("tr")[first_country_row:]:
        cells = row.find_all("td")              # look the cells up once per row
        if len(cells) < cells_needed:
            continue                            # malformed/short row: nothing to read
        # get_text() copes with empty cells and nested tags, where .string is None.
        records.append([cells[pos].get_text(strip=True) for pos in cell_index.values()])

    df = pd.DataFrame(records, columns=list(cell_index))

    # Vectorised clean-up. Writing through df[col][i] is chained assignment, which
    # pandas does not guarantee to propagate back to the frame.
    for col in numeric_cols:
        without_commas = df[col].astype(str).str.replace(',', '', regex=False)
        df[col] = pd.to_numeric(without_commas, errors='coerce').astype('float64')

    df = df.sort_values(by=['Total Cases'], ascending=False)   # largest outbreaks first
    df.index = np.arange(1, len(df) + 1)                       # index starts at 1
    df = df.fillna(0)                                          # missing values -> 0 for plotting

    return df




In [680]:
#Scraping Indian Data
def india_data(url, max_rows=37):
    """Scrape the state-wise COVID-19 table from the page at ``url``.

    The page is expected to contain a ``<table class="table table-striped">``.
    Rows that do not have enough cells are skipped instead of raising, and any
    value that is not a number becomes 0.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
    max_rows : int or None, optional
        Only the first ``max_rows`` rows of the table body are read. The
        default of 37 matches the row count this notebook was written against;
        pass ``None`` to read every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``States``, ``Total Cases``, ``Active Cases``,
        ``Total Recovered`` and ``Total Deceased``, ordered by total cases
        (largest first). The numeric columns are float64 and the index runs
        from 1 to ``len(df)``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the expected table is not present in the page.
    """
    # Output column -> position of the matching <td> inside each table row.
    # Insertion order is the column order of the returned frame.
    cell_index = {
        'States': 1,
        'Total Cases': 5,
        'Active Cases': 2,
        'Total Recovered': 3,
        'Total Deceased': 4,
    }
    numeric_cols = [name for name in cell_index if name != 'States']

    # A timeout keeps the notebook from hanging forever on a stalled connection,
    # and raise_for_status avoids parsing an error page as if it were data.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'lxml')

    table = soup.find("table", class_="table table-striped")
    if table is None or table.tbody is None:
        raise ValueError("table with class 'table table-striped' not found at " + str(url))

    rows = table.tbody.find_all("tr")
    if max_rows is not None:
        rows = rows[:max_rows]                  # slicing never raises on a short table

    cells_needed = max(cell_index.values()) + 1
    records = []
    for row in rows:
        cells = row.find_all("td")              # look the cells up once per row
        if len(cells) < cells_needed:
            continue                            # e.g. a footnote row spanning the table
        records.append([cells[pos].text.strip().replace('\n', '') for pos in cell_index.values()])

    # Data cleaning: every numeric column goes through the same tolerant
    # conversion, so one odd cell can no longer abort the whole scrape.
    data_india = pd.DataFrame(records, columns=list(cell_index))
    for col in numeric_cols:
        without_commas = data_india[col].astype(str).str.replace(',', '', regex=False)
        data_india[col] = pd.to_numeric(without_commas, errors='coerce').astype('float64')

    data_india = data_india.sort_values(by=['Total Cases'], ascending=False)   # largest first
    data_india.index = np.arange(1, len(data_india) + 1)                       # index starts at 1
    data_india = data_india.fillna(0)                                          # missing values -> 0 for plotting

    return data_india


In [681]:
#Time Series Indian Data using API
def timeseriesindia(url):
    """Download the India case time series and return it as a DataFrame.

    The JSON document at ``url`` must have a ``'cases_time_series'`` list whose
    entries carry the keys named in ``field_to_column`` below. Values are
    copied as delivered; no type conversion is applied.

    Parameters
    ----------
    url : str
        Address of the JSON endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per entry with the columns ``Date``, ``Daily Cases``,
        ``Daily Recovered``, ``Daily Deceased``, ``Total Confirmed``,
        ``Total Recovered`` and ``Total Deceased``.
    """
    payload = requests.get(url).json()          # fetch and decode the JSON body

    # (key in the JSON entry, column label in the result), in output order.
    field_to_column = (
        ('date', 'Date'),
        ('dailyconfirmed', 'Daily Cases'),
        ('dailyrecovered', 'Daily Recovered'),
        ('dailydeceased', 'Daily Deceased'),
        ('totalconfirmed', 'Total Confirmed'),
        ('totalrecovered', 'Total Recovered'),
        ('totaldeceased', 'Total Deceased'),
    )

    # One tuple per entry, picking the fields in the order listed above.
    rows = [
        tuple(entry[field] for field, _ in field_to_column)
        for entry in payload['cases_time_series']
    ]

    return pd.DataFrame(rows, columns=[label for _, label in field_to_column])
    

In [682]:
# Fetch the three datasets used by the rest of the notebook. Each call performs
# a live HTTP request, so the results depend on what the source returns at run time.
# dw: country-wise world table scraped from the HTML page.
dw = world_data("https://www.worldometers.info/coronavirus/")
# di: state-wise India table scraped from the HTML page.
di = india_data("https://www.mohfw.gov.in/")
# dts: India time series read from the JSON API.
dts = timeseriesindia('https://api.covid19india.org/data.json')

In [683]:
dw

Unnamed: 0,Countries,Total Cases,Active Cases,Total Recovered,Total Deceased,Serious,Cases Per Million
1,USA,3413995.0,1759129.0,1517084.0,137782.0,15822.0,10312.0
2,Brazil,1866176.0,580513.0,1213512.0,72151.0,8318.0,8778.0
3,India,879466.0,301850.0,554429.0,23187.0,8944.0,637.0
4,Russia,727162.0,214766.0,501061.0,11335.0,2300.0,4983.0
5,Peru,326326.0,97345.0,217111.0,11870.0,1316.0,9893.0
6,Chile,315041.0,24160.0,283902.0,6979.0,1995.0,16476.0
7,Spain,300988.0,0.0,0.0,28403.0,617.0,6438.0
8,Mexico,299750.0,79980.0,184764.0,35006.0,378.0,2324.0
9,UK,289603.0,0.0,0.0,44819.0,185.0,4265.0
10,South Africa,276242.0,137289.0,134874.0,4079.0,539.0,4656.0


In [684]:
di

Unnamed: 0,States,Total Cases,Active Cases,Total Recovered,Total Deceased
1,Total#,878254.0,301609.0,553471.0,23174.0
2,Maharashtra,254427.0,103813.0,140325.0,10289.0
3,Tamil Nadu,138470.0,46972.0,89532.0,1966.0
4,Delhi,112494.0,19155.0,89968.0,3371.0
5,Gujarat,41820.0,10613.0,29162.0,2045.0
6,Karnataka,38843.0,22750.0,15409.0,684.0
7,Uttar Pradesh,36476.0,12208.0,23334.0,934.0
8,Telangana,34671.0,11833.0,22482.0,356.0
9,West Bengal,30013.0,10500.0,18581.0,932.0
10,Andhra Pradesh,29168.0,13428.0,15412.0,328.0


In [685]:
dts

Unnamed: 0,Date,Daily Cases,Daily Recovered,Daily Deceased,Total Confirmed,Total Recovered,Total Deceased
0,30 January,1,0,0,1,0,0
1,31 January,0,0,0,1,0,0
2,01 February,0,0,0,1,0,0
3,02 February,1,0,0,2,0,0
4,03 February,1,0,0,3,0,0
5,04 February,0,0,0,3,0,0
6,05 February,0,0,0,3,0,0
7,06 February,0,0,0,3,0,0
8,07 February,0,0,0,3,0,0
9,08 February,0,0,0,3,0,0


# DATA VISUALISATION