In [634]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json

# DATA SCRAPING

In [None]:
#Scraping World Data
def world_data(url, timeout=30):
    """Scrape the per-country COVID-19 table from a Worldometers-style page.

    The page at ``url`` is downloaded and the table with id
    ``main_table_countries_today`` is parsed.  The first 8 body rows are
    skipped, as in the original implementation (presumably continent/world
    summary rows -- verify against the live page).

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the columns 'Countries', 'Total Cases',
        'Active Cases', 'Total Recovered', 'Total Deceased', 'Serious' and
        'Cases Per Million', sorted by 'Total Cases' in descending order and
        indexed from 1.  Values that cannot be parsed as numbers become 0.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the expected table is not present in the page.
    """
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')

    table = soup.find("table", id="main_table_countries_today")
    if table is None or table.tbody is None:
        raise ValueError("table 'main_table_countries_today' not found at %s" % url)

    columns = ['Countries', 'Total Cases', 'Active Cases', 'Total Recovered',
               'Total Deceased', 'Serious', 'Cases Per Million']
    # <td> position of each numeric column, in the same order as columns[1:].
    numeric_cells = [2, 8, 6, 4, 9, 10]
    cells_needed = max(numeric_cells) + 1

    # To get countrywise updates:
    rows = []
    for tr in table.tbody.find_all("tr")[8:]:
        cells = tr.find_all("td")
        if len(cells) < cells_needed:
            # Malformed/short row: nothing usable to extract.
            continue
        # get_text() never returns None (unlike .string on cells with nested
        # markup), so the comma stripping below cannot fail.
        row = [cells[1].get_text(strip=True)]
        row.extend(cells[k].get_text(strip=True).replace(",", "")
                   for k in numeric_cells)
        rows.append(row)

    df = pd.DataFrame(rows, columns=columns)

    # Convert column by column; unparseable text ("", "N/A", ...) becomes NaN
    # here and is replaced by 0 after sorting.
    cols = columns[1:]
    for col in cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    df = df.sort_values(by=['Total Cases'], ascending=False)
    df.index = np.arange(1, len(df) + 1)
    df = df.fillna(0)

    return df




In [None]:
#Scraping Indian Data
def india_data(url, max_rows=37, timeout=30):
    """Scrape the state-wise COVID-19 table from the page at ``url``.

    The first table with class ``table table-striped`` is parsed.  At most
    ``max_rows`` body rows are read (37 by default, the count the original
    code hard-coded); if the table is shorter, only the rows present are
    used instead of failing with an IndexError.

    Args:
        url: Address of the page to scrape.
        max_rows: Maximum number of leading table rows to read.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the columns 'States', 'Total Cases', 'Active Cases',
        'Total Recovered' and 'Total Deceased', sorted by 'Total Cases' in
        descending order and indexed from 1.  Values that cannot be parsed
        as numbers become 0.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the expected table is not present in the page.
    """
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'lxml')

    table = soup.find("table", class_="table table-striped")
    if table is None or table.tbody is None:
        raise ValueError("table with class 'table table-striped' not found at %s" % url)

    columns = ['States', 'Total Cases', 'Active Cases', 'Total Recovered', 'Total Deceased']
    # <td> positions: 1 = state, 5 = total, 2 = active, 3 = recovered, 4 = deceased.
    numeric_cells = [5, 2, 3, 4]
    cells_needed = max(numeric_cells) + 1

    rows = []
    for tr in table.tbody.find_all("tr")[:max_rows]:
        cells = tr.find_all("td")
        if len(cells) < cells_needed:
            # Footnote/summary rows with fewer cells carry no state data.
            continue
        row = [cells[1].text.strip()]     # State List
        row.extend(
            cells[k].text.strip().replace('\n', "").replace(",", "")
            for k in numeric_cells
        )
        rows.append(row)

    # Data Cleaning
    data_india = pd.DataFrame(rows, columns=columns)
    cols = columns[1:]
    for col in cols:
        # Coerce instead of int(): a stray footnote marker must not abort
        # the whole scrape.
        data_india[col] = pd.to_numeric(data_india[col], errors='coerce')
    data_india[cols] = data_india[cols].fillna(0).astype('int64')

    data_india = data_india.sort_values(by=['Total Cases'], ascending=False)
    data_india.index = np.arange(1, len(data_india) + 1)

    return data_india


In [677]:
#Time Series Indian Data
def timeseriesindia(url):
    """Download the JSON document at ``url`` and tabulate its time series.

    Every entry of the ``cases_time_series`` list becomes one row; the values
    are stored exactly as they appear in the JSON (no type conversion).

    Returns:
        A DataFrame with the columns 'Date', 'Daily Cases', 'Daily Recovered',
        'Daily Deceased', 'Total Confirmed', 'Total Recovered' and
        'Total Deceased'.
    """
    payload = requests.get(url).json()

    # JSON key -> DataFrame column name, in output column order.
    key_to_column = {
        'date': 'Date',
        'dailyconfirmed': 'Daily Cases',
        'dailyrecovered': 'Daily Recovered',
        'dailydeceased': 'Daily Deceased',
        'totalconfirmed': 'Total Confirmed',
        'totalrecovered': 'Total Recovered',
        'totaldeceased': 'Total Deceased',
    }

    records = [
        tuple(entry[key] for key in key_to_column)
        for entry in payload['cases_time_series']
    ]

    return pd.DataFrame(records, columns=list(key_to_column.values()))
    

In [678]:
# Run the three scrapers once and keep the resulting DataFrames in
# module-level names. Each call performs a live HTTP request.
# dw  - per-country table scraped by world_data()
dw = world_data("https://www.worldometers.info/coronavirus/")
# di  - state-wise table scraped by india_data()
di = india_data("https://www.mohfw.gov.in/")
# dts - time series built by timeseriesindia() from the JSON endpoint
dts = timeseriesindia('https://api.covid19india.org/data.json')

# DATA VISUALISATION