# Corona Tracker

In [1]:
from datetime import date

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# MyGov COVID-19 information portal; every figure in this notebook is scraped from this one page
URL = "https://www.mygov.in/covid-19/"

In [3]:
# Download the page HTML.
# - timeout: requests waits indefinitely by default, which would hang the notebook
#   if the portal stops responding.
# - raise_for_status: stop here on a 4xx/5xx reply instead of parsing an error page
#   and failing later with a confusing selector error.
response = requests.get(URL, timeout=30)
response.raise_for_status()
html = response.text

In [4]:
# Parse the downloaded markup into a searchable tree using the lxml parser
soup = BeautifulSoup(markup=html, features="lxml")

In [5]:
# print(soup.prettify())

In [6]:
# The dashboard is the first element matching div#dashboard; the headline
# counters are looked up inside it in the cells below
dashboard = soup.select("div#dashboard", limit=1)[0]

In [7]:
# print(dashboard.prettify())

In [8]:
# Headline counters shown on the dashboard, in page order:
# total cases, active cases, discharged and deaths
totalLabels = ["total_confirmed", "total_active", "total_discharged", "total_deaths"]

# Parse each counter's text (e.g. "1,234") into an int. A new list is built rather
# than overwriting the scraped elements in place, so `totals` only ever holds ints.
totals = [int(tag.text.replace(',', '').strip()) for tag in dashboard.select("span.icount")]

# zip() stops at the shorter input, so a missing counter would otherwise just drop
# a key without any error; fail loudly instead.
if len(totals) < len(totalLabels):
    raise ValueError(
        "Expected at least {} 'span.icount' counters, found {}".format(len(totalLabels), len(totals))
    )

# Map each label to its number
totalsDict = dict(zip(totalLabels, totals))

In [9]:
# Total vaccinations are read from the <strong> inside the first div.total-vcount,
# then stored alongside the other totals
vaccinationTotalText = soup.select("div.total-vcount")[0].strong.text
totalsDict["total_vaccinations"] = int(vaccinationTotalText.replace(',', '').strip())

In [10]:
# totalsDict

In [11]:
# Daily increase for each headline counter, in page order:
# total cases, active cases, discharges and deaths
increaseLabels = ["confirmed_increase", "active_increase", "discharged_increase", "deaths_increase"]

# Parse each block's text into an int. A new list is built rather than overwriting
# the scraped elements in place, so `increases` only ever holds ints.
increases = [int(tag.text.replace(',', '').strip()) for tag in dashboard.select("div.increase_block")]

# zip() stops at the shorter input, so a missing block would otherwise just drop
# a key without any error; fail loudly instead.
if len(increases) < len(increaseLabels):
    raise ValueError(
        "Expected at least {} 'div.increase_block' values, found {}".format(len(increaseLabels), len(increases))
    )

# Map each label to its number
increasesDict = dict(zip(increaseLabels, increases))

In [12]:
# The vaccination increase is read from the <strong> inside the first div.yday-vcount,
# then stored alongside the other increases
vaccinationIncreaseText = soup.select("div.yday-vcount")[0].strong.text
increasesDict["vaccinations_increase"] = int(vaccinationIncreaseText.replace(',', '').strip())

In [13]:
# increasesDict

In [14]:
# Collect every div.views-row element on the page; each one is parsed below as a
# single state/region entry
states = soup.select("div.views-row")

In [15]:
# states

In [16]:
# len(states)

In [17]:
# Build {region name: {label: count}} from the per-state rows.
# The label list is the same for every row, so it is built once, outside the loop.
stateLabels = totalLabels + ["total_vaccinations"]

statesDict = {}
for state in states:
    stateName = state.select("span.st_name")[0].text.strip()
    # The numbers sit in the <small> tags of the row's first div.st_all_counts
    countTags = state.select("div.st_all_counts")[0].select("small")
    stateData = [int(tag.text.replace(',', '').strip()) for tag in countTags]

    statesDict[stateName] = dict(zip(stateLabels, stateData))

In [18]:
# statesDict

In [19]:
# Empty frame: one row for India's totals, one for its daily increases, then one
# per region; the columns are the labels of the first region's data
rowLabels = ["INDIA", "INDIA (Increases)", *statesDict]
columnLabels = list(list(statesDict.values())[0])
df = pd.DataFrame(index=rowLabels, columns=columnLabels)

In [20]:
# Fill the two national rows.
# The totals are assigned as a dict whose keys are the same labels used for the columns.
df.loc["INDIA"] = totalsDict
# The increases use different key names, so only their values are assigned, in
# insertion order (confirmed, active, discharged, deaths, vaccinations).
df.loc["INDIA (Increases)"] = [*increasesDict.values()]

In [21]:
# Fill one row per region from its {label: count} dict
for regionName, regionCounts in statesDict.items():
    df.loc[regionName] = regionCounts

In [22]:
# Swap the scraped snake_case labels for display-friendly headers (same order)
displayColumns = ["Confirmed", "Active", "Discharged", "Deaths", "Vaccinations"]
df.columns = displayColumns

In [23]:
# Preview the assembled table; as the cell's last expression, `df` is rendered by the notebook
df

Unnamed: 0,Confirmed,Active,Discharged,Deaths,Vaccinations
INDIA,19557457,3349644,15992271,215542,156816031
INDIA (Increases),392488,80934,307865,3689,1826219
Andaman and Nicobar,6046,205,5773,68,104374
Andhra Pradesh,1121102,130752,982297,8053,6579817
Arunachal Pradesh,18636,1387,17190,59,241483
Assam,256576,26374,228872,1330,2484613
Bihar,484106,108203,373261,2642,6968234
Chandigarh,43446,7222,35735,489,208489
Chhattisgarh,744602,121099,614693,8810,5571099
Dadra and Nagar Haveli and Daman and Diu,7712,1867,5841,4,95496


In [24]:
# Save the data into a CSV file named after today's date (ISO format, YYYY-MM-DD),
# written to the current working directory.
# `date` is imported in the notebook's top import cell.
fileName = date.today().isoformat() + "-corona-daily-india.csv"
df.to_csv(fileName)

In [25]:
# Read the file back to confirm that the export round-trips
test_df = pd.read_csv(fileName, index_col = 0)

# A load that merely succeeds proves little: check that no rows or columns were
# lost or renamed on the way through the CSV
assert test_df.shape == df.shape, "CSV round-trip changed the table's shape"
assert list(test_df.columns) == list(df.columns), "CSV round-trip changed the column names"

test_df.head()

Unnamed: 0,Confirmed,Active,Discharged,Deaths,Vaccinations
INDIA,19557457,3349644,15992271,215542,156816031
INDIA (Increases),392488,80934,307865,3689,1826219
Andaman and Nicobar,6046,205,5773,68,104374
Andhra Pradesh,1121102,130752,982297,8053,6579817
Arunachal Pradesh,18636,1387,17190,59,241483
