# In [21]:
import datetime
import json
import requests
import argparse
import logging
from bs4 import BeautifulSoup 
from tabulate import tabulate

from selenium import webdriver

# Browser session used only to fetch the rendered HTML of the page below.
driver = webdriver.Chrome()


# Log line layout: timestamp followed by the message.
FORMAT = '%(asctime)s %(message)s'
# filemode='w' truncates bot.log on every run, so it only holds the latest run.
logging.basicConfig(format=FORMAT, level=logging.DEBUG, filename='bot.log', filemode='w')

URL = 'https://www.mohfw.gov.in/'
# Abbreviated column titles used when the table is rendered for Slack
# (presumably serial no., state, and three count columns -- confirm against the page).
short_headers = ['S.No', 'St','In','Cu','Dt']
# JSON file holding the per-state history between runs.
FILE_NAME = 'corona_india_data.json'
# NOTE(security): this webhook URL is a credential committed to source; anyone who
# can read this file can post to the channel. Consider rotating it and loading it
# from configuration instead.
webhook_url='https://hooks.slack.com/services/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'

try:
    driver.get(URL)
    # Click a button on the page before reading the DOM (presumably it reveals or
    # expands the cases table -- confirm). find_element('xpath', ...) is used
    # because the find_element_by_* helpers were removed in Selenium 4.3+.
    driver.find_element('xpath', '/html/body/div[2]/div/div[2]/strong/div[8]/button').click()
    html = driver.page_source
finally:
    # Always close the browser, even if navigation or the click fails, so Chrome
    # processes are not leaked on every run.
    driver.quit()
# Name the parser explicitly so parsing does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(html, 'html.parser')
div = soup.find(id = "cases")
t = div.find('table')
print(len(t))


def extract_content(row):
    """Return the ``.text`` of every element in *row* with newlines stripped out."""
    return [cell.text.replace('\n', '') for cell in row]

def save(s, path=None):
    """Write *s* as JSON to the snapshot file, replacing its previous contents.

    Args:
        s: JSON-serializable object to persist.
        path: Destination file. Defaults to the module-level ``FILE_NAME``.
    """
    if path is None:
        path = FILE_NAME
    with open(path, 'w') as out:
        json.dump(s, out)
        
def load(path=None):
    """Read the saved JSON snapshot and return it.

    Args:
        path: File to read. Defaults to the module-level ``FILE_NAME``.

    Returns:
        The decoded JSON content, or an empty dict when the file does not exist
        yet (e.g. on the very first run, before anything has been saved).

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON; this is
            deliberately not swallowed so a corrupt snapshot is noticed.
    """
    if path is None:
        path = FILE_NAME
    try:
        with open(path, 'r') as x:
            return json.load(x)
    except FileNotFoundError:
        # No snapshot yet: treat as "no history" instead of failing the run.
        return {}
        
if __name__ == '__main__':
    # Parse the scraped table, diff it against the saved snapshot, persist any
    # differences, and post the table plus the list of changes to Slack.

    # Timestamp used as the history key for the figures collected in this run.
    current_time = datetime.datetime.now().strftime('%d/%m/%Y %H:%M')
    info = []

    try:
        stats = []
        for row in t.find_all('tr'):
            stat = extract_content(row.find_all('td'))
            if len(stat) == 4:
                # A 4-cell row (the original marks it as the last row) has no
                # leading serial-number cell; pad it so the columns line up.
                stat = ['', *stat]
            stats.append(stat)
        # Discard the first row (presumably the header row, which has <th> cells
        # and therefore yields no <td> text -- confirm against the page).
        stats.pop(0)

        past_data = load()
        # Map each row's second column (the name) to this run's remaining columns.
        cur_data = {x[1]: {current_time: x[2:]} for x in stats}

        changes = False

        for state in cur_data:
            cur = cur_data[state][current_time]
            if state not in past_data:
                info.append(f'NEW State {state} got corona virus:{cur}')
                past_data[state] = {}
                changes = True
            else:
                past = past_data[state]['latest']
                if past != cur:
                    # Must set the same flag checked below; the original assigned
                    # to a misspelled name, so updates to known states were
                    # reported but never written to the snapshot.
                    changes = True
                    info.append(f'Change for {state}: {past}->{cur}')

        for event in info:
            logging.warning(event)
        # Bullet list of events for the Slack message, with single quotes removed.
        events_info = ''.join('\n - ' + event.replace("'", "") for event in info)

        if changes:
            # override the latest one now
            for state in cur_data:
                past_data[state]['latest'] = cur_data[state][current_time]
                past_data[state][current_time] = cur_data[state][current_time]
            save(past_data)

        table = tabulate(stats, headers=short_headers, tablefmt='psql')
        msg = f'Please find CoronaVirus Summary for India below:\n{events_info}\n```{table}```'
        logging.info('Sending {msg} to slack'.format(msg=msg))
        payload = { 'text': msg }
        # Bound the request so an unresponsive endpoint cannot hang the script;
        # a timeout raises and is logged by the handler below.
        requests.post(webhook_url, data=json.dumps(payload), timeout=30)

    except Exception:
        # Top-level boundary: record the full traceback in the log file.
        logging.exception('oops, corono script failed.')

5
