
# Telegram bot parsing data regarding Covid-19 information



In [2]:
import telebot # library for the access to API of telegram bots телеграм
import bs4
import requests
import re
import pandas as pd
 
# you must have the file credentials.py with the bot token, bot user name and URL
from credentials import bot_token, bot_user_name, URL
# Bot client built from the token imported from credentials;
# the handlers below are registered on this object via @bot.message_handler
bot = telebot.TeleBot(bot_token) #the code will be the backend of the bot

# Module-level flag: set to True once parsing has finished successfully
parsed = False

# Countries that the bot accepts for parsing
supported = [
    'Ethiopia',
    'France',
    'China',
    'Italy',
    'Poland',
    'Ukraine',
]

# Commands

# Decorator that handles the response to command /start
@bot.message_handler(commands=['start'])
def show_start(message):
    """Greet the user and explain how to work with the bot.

    Args:
        message: incoming Telegram message; the reply is sent to
            ``message.from_user.id``.
    """
    # Adjacent literals are concatenated by the parser. Every sentence ends
    # with an explicit "\n": with backslash continuations inside one literal
    # the sentences were glued together without any separator.
    greeting = (
        "Hello! I work with the site https://www.worldometers.info/coronavirus/.\n"
        "If we type the country name, I'll return the statistics regarding new cases and daily deaths\n"
        "Type /parse to start parsing \n"
        "When the parsing complete I can output the mean cases or deaths from 15.02.2020 or for the given day\n"
        # "/help" (not "help"): only the slash command is handled by show_help
        "Type /help to observe all available commands\n"
        "Type /parse_help to see the admissible countries."
    )
    # send_message requires two arguments: whom (from_user.id or chat.id) and what we send
    bot.send_message(message.from_user.id, greeting)

# response to command /help
@bot.message_handler(commands=['help'])
def show_help(message):
    """Send the list of available commands to the user who asked for it."""
    command_lines = (
        "/parse <Country> starts parsing",
        "/parse_help - show the list of supported countries",
        "/file - recieve the file with the data",
        "/median - compute the median",
        "/mean - compute the mean",
        "/date - get info for the given date",
    )
    # The separator keeps the four spaces that precede every line break
    # in the message text.
    help_text = "    \n".join(command_lines)
    bot.send_message(message.from_user.id, help_text)

# response to /parse. 
# set parsed = False to control that the parsing has not been finished yet
@bot.message_handler(commands=['parse'])
def parse(message):
    """Reset the parsing flag and ask the user for a country name."""
    global parsed  # module-level flag shared by the handlers
    # A new parsing round starts, so earlier results no longer count as ready
    parsed = False
    prompt = "Type the country name in English: "
    recipient = message.from_user.id
    bot.send_message(recipient, prompt)
    
# response to /parse_help. Show available list of countries    
@bot.message_handler(commands=['parse_help'])
def show_parse_help(message):
    """Send the list of supported countries to the user.

    Args:
        message: incoming Telegram message; the reply is sent to
            ``message.from_user.id``.
    """
    countries = ' '.join(supported)
    # The sentence is built from adjacent literals so that the source
    # indentation does not end up in the middle of the message (a backslash
    # continuation inside one literal keeps the leading spaces of the next line).
    bot.send_message(
        message.from_user.id,
        "I am able to find the information "
        f"only for the following countries:\n {countries}",
    )
    
# response to /file, if parsed = True, i.e, parsing is completed send the file to user    
@bot.message_handler(commands=['file'])
def get_file(message):
    """Send data.csv to the user, or explain that parsing must be done first.

    Args:
        message: incoming Telegram message; the reply is sent to
            ``message.from_user.id``.
    """
    if not parsed:
        # parsing is not completed: send the instruction of how to parse
        bot.send_message(message.from_user.id, "Parsing has not been completed; type /parse")
        return

    # The context manager closes the file even when send_document raises
    with open('data.csv', 'rb') as fh:  # local or server file with the results of parsing
        bot.send_document(message.from_user.id, fh)

# response to /median - return the median of deaths or new cases        
@bot.message_handler(commands=['median'])
def get_median(message):
    """Reply with the median of the daily cases or daily deaths column.

    Accepted forms of the message text: ``/median`` (cases by default),
    ``/median cases`` and ``/median deaths``. Any other argument is answered
    with a usage hint instead of silently falling back to a column.

    Args:
        message: incoming Telegram message; the reply is sent to
            ``message.from_user.id``.
    """
    if not parsed:
        # /median was called before the parsing completed
        bot.send_message(message.from_user.id, "Parsing not completed. Type /parse for parsing")
        return

    # Maps the user's keyword to the column name written in the header of data.csv
    columns = {
        'cases': 'number of daily cases',
        'deaths': 'number of daily deaths',
    }
    args = message.text.split()[1:]  # everything after the command itself
    if not args:
        col = columns['cases']  # default specification
    elif len(args) == 1 and args[0].lower() in columns:
        col = columns[args[0].lower()]
    else:
        bot.send_message(message.from_user.id,
                         "Unknown column. Use /median cases or /median deaths")
        return

    data = pd.read_csv('data.csv', delimiter=',')  # read table with pandas
    med = data[col].median()
    bot.send_message(message.from_user.id, "Median of  " + col + " = " + str(med))

# response to /mean - return the mean of deaths or new cases (identical to the previous response)       
@bot.message_handler(commands=['mean'])
def get_mean(message):
    """Reply with the mean of the daily cases or daily deaths column.

    Accepted forms of the message text: ``/mean`` (cases by default),
    ``/mean cases`` and ``/mean deaths``. Any other argument is answered
    with a usage hint instead of silently falling back to a column.

    Args:
        message: incoming Telegram message; the reply is sent to
            ``message.from_user.id``.
    """
    if not parsed:
        # /mean was called before the parsing completed
        bot.send_message(message.from_user.id, "Parsing not completed. Type /parse for parsing")
        return

    # Maps the user's keyword to the column name written in the header of data.csv
    columns = {
        'cases': 'number of daily cases',
        'deaths': 'number of daily deaths',
    }
    args = message.text.split()[1:]  # everything after the command itself
    if not args:
        col = columns['cases']  # default specification
    elif len(args) == 1 and args[0].lower() in columns:
        col = columns[args[0].lower()]
    else:
        bot.send_message(message.from_user.id,
                         "Unknown column. Use /mean cases or /mean deaths")
        return

    data = pd.read_csv('data.csv', delimiter=',')  # read table with pandas
    mea = data[col].mean()
    bot.send_message(message.from_user.id, "The mean for the column " + col + " = " + str(mea))

# response to command /date - return the info about the day specified by the user 
@bot.message_handler(commands=['date'])
def get_date(message):
    """Reply with the new cases and deaths recorded in data.csv for one date.

    The expected message text is '/date Feb 02 2020': the command followed by
    month, day and year separated by whitespace.
    """
    global parsed
    if not parsed:
        bot.send_message(message.from_user.id, "Parsing not completed. Type /parse to parse")
        return

    tokens = message.text.split()
    if len(tokens) != 4:
        # the format "command month day year" is not observed
        bot.send_message(message.from_user.id, "Wrong date")
        return

    _, mon, day, yr = tokens
    try:
        # Two spaces before the year: this is how the date column is written in data.csv
        ddate = f"{mon} {day}  {yr}"
        # -1 marks "date not found"
        num_cases = num_deaths = -1
        with open('data.csv', 'r') as f:
            for row in f:
                fields = row.split(',')
                if ddate not in fields:
                    continue
                num_cases = int(fields[1])
                num_deaths = int(fields[2])
                break

        if num_cases != -1:
            reply = f'{ddate} registered {num_cases} new cases and {num_deaths} deaths'
        else:
            reply = 'Error in the date or the date is absent, try again.'
        bot.send_message(message.from_user.id, reply)
    except Exception:
        # any failure while searching or replying is reported as a date problem
        bot.send_message(message.from_user.id, "Error in the date or the date is absent, try again.")
    
# List of follow-up commands shared by the "parsing completed" reply and the
# "unknown command" reply
_COMMANDS_HINT = (
    "/file - get the file with data\n"
    "/median - Compute the median. Type the column: cases or deaths - for which the median is searched after space\n"
    "/mean - Compute the mean. Type the column: cases or deaths - for which the mean is searched after space\n"
    "/date - Get info about specific date written after the command in the way Month Day Year, example:  Feb 05 2021"
)

# Sentence describing the date range of the most recent successful parsing;
# empty until a parsing has completed
_data_range_note = ''


def _extract_chart(soup, title):
    """Return the raw (categories, data) strings of the chart script mentioning *title*.

    str(script) is used instead of script.text because, according to the
    original author, the attributes of BeautifulSoup objects came out empty
    on Heroku. When several scripts mention the title, the last one wins.

    Raises:
        ValueError: no script mentions *title*.
        IndexError: a matching script has no 'categories: [' or 'data: [' list.
    """
    categories = data = None
    for script in soup.find_all('script', {'type': "text/javascript"}):
        source = str(script)
        if title in source:
            # \w matches [a-zA-Z0-9_], \s matches whitespace
            categories = re.findall(r'categories: \[([\w\s",]+)', source)[0]
            data = re.findall(r'data: \[([\w,]+)', source)[0]
    if categories is None:
        raise ValueError(f'chart {title!r} not found on the page')
    return categories, data


def _download_country_data(country):
    """Download the statistics of *country*, write data.csv, return its date range.

    Returns:
        A pair (first_date, last_date) of the dates of the first and the last
        row written to data.csv, formatted as in the file.

    Raises:
        ValueError: the page contains no usable data rows.
        Other exceptions from requests, bs4, re or file I/O are propagated.
    """
    url = f'https://www.worldometers.info/coronavirus/country/{country.lower()}'
    # A timeout keeps the handler from hanging forever on an unresponsive server
    html = requests.get(url, timeout=30).text
    soup = bs4.BeautifulSoup(html, 'lxml')

    dates_cases, cases = _extract_chart(soup, 'Daily New Cases')
    _, deaths = _extract_chart(soup, 'Daily Deaths')

    # Each single date ("Feb 15, 2020") is split by the comma into two elements:
    # dates[2*i] contains month and day, dates[2*i+1] contains the year
    dates = [date.strip('"') for date in dates_cases.split(',')]
    # replace all null with '0' and generate lists of int
    cases = [int(x) for x in cases.replace('null', '0').split(',')]
    deaths = [int(x) for x in deaths.replace('null', '0').split(',')]

    # Write only the rows for which a date, a cases value and a deaths value exist
    rows = min(len(dates) // 2, len(cases), len(deaths))
    if rows == 0:
        raise ValueError('no data rows found on the page')

    with open('data.csv', 'w') as fh:  # file with the parsed information
        # the names of the columns are used by /median, /mean and /date
        fh.write('date,number of daily cases,number of daily deaths\n')
        for i in range(rows):
            fh.write(f'{dates[2*i]} {dates[2*i+1]},{cases[i]},{deaths[i]}\n')

    first_date = f'{dates[0]} {dates[1]}'
    last_date = f'{dates[2*rows-2]} {dates[2*rows-1]}'
    return first_date, last_date


# response to the messages that are different to the above commands
@bot.message_handler(content_types=['text'])
def get_text_messages(message):
    """Handle every text message that is not one of the commands above.

    While parsing is not completed the text is treated as a country name
    (matched case-insensitively against ``supported``): the country is parsed
    into data.csv and the user is told which commands are available next.
    After a successful parsing any such text is answered with "The command
    not known" and the list of commands.

    Args:
        message: incoming Telegram message; replies are sent to
            ``message.from_user.id``.
    """
    global parsed, _data_range_note
    user_id = message.from_user.id

    if parsed:
        # as of now we work only with commands: tell the user that the command
        # is not known and recall the list of the commands
        bot.send_message(user_id, "The command not known")
        hint = _COMMANDS_HINT
        if _data_range_note:
            hint += "\n" + _data_range_note
        bot.send_message(user_id, hint)
        return

    requested = message.text.strip().lower()
    country = next((name for name in supported if name.lower() == requested), None)
    if country is None:
        # show the user the list of available countries
        # and ask to type the country name once more
        show_parse_help(message)
        parse(message)
        return

    bot.send_message(user_id, "Starting parsing")
    try:
        first_date, last_date = _download_country_data(country)
    except Exception:
        # Handler boundary: network errors, a changed page layout and file
        # errors are all reported to the user in the same way
        parsed = False
        bot.send_message(user_id, "Parsing was not completed. Try again or change the country of search.")
        return

    parsed = True  # parsing completed
    _data_range_note = (
        f'For {country} the data is available within the range {first_date} - {last_date}'
    )
    bot.send_message(user_id, "Parsing completed. Choose the next command:")
    # notify user about possible requests
    bot.send_message(user_id, _COMMANDS_HINT + "\n" + _data_range_note)

# This call continuously asks the Telegram server whether new messages have arrived for the bot;
# as soon as they arrive, the bot processes them.
# If this line is not in the code, the bot will not receive the messages from users.
bot.polling(none_stop=True, interval=0)

# Exercise

1. define the string variable s
````python
s = 'xAxis: {\n\
            categories: ["Feb 15, 2020","Feb 16, 2020","Feb 17, 2020","Feb 18, 2020","Feb 19, 2020","Feb 20, 2020","Feb 21, 2020","Feb 22, 2020"]\n\
        },'
````
This string is taken from the page source at view-source:https://www.worldometers.info/coronavirus/country/poland/, with the number of dates reduced.
In my code, the variable `graph`, which loops over
`soup.find_all('script', {'type': "text/javascript"})`

````python
graph in soup.find_all('script', {'type': "text/javascript"})
````
has the same structure. Therefore you can develop your parsing, which extracts the dates, using this string.

2. Extract the dates and save them into a list. The $0$th element of the list will be 'Feb 15 2020', the $1$st element 'Feb 16 2020', and so on.

In [1]:
# Print a fixed greeting to standard output
print('Hello world')

Hello world
