In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
from _nlp_ops import *
import os

### Appending all the data, data_text + data_fig  + data_query

In [2]:
def create_data(query = '') :
    data_path = os.getcwd()
    data_path = data_path + '\\data\\'
    data_text = pd.read_excel(data_path + 'FAQ.xlsx')
    data_fig = pd.read_excel(data_path +'Data.xlsx')
    #data_text = pd.read_excel('FAQ.xlsx')
    #data_fig = pd.read_excel('Data.xlsx')
    data_query = pd.DataFrame({'Question' : [query], 'Answer' : [None]})
    return data_text, data_fig, data_query

def cleaning_text_data(data, col):
    data[col] = data[col].map(lambda row : replace_word_contractions(row))
    data[col] = data[col].map(lambda row : remove_non_alphabet(row))
    data[col] = data[col].map(lambda row : remove_URLs(row))
    data[col] = data[col].map(lambda row : remove_digits(row))
    data[col] = data[col].map(lambda row : remove_stop_words(row))
    data[col] = data[col].map(lambda row : porter_stemmer(row))
    data[col] = data[col].map(lambda row : wordnet_lemmatizer(row))
    return data

def cleaning_fig_data(data, col):
    data[col] = data[col].map(lambda row : row.lower())
    data[col] = data[col].map(lambda row : row.replace('#', ' '))
    return data

def feature_extraction(data, col):
    tf_vect = TfidfVectorizer(lowercase=True,stop_words='english')
    tf_matrix = tf_vect.fit_transform(data[col])
    tf_names = tf_vect.get_feature_names()
    X_tf_vect = pd.DataFrame(tf_matrix.toarray(), columns=tf_names)
    return X_tf_vect, tf_names

def calc_similarity(X_tf_vect):
    similarity_score = []
    target = X_tf_vect.iloc[X_tf_vect.index.stop - 1].values.tolist()
    for i in range(0, len(X_tf_vect)):
        candidate = X_tf_vect.iloc[i].values.tolist()
        similarity_score.append(cosine_similarity([target], [candidate])[0][0])
    X_tf_vect['sim_score'] = similarity_score
    return X_tf_vect

def greeting_check(text):
    text = text.lower()
    text = ''.join(sorted(set(text), key=text.index))
    if np.any([True for each in ['hey', 'hi', 'hello', '/start', 'how are you'] if text.lower() in each]):
        ans = """Hello!! I am a new bot, still learning about corona virus information. I can help answer frequently asked questions about COVID -19 and about it's count in India state or city wise. Not sure what to ask? Try, How does COVID spread?' or 'Count in Odisha'
                """
        return ans
    elif np.any([True for each in ['bye', 'bye bye', 'thankyou', 'thank you','by'] if text.lower() in each]):
        ans = """Bye Bye!!"""
        return ans
        
    return False

In [3]:
def get_bot_reply(query):
    #query = input()
    #query = 'what is the death in chhattisgarh'
    #print("Inside the get_bot_reply, and the question is {}".format(query))
    text_data, fig_data, query_data = create_data(query)
    cleaned_text_data= cleaning_text_data(text_data, col ='Question')
    cleaned_query_data = cleaning_text_data(query_data, col ='Question')
    cleaned_fig_data = cleaning_fig_data(fig_data, col ='Question')
    data_total = pd.concat([cleaned_text_data, cleaned_fig_data, cleaned_query_data]).reset_index(drop = True)

    # feature exactraction
    X_tf_vect, tf_names = feature_extraction(data_total, col = 'Question')
    X_tf_vect = calc_similarity(X_tf_vect)
    data_total_with_sim = pd.concat([data_total, X_tf_vect['sim_score']], axis = 1)
    ans = data_total.loc[data_total_with_sim.sim_score[: len(data_total) -1].idxmax()][1]
    #print(ans)
    return ans
    #print("\n")
    #print("Any thing else??!!")
    #print("press 'y' to continue")  

In [4]:
a = get_bot_reply('how does it spread?')

a

'People can catch COVID-19 from others who have the virus. The disease can spread from person to person through small droplets from the nose or mouth which are spread when a person with COVID-19 coughs or exhales. These droplets land on objects and surfaces around the person. Other people then catch COVID-19 by touching these objects or surfaces, then touching their eyes, nose or mouth. People can also catch COVID-19 if they breathe in droplets from a person with COVID-19 who coughs out or exhales droplets. This is why it is important to stay more than 1 meter (3 feet) away from a person who is sick. WHO is assessing ongoing research on the ways COVID-19 is spread and will continue to share updated findings.'

## Telegram communication

In [5]:
import json 
import requests

TOKEN = "1157749429:AAFLd02TS_E4ZZtkd1asumI3EK6d7WgkeRE"
URL = "https://api.telegram.org/bot{}/".format(TOKEN)


def get_url(url):
    response = requests.get(url)
    content = response.content.decode("utf8")
    return content


def get_json_from_url(url):
    content = get_url(url)
    js = json.loads(content)
    return js

def get_updates(offset=None):
    url = URL + "getUpdates"
    if offset:
        url += "?offset={}".format(offset)
    js = get_json_from_url(url)
    return js


def get_last_chat_id_and_text(updates):
    num_updates = len(updates["result"])
    last_update = num_updates - 1
    text = updates["result"][last_update]["message"]["text"]
    chat_id = updates["result"][last_update]["message"]["chat"]["id"]
    return (text, chat_id)

def get_last_update_id(updates):
    update_ids = []
    for update in updates["result"]:
        update_ids.append(int(update["update_id"]))
    return max(update_ids)

def send_message(text, chat_id):
    url = URL + "sendMessage?text={}&chat_id={}".format(text, chat_id)
    get_url(url)

    
def echo_all(updates):
    for update in updates["result"]:
        #print("inside echo all : ", update)
        try:
            print(update)
            text = update["message"]["text"]
            chat = update["message"]["chat"]["id"]
            print("Question : ",text)
            bot_answer = greeting_check(text)
            if not bot_answer:
                bot_answer = get_bot_reply(text)
            print("Answer : ",bot_answer)
            send_message(bot_answer.replace('#', ' '), chat)
        except Exception as e:
            print(e)




In [8]:
def main():
    last_update_id = None
    while True:
        updates = get_updates(last_update_id)
        if len(updates["result"]) > 0:
            last_update_id = get_last_update_id(updates) + 1
            echo_all(updates)
        time.sleep(0.5)


if __name__ == '__main__':
    main()

{'update_id': 80065425, 'message': {'message_id': 493, 'from': {'id': 1226128895, 'is_bot': False, 'first_name': 'Shekhar', 'last_name': 'Dubey'}, 'chat': {'id': 1226128895, 'first_name': 'Shekhar', 'last_name': 'Dubey', 'type': 'private'}, 'date': 1586956614, 'text': '/start', 'entities': [{'offset': 0, 'length': 6, 'type': 'bot_command'}]}}
Question :  /start
Answer :  Hello!! I am a new bot, still learning about corona virus information. I can help answer frequently asked questions about COVID -19 and about it's count in India state or city wise. Not sure what to ask? Try, How does COVID spread?' or 'Count in Odisha'
                
{'update_id': 80065426, 'message': {'message_id': 494, 'from': {'id': 1162409252, 'is_bot': False, 'first_name': 'Saurav', 'last_name': 'Pal'}, 'chat': {'id': 1162409252, 'first_name': 'Saurav', 'last_name': 'Pal', 'type': 'private'}, 'date': 1586957736, 'text': '/start', 'entities': [{'offset': 0, 'length': 6, 'type': 'bot_command'}]}}
Question :  /sta

Answer :  active#41 confirmed#60 deaths#1 recovered#18 state#Odisha
{'update_id': 80065441, 'message': {'message_id': 523, 'from': {'id': 1234120643, 'is_bot': False, 'first_name': 'Pooja', 'last_name': 'Dubey', 'language_code': 'en'}, 'chat': {'id': 1234120643, 'first_name': 'Pooja', 'last_name': 'Dubey', 'type': 'private'}, 'date': 1586959817, 'text': 'Count in Chhattisgarh?'}}
Question :  Count in Chhattisgarh?
Answer :  active#20 confirmed#33 deaths#0 recovered#13 state#Chhattisgarh
{'update_id': 80065442, 'message': {'message_id': 525, 'from': {'id': 1226128895, 'is_bot': False, 'first_name': 'Shekhar', 'last_name': 'Dubey', 'language_code': 'en'}, 'chat': {'id': 1226128895, 'first_name': 'Shekhar', 'last_name': 'Dubey', 'type': 'private'}, 'date': 1586959832, 'video_note': {'duration': 5, 'length': 240, 'thumb': {'file_id': 'AAMCBQADGQEAAgINXpcV1200PoRaCIHFxEiGfgG0uzkAAjsBAAJyb7lUW7nLT4VGbOebSNdqdAADAQAHbQADjFAAAhgE', 'file_unique_id': 'AQADm0jXanQAA4xQAAI', 'file_size': 12522, '

KeyboardInterrupt: 

In [None]:
#get_bot_reply('mumbai')

In [38]:
#get_bot_reply('tamil')

'active#1014 confirmed#1075 deaths#11 recovered#50 state#Tamil Nadu'

In [None]:
# Steps for deploying

#1) create token from bot father
#2) create virtual env 
#3) activate vir env



In [None]:
# requirements.txt
'''
sklearn
nltk
flask

'''