In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
from _nlp_ops import *
import os

### Appending all the data: data_text + data_fig + data_query

In [28]:
def create_data(query = ''):
    """Load the two knowledge-base workbooks and wrap the query in a frame.

    Each workbook ('FAQ.xlsx', 'Data.xlsx') is read exactly once. The file is
    looked up first in the current working directory and then in its 'data'
    sub-directory. The previous version read both locations and silently
    discarded the 'data' copy, and it built the sub-directory path with
    Windows-only backslashes.

    Parameters
    ----------
    query : str
        The user's question; stored in the 'Question' column of the query frame.

    Returns
    -------
    tuple of (data_text, data_fig, data_query)
        data_text and data_fig are whatever pd.read_excel returns for the two
        workbooks; data_query is a one-row frame with columns
        'Question' (= query) and 'Answer' (= None).

    Raises
    ------
    FileNotFoundError
        If a workbook is present in neither location.
    """
    def _locate(filename):
        # Working-directory copy wins because that is the data the original
        # code ended up returning; the 'data' folder is the fallback.
        base_dir = os.getcwd()
        candidates = [
            os.path.join(base_dir, filename),
            os.path.join(base_dir, 'data', filename),
        ]
        for candidate in candidates:
            if os.path.isfile(candidate):
                return candidate
        raise FileNotFoundError(
            "Could not find {!r}; looked in: {}".format(filename, ", ".join(candidates))
        )

    data_text = pd.read_excel(_locate('FAQ.xlsx'))
    data_fig = pd.read_excel(_locate('Data.xlsx'))
    data_query = pd.DataFrame({'Question' : [query], 'Answer' : [None]})
    return data_text, data_fig, data_query

def cleaning_text_data(data, col):
    """Apply the text-cleaning helpers to data[col], one after another.

    The helpers are not defined in this notebook; presumably they come from
    the `_nlp_ops` star import. Each helper is mapped over the whole column
    before the next one runs, in the order listed below. The column is
    overwritten in `data`, and the same `data` object is returned.
    """
    cleaning_steps = (
        replace_word_contractions,
        remove_non_alphabet,
        remove_URLs,
        remove_digits,
        remove_stop_words,
        porter_stemmer,
        wordnet_lemmatizer,
    )
    for step in cleaning_steps:
        data[col] = data[col].map(step)
    return data

def cleaning_fig_data(data, col):
    """Lower-case every value of data[col] and replace each '#' with a space.

    The column is overwritten in `data`, and that same object is returned.
    Every value must offer `.lower()` (i.e. be a string).
    """
    def _normalise(cell):
        return cell.lower().replace('#', ' ')

    data[col] = data[col].map(_normalise)
    return data

def feature_extraction(data, col):
    """Fit a TF-IDF vectorizer on data[col] and return the dense feature table.

    Parameters
    ----------
    data : DataFrame
        Frame holding the documents.
    col : str
        Name of the text column to vectorize.

    Returns
    -------
    (X_tf_vect, tf_names)
        X_tf_vect is a DataFrame with one row per document and one column per
        vocabulary term; tf_names is the list of those terms.
    """
    tf_vect = TfidfVectorizer(lowercase=True, stop_words='english')
    tf_matrix = tf_vect.fit_transform(data[col])
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
    # prefer its replacement and fall back only on old installations.
    if hasattr(tf_vect, 'get_feature_names_out'):
        # The new API returns an ndarray; convert so callers still get a list.
        tf_names = list(tf_vect.get_feature_names_out())
    else:
        tf_names = tf_vect.get_feature_names()
    X_tf_vect = pd.DataFrame(tf_matrix.toarray(), columns=tf_names)
    return X_tf_vect, tf_names

def calc_similarity(X_tf_vect):
    """Score every row by cosine similarity against the last row.

    The last row is taken as the target (get_bot_reply places the query
    there). Scores are stored in a new 'sim_score' column on `X_tf_vect`
    itself, and the same frame is returned.

    The target is selected by position, so the frame no longer needs a
    RangeIndex (the old `index.stop` lookup did), and all scores come from a
    single cosine_similarity call instead of one call per row.

    Parameters
    ----------
    X_tf_vect : DataFrame
        Numeric feature table, one row per document.

    Returns
    -------
    DataFrame
        `X_tf_vect`, with the added 'sim_score' column.
    """
    if len(X_tf_vect) == 0:
        # No target row to compare against; keep the output schema consistent.
        X_tf_vect['sim_score'] = []
        return X_tf_vect

    vectors = X_tf_vect.values
    # vectors[-1:] keeps the target 2-D (1 x n_features), as sklearn expects;
    # the result has shape (1, n_rows), so row 0 holds one score per document.
    X_tf_vect['sim_score'] = cosine_similarity(vectors[-1:], vectors)[0]
    return X_tf_vect

def greeting_check(text):
    """Return a canned reply for greetings/farewells, or False otherwise.

    The message is lower-cased, stripped of '!', '?', '.', ',' and extra
    whitespace, and runs of a repeated character are collapsed
    ("hiii" -> "hi"). The keywords get the same normalisation, and the
    message must then equal a keyword exactly.

    This fixes the previous matching, which removed *all* duplicate characters
    from the message only (so "hello" and "how are you" never matched) and
    tested whether the message was a substring of a keyword (so "", "a" or
    "e" were treated as greetings).

    Parameters
    ----------
    text : str
        Raw message text from the user.

    Returns
    -------
    str or bool
        The greeting reply, the farewell reply, or False when the message is
        neither (the caller then falls through to the FAQ lookup).
    """
    import re

    def _normalise(phrase):
        phrase = re.sub(r'[!?.,]+', ' ', phrase.lower())
        phrase = ' '.join(phrase.split())
        # Collapse stretched letters: "heyyy" -> "hey", "hello" -> "helo".
        return re.sub(r'(.)\1+', r'\1', phrase)

    greetings = ['hey', 'hi', 'hello', '/start', 'how are you']
    farewells = ['bye', 'bye bye', 'thankyou', 'thank you', 'by']

    # Same text as the original triple-quoted literal, including its trailing
    # newline and indentation whitespace.
    greeting_reply = (
        "Hello!! Let me know how may I help you with COVID-19 information!! "
        "I am a new bot, still learning. "
        "I can answer question about counts in different states of India. "
        "\n                "
    )
    farewell_reply = """Bye Bye!!"""

    cleaned = _normalise(text)
    if not cleaned:
        return False
    if cleaned in {_normalise(word) for word in greetings}:
        return greeting_reply
    if cleaned in {_normalise(word) for word in farewells}:
        return farewell_reply
    return False

In [3]:
def get_bot_reply(query):
    """Return the stored answer whose question is most similar to `query`.

    Steps: load the FAQ and figures tables plus a one-row query frame, clean
    the 'Question' column of each, stack them with the query as the LAST row,
    vectorize 'Question' with TF-IDF, and score every row against the query.
    The best-scoring row other than the query itself is selected.

    Parameters
    ----------
    query : str
        The user's question.

    Returns
    -------
    object
        The value in the second column (position 1) of the best-matching row.
        Presumably this is the 'Answer' column; confirm against the column
        order of the workbooks.
    """
    text_data, fig_data, query_data = create_data(query)
    cleaned_text_data = cleaning_text_data(text_data, col='Question')
    cleaned_query_data = cleaning_text_data(query_data, col='Question')
    cleaned_fig_data = cleaning_fig_data(fig_data, col='Question')
    # The query must stay last: calc_similarity uses the final row as target.
    data_total = pd.concat(
        [cleaned_text_data, cleaned_fig_data, cleaned_query_data]
    ).reset_index(drop=True)

    # feature extraction
    X_tf_vect, _ = feature_extraction(data_total, col='Question')
    X_tf_vect = calc_similarity(X_tf_vect)

    # Drop the final row (the query compared with itself) before picking the
    # maximum. Both frames carry a 0..n-1 index, so the label returned by
    # idxmax is also a valid row label of data_total.
    best_row = X_tf_vect['sim_score'].iloc[:-1].idxmax()
    # Explicit positional access: `row[1]` on a string-labelled Series relies
    # on an integer-as-position fallback that pandas has deprecated.
    return data_total.loc[best_row].iloc[1]

## Telegram communication

In [14]:
import json 
import requests

# SECURITY: never commit the bot token with the notebook. It is read from the
# TELEGRAM_BOT_TOKEN environment variable; the token that used to be hard-coded
# on this line should be treated as leaked and revoked.
# If the variable is unset, TOKEN is empty and Telegram requests will fail.
TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "")
URL = "https://api.telegram.org/bot{}/".format(TOKEN)


def get_url(url):
    """Send a GET request to `url` and return the response body as UTF-8 text."""
    raw_body = requests.get(url).content
    return raw_body.decode("utf8")


def get_json_from_url(url):
    """Fetch `url` through get_url and return the body parsed as JSON."""
    return json.loads(get_url(url))

def get_updates(offset=None):
    """Call the bot API's getUpdates endpoint and return the decoded JSON.

    When `offset` is truthy it is appended as the `offset` query parameter;
    None (or 0) sends the request without it.
    """
    endpoint = URL + "getUpdates"
    query_string = "?offset={}".format(offset) if offset else ""
    return get_json_from_url(endpoint + query_string)


def get_last_chat_id_and_text(updates):
    """Return (text, chat_id) taken from the final entry of updates["result"]."""
    newest_message = updates["result"][-1]["message"]
    return (newest_message["text"], newest_message["chat"]["id"])

def get_last_update_id(updates):
    """Return the largest "update_id" (converted to int) in updates["result"]."""
    return max(int(entry["update_id"]) for entry in updates["result"])

def send_message(text, chat_id):
    """Send `text` to the chat `chat_id` through the bot API's sendMessage call.

    The parameters are percent-encoded before being placed in the URL.
    Previously the raw text was interpolated, so any '#' cut the message off
    (everything after it is a URL fragment) and '&', '+' or newlines corrupted
    the query string.

    Parameters
    ----------
    text : str
        Message body to deliver.
    chat_id : int or str
        Identifier of the target chat.
    """
    from urllib.parse import urlencode

    query_string = urlencode({"text": text, "chat_id": chat_id})
    get_url(URL + "sendMessage?" + query_string)

    
def echo_all(updates):
    """Answer every entry in updates["result"].

    For each update the message text is first offered to greeting_check; if
    that yields nothing truthy, get_bot_reply supplies the answer. Any '#' in
    the answer is replaced by a space before it is sent back to the
    originating chat. An exception raised while handling one update is
    printed, and processing continues with the next update.
    """
    for update in updates["result"]:
        try:
            message = update["message"]
            text = message["text"]
            chat = message["chat"]["id"]
            reply = greeting_check(text) or get_bot_reply(text)
            send_message(reply.replace('#', ' '), chat)
        except Exception as e:
            print(e)




In [20]:
def main():
    """Poll for updates forever, answering each non-empty batch.

    After a batch is received, the next poll asks for updates starting at the
    highest update id seen plus one. The loop pauses 0.5 s between polls and
    never returns.
    """
    next_offset = None
    while True:
        batch = get_updates(next_offset)
        if batch["result"]:
            next_offset = get_last_update_id(batch) + 1
            echo_all(batch)
        time.sleep(0.5)


# Start the polling loop when this code is executed directly. main() loops
# forever, so this statement only ends when the run is interrupted (the
# recorded output of this cell is a KeyboardInterrupt).
if __name__ == '__main__':
    main()

KeyboardInterrupt: 

In [24]:
# Manual check: query the bot with a bare place name; the recorded output is the returned answer.
get_bot_reply('mumbai')

'state#Maharashtra district#Mumbai  confirmed#1259'

In [38]:
# Manual check: a partial state name; the recorded output is the returned answer.
get_bot_reply('tamil')

'active#1014 confirmed#1075 deaths#11 recovered#50 state#Tamil Nadu'

In [None]:
# NOTE(review): this cell re-defines `main` with the same logic as the earlier
# polling cell in this notebook; the later definition shadows the earlier one,
# so one of the two cells can be deleted.
def main():
    """Poll for updates forever, answering each non-empty batch.

    After a batch is received, the next poll asks for updates starting at the
    highest update id seen plus one. The loop pauses 0.5 s between polls and
    never returns.
    """
    next_offset = None
    while True:
        batch = get_updates(next_offset)
        if batch["result"]:
            next_offset = get_last_update_id(batch) + 1
            echo_all(batch)
        time.sleep(0.5)


# Start the polling loop when this code is executed directly. main() loops
# forever, so nothing placed after this statement runs until it is interrupted.
if __name__ == '__main__':
    main()

In [None]:
# Steps for deploying

# 1) Create a bot token with Telegram's @BotFather
# 2) Create a virtual environment
# 3) Activate the virtual environment



In [None]:
# requirements.txt
'''
scikit-learn
nltk
flask
pandas
requests

'''

In [12]:
# Manual check with an FAQ-style question; the returned answer is kept in `a`.
a = get_bot_reply('what was the source')

'Currently, the source of SARS-CoV-2, the coronavirus (CoV) causing COVID-19 is unknown. All available evidence suggests that SARS-CoV-2 has a natural animal origin and is not a constructed virus. SARS-CoV-2 virus most probably has its ecological reservoir in bats. SARS-CoV-2, belongs to a group of genetically related viruses, which also include SARS-CoV and a number of other CoVs isolated from bats populations. MERS-CoV also belongs to this group, but is less closely related'