In [1]:
import tensorflow as tf
import re
import numpy as np
import spacy
import pandas as pd

# How does the pipeline work?

When implemented in a chatbot, this model is driven by the user's input. So I have to transform the input (raw text) into a vectorized form of the text, then run the prediction on that vector and return the predicted topic.

In [2]:
# Load the trained topic-recognition model from disk.
# The path is relative to the notebook's working directory.
topic_rec = tf.keras.models.load_model('topic_text_1000.model')

In [3]:
# Prompt the user for a free-text message; input() blocks until a line is entered.
raw_text = input("Hey bud! What's wrong.. do you wanna talk about it ? 🤗 \n")
# Display the type of the captured input (input() always returns a str).
type(raw_text)

Hey bud! What's wrong.. do you wanna talk about it ? 🤗 
I over-portion/over-do everything Maybe it’s for comfort, but I always dish up so much food, or use so much lotion or lip balm, or drink a lot, or smoke a lot, or use a lot of perfume, or spend so much money. Literally anything that can be overdone, i over do it. I don’t understand why I do this and it’s not as easy as “just dish up less”... because it’s infiltrated in everything I do. Help?


str

In [4]:
# spaCy was chosen for lemmatization because it seemed a good option: it
# lemmatizes using the appropriate part-of-speech tag and detects pronouns and
# superlative word forms.
# The 'parser' and 'ner' components are disabled; only token lemmas are read
# from the pipeline in this notebook.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

#Clean the text:
def clean_input(s):
    """Normalize raw user text before lemmatization.

    Steps, in order:
      1. lowercase the text so differently-cased words are not counted twice;
      2. replace every non-word character and every digit with a space
         (punctuation, symbols, tabs and newlines all become spaces);
      3. collapse runs of spaces into a single space;
      4. strip leading and trailing whitespace.

    Note: ``\\W`` does not match the underscore, so underscores are kept.

    Parameters
    ----------
    s : str
        Raw input text.

    Returns
    -------
    str
        The cleaned, lowercase, single-spaced text.
    """
    s = s.lower()
    # Raw string literals: '\W' and '\d' in a normal string are invalid escape
    # sequences and trigger a Deprecation/SyntaxWarning on recent Python versions.
    s = re.sub(r'[\W\d]', ' ', s)
    s = re.sub(r' +', ' ', s)
    return s.strip()

def spacy_lem(l):
    """Run the text ``l`` through the spaCy pipeline and return its lemmas.

    Returns a list with one lemma string per token, in token order.
    """
    lemmas = []
    for token in nlp(l):
        lemmas.append(token.lemma_)
    return lemmas

# Load the vocabulary used to build the input vector.
# NOTE(review): column '0' presumably holds the feature words in the same order
# the model was trained on -- confirm against the training notebook.
word_features_topic = pd.read_csv('../data/word_features_topic.csv')
# Plain Python list of the feature words, in file order.
word_features = list(word_features_topic['0'])

def vectorize_input(l):
    """Encode ``l`` as a binary presence vector over ``word_features``.

    For each feature word, in ``word_features`` order, the result holds 1 when
    the word is contained in ``l`` and 0 otherwise, so the returned list always
    has ``len(word_features)`` entries.
    """
    return [1 if word in l else 0 for word in word_features]

# Topic labels listed by class index: the position of a name in this list is
# the index that np.argmax(pred) is compared against further down.
_TOPIC_NAMES = [
    'SPIRITUALITY',
    'COUNSELING',
    'WORKPLACE',
    'FAMILY',
    'RELATIONSHIPS',
    'SLEEP',
    'BEHAVIOR',
    'SEXUALITY',
    'SELF_ESTEEM',
    'GRIEF',
    'TRAUMA',
    'STRESS',
    'EATING_DISORDERS',
    'ADDICTION',
    'DEPRESSION',
    'LGBTQ',
    'DOMESTIC_VIOLENCE',
    'SELF_HARM',
]

# Mapping topic name -> class index (0..17), in the same order as above.
topics = {name: index for index, name in enumerate(_TOPIC_NAMES)}

In [5]:
# Step-by-step run of the preprocessing pipeline on the text entered above.
clean_text = clean_input(raw_text)      # normalized lowercase text
lem_text = spacy_lem(clean_text)        # list of lemmas
vect_text = vectorize_input(lem_text)   # binary vector over word_features


# Sanity check: the vector can be converted to a NumPy array.
type(np.array(vect_text))

numpy.ndarray

In [6]:
# Pass an explicit (1, n_features) array: a nested Python list is ambiguous for
# Keras, which can interpret a list as "one entry per model input".
pred = topic_rec.predict(np.array([vect_text]))

In [7]:
# Index of the highest score in the prediction output.
print(np.argmax(pred))
# Print a probability row only when at least one class scores above 0.1.
# The previous test `i.any() > 0.1` compared a boolean with 0.1, so it was
# true for every row containing any non-zero value.
for row in pred:
    if (row > 0.1).any():
        print(row)

13
[4.3599198e-06 5.1479746e-04 6.6560456e-06 3.8392644e-04 6.9331891e-06
 5.6064946e-05 2.3698327e-04 2.3333209e-04 3.1701962e-03 1.2232776e-04
 7.6867198e-04 5.9286575e-03 1.9935238e-04 9.8432910e-01 3.7060697e-03
 2.5700062e-04 2.1533733e-05 5.4005955e-05]


In [8]:
# Translate the winning class index back into its topic name.
best_index = np.argmax(pred)
for name, index in topics.items():
    if index == best_index:
        print(name)

ADDICTION


In [9]:
def topic_recognizer(raw_text=None, threshold=0.5, top_k=3):
    """Recognize the topic(s) of a user message.

    The text is cleaned, lemmatized and vectorized, then scored by
    ``topic_rec``. If the best class scores above ``threshold`` only that topic
    is returned; otherwise the ``top_k`` most probable topics are returned.

    Parameters
    ----------
    raw_text : str, optional
        Message to classify. When omitted, the user is prompted interactively
        (the original behaviour).
    threshold : float, default 0.5
        Minimum probability for the best class to be returned on its own.
    top_k : int, default 3
        Number of candidate topics returned when no class passes ``threshold``.

    Returns
    -------
    list of (str, float)
        ``(topic_name, score)`` tuples ordered by decreasing score. Scores are
        percentages rounded to two decimals in both cases (the fallback branch
        previously returned raw probabilities, inconsistent with the first).
    """
    if raw_text is None:
        raw_text = input("Hey bud! What's wrong.. do you wanna talk about it ? 🤗 \n\n")

    vect_text = vectorize_input(spacy_lem(clean_input(raw_text)))
    # Explicit (1, n_features) array instead of a nested list, which Keras may
    # interpret as several separate inputs. Only one sample is scored, so keep
    # the first (and only) row of the prediction.
    probs = topic_rec.predict(np.array([vect_text]))[0]

    # Invert name -> index once instead of scanning `topics` for every candidate.
    index_to_topic = {index: name for name, index in topics.items()}

    best = int(np.argmax(probs))
    if probs[best] > threshold:
        selected = [best]
    else:
        # argsort of the negated scores yields indices by decreasing probability.
        selected = [int(i) for i in np.argsort(-probs)[:top_k]]

    # Indices without a known topic name are skipped, as before.
    return [
        (index_to_topic[i], round(float(probs[i]) * 100, 2))
        for i in selected
        if i in index_to_topic
    ]

In [10]:
# Run the full pipeline end to end (prompts for a message) and display the result.
recognized_topics = topic_recognizer()
recognized_topics

Hey bud! What's wrong.. do you wanna talk about it ? 🤗 

I over-portion/over-do everything Maybe it’s for comfort, but I always dish up so much food, or use so much lotion or lip balm, or drink a lot, or smoke a lot, or use a lot of perfume, or spend so much money. Literally anything that can be overdone, i over do it. I don’t understand why I do this and it’s not as easy as “just dish up less”... because it’s infiltrated in everything I do. Help?


[('ADDICTION', 98.43)]

In [11]:
# Re-display the stored result; the value persists in the kernel between cells.
recognized_topics

[('ADDICTION', 98.43)]