In [2]:
! pip -q install transformers

In [5]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
import torch
import string

In [6]:
tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")

In [17]:
with open("./filter/trigger_words.txt") as file:
    trigger_words = [line.rstrip() for line in file]
    
with open("./filter/bad_words.txt") as file:
    bad_words = [line.rstrip() for line in file]

In [18]:
def filter_preprocessing(sentence):    
    # lower all words
    sentence = sentence.lower()    
    # remove punctuation
    for punctuation in string.punctuation:
        sentence = sentence.replace(punctuation, '')        
    # strip withespaces
    sentence = sentence.strip()    
    return sentence 

In [19]:
def filter_bad_words(sentence, bad_words): 
    
    # preprocessing
    sentence = filter_preprocessing(sentence)
    
    # Check
    for word in sentence.split():        
        if word in bad_words:
            return True                
    else:
        return False

In [20]:
def filter_trigger_words(sentence, trigger_words):
    
    # preprocessing
    sentence = filter_preprocessing(sentence)
    
    # Check        
    for word in sentence.split():        
        if word in trigger_words:
            return True                
    else:
        return False

In [21]:
def predict(input, history=[]):

#preprocessing input  
    filter_preprocessing(input)

    if len(history) != 0:
      input = ' '.join((*history, input))

#check for potential triggers
    if filter_trigger_words(input, trigger_words) is True:
      response = "A therapist will be in contact with you shortly."
      return response, history
  
#check for potential bad words
    elif filter_bad_words(input, bad_words) is True:
      response = "Let's try and say this a bit nicer."
      return response, history
  
#if neither triggers nor bad words are present, generate a model output
    else:
      input_token = tokenizer(input, return_tensors='pt')
    # print(inputs_token)

      result = model.generate(**input_token)
      outputs = tokenizer.decode(result[0])

      history.append(' '.join((input, outputs)))

      return outputs, history

In [22]:
input5 = 'Please help me.'
history5 = []
predict(input5, history5)



('<s> What do you need help with? I can help you if you want to talk about anything.</s>',
 ['Please help me. <s> What do you need help with? I can help you if you want to talk about anything.</s>'])

In [23]:
input6 = 'Fuck everybody, I want to kill myself.'
history6 = []
predict(input6, history6)

('A therapist will be in contact with you shortly.', [])