# Imports

In [39]:
# General 

# Blenderbot
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

# Filter for bad words and trigger words
from better_profanity import profanity 

# Detect neutral input, emotions and offensive language
import numpy as np
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from scipy.special import softmax



# Filters

## Trigger words and bad words

**To do:**

* Review both word lists (bad words and trigger words).

In [36]:
# Load list of bad words and trigger words from folder

# Each file holds one word or phrase per line.
# - The encoding is pinned so the lists load the same way on every platform
#   (the default encoding of open() depends on the locale).
# - Blank lines are skipped so an empty string never ends up in a list.
with open("../filter/trigger_words.txt", encoding="utf-8") as file:
    trigger_words = [line.rstrip() for line in file if line.strip()]

with open("../filter/bad_words.txt", encoding="utf-8") as file:
    bad_words = [line.rstrip() for line in file if line.strip()]


In [20]:
# Inspect the loaded bad-word list (the recorded output below is cut off mid-entry)
bad_words

['2 girls 1 cup',
 'anal',
 'anus',
 'areole',
 'arian',
 'arrse',
 'arse',
 'arsehole',
 'aryan',
 'asanchez',
 'ass',
 'assbang',
 'assbanged',
 'asses',
 'assfuck',
 'assfucker',
 'assfukka',
 'asshole',
 'assmunch',
 'asswhole',
 'autoerotic',
 'ballsack',
 'bastard',
 'bdsm',
 'beastial',
 'beastiality',
 'bellend',
 'bestial',
 'bestiality',
 'bimbo',
 'bimbos',
 'bitch',
 'bitches',
 'bitchin',
 'bitching',
 'blowjob',
 'blowjobs',
 'blue waffle',
 'bondage',
 'boner',
 'boob',
 'boobs',
 'booobs',
 'boooobs',
 'booooobs',
 'booooooobs',
 'booty call',
 'breasts',
 'brown shower',
 'brown showers',
 'buceta',
 'bukake',
 'bukkake',
 'bullshit',
 'busty',
 'butthole',
 'carpet muncher',
 'cawk',
 'chink',
 'cipa',
 'clit',
 'clitoris',
 'clits',
 'cnut',
 'cock',
 'cockface',
 'cockhead',
 'cockmunch',
 'cockmuncher',
 'cocks',
 'cocksuck',
 'cocksucked',
 'cocksucker',
 'cocksucking',
 'cocksucks',
 'cokmuncher',
 'coon',
 'cowgirl',
 'cowgirls',
 'crap',
 'crotch',
 'cum',
 'cu

In [21]:
# Inspect the loaded trigger words and phrases
trigger_words

['suicide',
 'suicidal',
 'kill',
 'death',
 'dead',
 'murder',
 'self-murder',
 'self-slaughter',
 'self-suicide',
 'cut my throat',
 'cut my veins',
 'slice my veins',
 'jump off a bridge',
 'in front of a train',
 'fall off a bridge',
 'hang myself',
 'hang up',
 'take sleeping pills',
 'not want to wake up',
 'no longer want to wake up']

In [147]:
# Check if sentence contains bad words (True or false)

def filter_bad_words(sentence, bad_words):
    """Report whether ``sentence`` contains an entry of ``bad_words``.

    ``bad_words`` is loaded into the shared ``profanity`` object as its
    censor list on every call, and the sentence is then checked against it.

    Args:
        sentence: Text to check.
        bad_words: Words/phrases to load as the censor list.

    Returns:
        Whatever ``profanity.contains_profanity(sentence)`` returns
        (True/False in the recorded notebook runs).
    """
    # Point the shared checker at the caller's list before checking
    profanity.load_censor_words(bad_words)

    has_bad_word = profanity.contains_profanity(sentence)
    return has_bad_word

In [148]:
# Test function with an obfuscated spelling ("Sh1t"); the recorded result below is True
sentence = "Piece of Sh1t"
filter_bad_words(sentence, bad_words)

True

In [149]:
# Check if sentence contains trigger words (True or false)

def filter_trigger_words(sentence, trigger_words):
    """Report whether ``sentence`` contains an entry of ``trigger_words``.

    ``trigger_words`` is loaded into the shared ``profanity`` object as its
    censor list on every call, and the sentence is then checked against it.

    Args:
        sentence: Text to check.
        trigger_words: Trigger words/phrases to load as the censor list.

    Returns:
        Whatever ``profanity.contains_profanity(sentence)`` returns
        (True/False in the recorded notebook runs).
    """
    # Point the shared checker at the trigger list before checking
    profanity.load_censor_words(trigger_words)

    has_trigger = profanity.contains_profanity(sentence)
    return has_trigger

In [150]:
# Test function: check a sentence against the trigger-word list
# (the recorded result below is True)
sentence = "I think about killing myself"
filter_trigger_words(sentence, trigger_words)

True

## Detect neutral input, emotions and offensive language

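Model card (emotion model): https://huggingface.co/cardiffnlp/twitter-roberta-base-emotion

The neutrality and offensive-language checks below use the sibling checkpoints `cardiffnlp/twitter-roberta-base-sentiment` and `cardiffnlp/twitter-roberta-base-offensive`.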

Benchmarks: https://arxiv.org/pdf/2010.12421.pdf

### Neutral input

In [46]:
## Instantiate tokenizer and model

# Tokenizer and classifier are both loaded from the same sentiment checkpoint
_neut_checkpoint = "cardiffnlp/twitter-roberta-base-sentiment"

tokenizer_neut = AutoTokenizer.from_pretrained(_neut_checkpoint)
model_neut = AutoModelForSequenceClassification.from_pretrained(_neut_checkpoint)

In [106]:
# Function for predicting neutrality / Return true or false

def predict_neutrality(text):
    """Decide whether ``text`` is classified as neutral.

    The text is tokenized with ``tokenizer_neut`` and scored by
    ``model_neut``; the first row of the model's first output is turned
    into probabilities with softmax and matched to the labels by position.

    Args:
        text: Input passed to ``tokenizer_neut``.

    Returns:
        True if the 'neutral' probability is strictly greater than both the
        'negative' and the 'positive' probability, otherwise False.
    """
    # Label order is taken to match the order of the model's scores
    class_names = ('negative', 'neutral', 'positive')

    model_inputs = tokenizer_neut(text, return_tensors='pt')
    raw_scores = model_neut(**model_inputs)[0][0]

    # Detach from the graph, move to numpy, normalise to probabilities
    probabilities = softmax(raw_scores.detach().numpy())

    by_label = {class_names[pos]: prob for pos, prob in enumerate(probabilities)}

    neutral = by_label['neutral']
    # bool() keeps the return value a plain Python True/False
    return bool(neutral > by_label['negative'] and neutral > by_label['positive'])

In [108]:
# Test function: a single bare word; the recorded result below is True
text = "Test"
neutrality = predict_neutrality(text)
neutrality

True

### Emotions

In [55]:
## Instantiate tokenizer and model

# Tokenizer and classifier are both loaded from the same emotion checkpoint
_emo_checkpoint = "cardiffnlp/twitter-roberta-base-emotion"

tokenizer_emo = AutoTokenizer.from_pretrained(_emo_checkpoint)
model_emo = AutoModelForSequenceClassification.from_pretrained(_emo_checkpoint)

In [59]:
# Function for predicting emotions / returns dictionary with emotions

def predict_emotion(text):
    """Score ``text`` against the four emotion labels.

    The text is tokenized with ``tokenizer_emo`` and scored by
    ``model_emo``; the first row of the model's first output is turned
    into probabilities with softmax and matched to the labels by position.

    Args:
        text: Input passed to ``tokenizer_emo``.

    Returns:
        Dict mapping 'anger', 'joy', 'optimism' and 'sadness' to their
        softmax probability.
    """
    # Label order is taken to match the order of the model's scores
    emotion_names = ('anger', 'joy', 'optimism', 'sadness')

    model_inputs = tokenizer_emo(text, return_tensors='pt')
    raw_scores = model_emo(**model_inputs)[0][0]

    # Detach from the graph, move to numpy, normalise to probabilities
    probabilities = softmax(raw_scores.detach().numpy())

    return {emotion_names[pos]: prob for pos, prob in enumerate(probabilities)}

In [60]:
# Test function: in the recorded output below 'sadness' has the highest score
text = "I want to have sex but my partner does not want to"
emotion = predict_emotion(text)
emotion

{'anger': 0.12061143,
 'joy': 0.031667437,
 'optimism': 0.021350257,
 'sadness': 0.8263709}

### Offensive language

In [42]:
## Instantiate tokenizer and model

# Tokenizer and classifier are both loaded from the same offensive-language checkpoint
_off_checkpoint = "cardiffnlp/twitter-roberta-base-offensive"

tokenizer_off = AutoTokenizer.from_pretrained(_off_checkpoint)
model_off = AutoModelForSequenceClassification.from_pretrained(_off_checkpoint)

In [158]:
# Function for predicting offensive language / Returns True or False
### TODO: unfinished note ("needs to be ...") - complete or remove

def predict_offensive(text):
    """Decide whether ``text`` is classified as offensive.

    The text is tokenized with ``tokenizer_off`` and scored by
    ``model_off``; the first row of the model's first output is turned
    into probabilities with softmax and matched to the labels by position.

    Args:
        text: Input passed to ``tokenizer_off``.

    Returns:
        True if the 'offensive' probability is strictly greater than the
        'not-offensive' probability, otherwise False.
    """
    # Label order is taken to match the order of the model's scores
    class_names = ('not-offensive', 'offensive')

    model_inputs = tokenizer_off(text, return_tensors='pt')
    raw_scores = model_off(**model_inputs)[0][0]

    # Detach from the graph, move to numpy, normalise to probabilities
    probabilities = softmax(raw_scores.detach().numpy())

    by_label = {class_names[pos]: prob for pos, prob in enumerate(probabilities)}

    # bool() keeps the return value a plain Python True/False
    return bool(by_label['offensive'] > by_label['not-offensive'])

In [157]:
# Test function: calls the classifier directly (no word-list filters involved);
# the recorded result below is True
text = "I want to kill you"
offensive = predict_offensive(text)
offensive

True

# Predict answer in a dialog

Paper: https://arxiv.org/pdf/2004.13637.pdf

Fine-tuning: https://parl.ai/projects/recipes/

Model card: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+Mariama%21+How+are+you%3F

In [61]:
# Tokenizer and generation model are both loaded from the same Blenderbot checkpoint
_blend_checkpoint = "facebook/blenderbot-400M-distill"

tokenizer_blend = BlenderbotTokenizer.from_pretrained(_blend_checkpoint)
model_blend = BlenderbotForConditionalGeneration.from_pretrained(_blend_checkpoint)

Downloading: 100%|████████████████████████████████████████████████████████████| 127k/127k [00:00<00:00, 472kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████| 62.9k/62.9k [00:00<00:00, 290kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████| 1.15k/1.15k [00:00<00:00, 287kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████| 16.0/16.0 [00:00<00:00, 7.15kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████| 772/772 [00:00<00:00, 276kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████| 1.57k/1.57k [00:00<00:00, 978kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████| 730M/730M [05:33<00:00, 2.19MB/s]


In [159]:
def predict(text, history=''):
    """Produce the bot's reply to ``text`` together with the dialog history.

    The input is screened in a fixed order. The first check that matches
    returns a hard-coded reply and leaves ``history`` unchanged:

    1. trigger words -> referral to a therapist
    2. bad words     -> request to rephrase
    3. neutral input -> request to elaborate

    Otherwise a reply is generated with Blenderbot from ``text`` alone
    (``history`` is not passed to the model). If that reply is classified
    as offensive, a few sampled alternatives are tried; if none of them is
    acceptable, a hard-coded fallback reply is used instead.

    Args:
        text: The user's message.
        history: Concatenation of the earlier turns. Defaults to ''.

    Returns:
        Tuple ``(output, history)``. For generated (and fallback) replies
        ``text`` and ``output`` are appended to ``history``. Generated
        replies are returned exactly as ``tokenizer_blend.decode`` yields
        them, i.e. including the special tokens seen in the notebook output.
    """
    # TODO (open question): do the hard-coded answers need the <s> tag that
    # comes with the model output?

    # Check for potential triggers
    if filter_trigger_words(text, trigger_words):
        return "A therapist will be in contact with you shortly.", history

    # Check for potential bad words
    if filter_bad_words(text, bad_words):
        return "Let's try and say this a bit nicer.", history

    # Neutral input carries too little to respond to; ask for more
    if predict_neutrality(text):
        return "Could you explain further?", history

    # TODO: the emotion analysis needs to step in here, with thresholds and
    # hard-coded output.

    # None of the filters matched: generate a model answer
    input_token = tokenizer_blend(text, return_tensors='pt')
    result = model_blend.generate(**input_token)
    output = tokenizer_blend.decode(result[0])

    # Check model answer for offensive language
    if predict_offensive(output):
        # Calling generate() again with identical arguments is not expected
        # to give a different reply, so the retries switch to sampling and
        # every candidate is re-checked before it is accepted.
        max_retries = 3
        for _ in range(max_retries):
            result = model_blend.generate(
                **input_token, do_sample=True, num_beams=1
            )
            candidate = tokenizer_blend.decode(result[0])
            if not predict_offensive(candidate):
                output = candidate
                break
        else:
            # Never hand an offensive reply back to the user
            output = "I'm not sure how to respond to that. Could you tell me more?"

    # Append user input and model answer to history
    history = ''.join((history, text, output))

    return output, history

In [160]:
# Test model: first turn, starting from an empty history
input1 = 'I am sad'
history1 = ''
output1, history2 = predict(input1, history1)

print(output1)
print(history2)


<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>
I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>


In [161]:
# Test model: second turn, passing on the history returned by the first turn
input2 = 'No, I do not want to talk'

output2, history3 = predict(input2, history2)

print(output2)
print(history3)


<s> I'm sorry to hear that. Do you have any hobbies that you like to do?</s>
I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>No, I do not want to talk<s> I'm sorry to hear that. Do you have any hobbies that you like to do?</s>


In [154]:
# Test model: third turn. The recorded reply below is the same as the first
# turn's reply; predict() generates from the current message only.
input3 = 'No I am sad'

output3, history4 = predict(input3, history3)

print(output3)
print(history4)

<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>
I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>No, I do not want to talk<s> I'm sorry to hear that. Do you have any hobbies that you like to do?</s>No I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>
