# Imports

In [2]:
! pip3 install better_profanity

Collecting better_profanity
  Downloading better_profanity-0.7.0-py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: better_profanity
Successfully installed better_profanity-0.7.0


In [1]:
# General 

# Blendebot
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

# Filter for bad words and trigger words
from better_profanity import profanity 

# Detect neutral input, emotions and offensive language
import numpy as np
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from scipy.special import softmax



  from .autonotebook import tqdm as notebook_tqdm
2022-12-10 20:59:44.122732: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Filters

## Trigger words and bad words

**To do:**

* Review lists of both, bad and trigger words.

In [4]:
# Load list of bad words and trigger words from folder

with open("../filter/trigger_words.txt") as file:
    trigger_words = [line.rstrip() for line in file]
    
with open("../filter/bad_words.txt") as file:
    bad_words = [line.rstrip() for line in file] 


In [5]:
bad_words

['2g1c',
 '2 girls 1 cup',
 'acrotomophilia',
 'alabama hot pocket',
 'alaskan pipeline',
 'anal',
 'anilingus',
 'anus',
 'apeshit',
 'arsehole',
 'ass',
 'asshole',
 'assmunch',
 'auto erotic',
 'autoerotic',
 'babeland',
 'baby batter',
 'baby juice',
 'ball gag',
 'ball gravy',
 'ball kicking',
 'ball licking',
 'ball sack',
 'ball sucking',
 'bangbros',
 'bangbus',
 'bareback',
 'barely legal',
 'barenaked',
 'bastard',
 'bastardo',
 'bastinado',
 'bbw',
 'bdsm',
 'beaner',
 'beaners',
 'beaver cleaver',
 'beaver lips',
 'beastiality',
 'bestiality',
 'big black',
 'big breasts',
 'big knockers',
 'big tits',
 'bimbos',
 'birdlock',
 'bitch',
 'bitches',
 'black cock',
 'blonde action',
 'blonde on blonde action',
 'blowjob',
 'blow job',
 'blow your load',
 'blue waffle',
 'blumpkin',
 'bollocks',
 'bondage',
 'boner',
 'boob',
 'boobs',
 'booty call',
 'brown showers',
 'brunette action',
 'bukkake',
 'bulldyke',
 'bullet vibe',
 'bullshit',
 'bung hole',
 'bunghole',
 'busty',


In [6]:
trigger_words

['suicide',
 'suicidal',
 'kill',
 'death',
 'dead',
 'murder',
 'self-murder',
 'self-slaughter',
 'self-suicide',
 'cut my throat',
 'cut my veins',
 'slice my veins',
 'jump off a bridge',
 'in front of a train',
 'fall off a bridge',
 'hang myself',
 'hang up',
 'take sleeping pills',
 'not want to wake up',
 'no longer want to wake up']

In [7]:
# Check if sentence contains bad words (True or false)

def filter_bad_words(sentence, bad_words): 
    
    # load list of bad words
    profanity.load_censor_words(bad_words)
    
    # Check
    return profanity.contains_profanity(sentence)

In [8]:
sentence = "Piece of Sh1t"
filter_bad_words(sentence, bad_words)

True

In [9]:
# Check if sentence contains trigger words (True or false)

def filter_trigger_words(sentence, trigger_words): 
    
    # load list of bad words
    profanity.load_censor_words(trigger_words)
    
    # Check
    return profanity.contains_profanity(sentence)

In [10]:
# Test function
sentence = "I think about killing myself"
filter_trigger_words(sentence, trigger_words)

False

## Detect neutral input, emotions and offensive language

Model card: https://huggingface.co/cardiffnlp/twitter-roberta-base-emotion

Benchmarks: https://arxiv.org/pdf/2010.12421.pdf

### Neutral input

In [8]:
## Instantiate tokenizer and model

tokenizer_neut = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
model_neut = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")

In [9]:
# Function for predicting neutrality / Return true or false

def predict_neutrality(text):
    
    # Labels
    labels_neut = ['negative', 'neutral', 'positive']
    
    # Encode
    encoded_input = tokenizer_neut(text, return_tensors='pt')
    
    # Model output
    output = model_neut(**encoded_input)
    
    # Convert output (tensors) to numpy array
    scores = output[0][0].detach().numpy()
    
    # Apply softmax function
    scores = softmax(scores)
    
    # Convert scores for emotions to dictionary
    neutrality = {}
    for i in range(len(scores)):
        neutrality[labels_neut[i]] = scores[i]
        
    # Check if user input is neutral or not        
    if neutrality['neutral'] > neutrality['negative'] and neutrality['neutral'] > neutrality['positive']:
        return True    
    else:
        return False

In [10]:
# Test function
text = "Test"
neutrality = predict_neutrality(text)
neutrality

True

### Emotions

In [2]:
## Instantiate tokenizer and model

tokenizer_emo = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-emotion")
model_emo = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-emotion")

In [17]:
# Function for predicting emotions / returns dictionary with emotions

def predict_emotion(text):
    
    # Labels
    labels_emo = ['anger', 'joy', 'optimism', 'sadness']
    
    # Encode
    encoded_input = tokenizer_emo(text, return_tensors='pt')
    
    # Model output
    output = model_emo(**encoded_input)
    
    # Convert output (tensors) to numpy array
    scores = output[0][0].detach().numpy()
    
    # Apply softmax function
    scores = softmax(scores)
    
    # Convert scores for emotions to dictionary
    emotions = {}
    for i in range(len(scores)):
        emotions[labels_emo[i]] = scores[i]
    
    print(emotions)
    
    if emotions['anger'] >= 0.9:
        return f"let's calm down a bit"
    else:
        if emotions['sadness'] >= 0.9:
            return f"let's talk to a friend"
        else:
            return f"Tell me a joke."

In [18]:
# Test function
text = "I am a very sad"
predict_emotion(text)

{'anger': 0.0047455905, 'joy': 0.0069374996, 'optimism': 0.0040948116, 'sadness': 0.9842221}


"let's talk to a friend"

In [19]:
# Test function
text = "I am a bit sad"
predict_emotion(text)

{'anger': 0.00440111, 'joy': 0.0067090294, 'optimism': 0.004636546, 'sadness': 0.98425335}


"let's talk to a friend"

In [21]:
# Test function
text = "I really want to die"
predict_emotion(text)

{'anger': 0.05627922, 'joy': 0.010269949, 'optimism': 0.0049911006, 'sadness': 0.92845976}


"let's talk to a friend"

In [23]:
# Test function
text = "I want beat someone"
predict_emotion(text)

{'anger': 0.9568228, 'joy': 0.014350439, 'optimism': 0.013390444, 'sadness': 0.015436331}


"let's calm down a bit"

In [24]:
# Test function
text = "I have a lot of hope"
predict_emotion(text)

{'anger': 0.013719822, 'joy': 0.043690234, 'optimism': 0.90079266, 'sadness': 0.041797217}


'Tell me a joke.'

### Offensive language

In [None]:
## Instantiate tokenizer and model

tokenizer_off = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-offensive")
model_off = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-offensive")

In [None]:
# Function for predicting offensive language / Returns True or False
### needs to be 

def predict_offensive(text):
    
    # Labels
    labels_off = ['not-offensive', 'offensive']
    
    # Encode
    encoded_input = tokenizer_off(text, return_tensors='pt')
    
    # Model output
    output = model_off(**encoded_input)
    
    # Convert output (tensors) to numpy array
    scores = output[0][0].detach().numpy()
    
    # Apply softmax function
    scores = softmax(scores)
    
    # Convert scores for emotions to dictionary
    offensive = {}
    for i in range(len(scores)):
        offensive[labels_off[i]] = scores[i]
        
    # Check if text is offensive or not        
    if offensive['offensive'] > offensive['not-offensive']:
        return True    
    else:
        return False

In [None]:
# Test function
text = "I want to kill you"
offensive = predict_offensive(text)
offensive

True

# Predict answer in a dialog

Paper: https://arxiv.org/pdf/2004.13637.pdf

Fine-tuning: https://parl.ai/projects/recipes/

Model card: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+Mariama%21+How+are+you%3F

In [None]:
tokenizer_blend = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model_blend = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")

Downloading: 100%|████████████████████████████████████████████████████████████| 127k/127k [00:00<00:00, 472kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████| 62.9k/62.9k [00:00<00:00, 290kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████| 1.15k/1.15k [00:00<00:00, 287kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████| 16.0/16.0 [00:00<00:00, 7.15kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████| 772/772 [00:00<00:00, 276kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████| 1.57k/1.57k [00:00<00:00, 978kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████| 730M/730M [05:33<00:00, 2.19MB/s]


In [None]:
def predict(text, history=''):

    # Check for potential triggers
    if filter_trigger_words(text, trigger_words) is True:
        ####### Question: Do we need here and in the following hard codes answers the <s> tag that comes with the model output?
        output = "A therapist will be in contact with you shortly."
        return output, history
  
    # Check for potential bad words
    elif filter_bad_words(text, bad_words) is True:
        output = "Let's try and say this a bit nicer."
        return output, history
    
    elif predict_neutrality(text) is True:
        output = "Could you explain further?"
        return output, history
    
#### Here the emotion analysis needs to step in with the thresholds and hard coded output
    
  
    # If neither triggers nor bad words are present, generate a model output
    else:
        
        # Tokenize input
        input_token = tokenizer_blend(text, return_tensors='pt')

        # Get result from model
        result = model_blend.generate(**input_token)
        
        # Decode result to model answer
        output = tokenizer_blend.decode(result[0])
        
        # Check model answer for offensive language
        if predict_offensive(output) is True:
            result = model_blend.generate(**input_token)
            output = tokenizer_blend.decode(result[0])

            
    # Append model answer to history
    history = ''.join((history, text, output))

    return output, history

In [None]:
# Test model
input1 = 'I am sad'
history1 = ''
output1, history2 = predict(input1, history1)

print(output1)
print(history2)


<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>
I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>


In [None]:
# Test model
input2 = 'No, I do not want to talk'

output2, history3 = predict(input2, history2)

print(output2)
print(history3)


<s> I'm sorry to hear that. Do you have any hobbies that you like to do?</s>
I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>No, I do not want to talk<s> I'm sorry to hear that. Do you have any hobbies that you like to do?</s>


In [None]:
# Test model
input3 = 'No I am sad'

output3, history4 = predict(input3, history3)

print(output3)
print(history4)

<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>
I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>No, I do not want to talk<s> I'm sorry to hear that. Do you have any hobbies that you like to do?</s>No I am sad<s> I am sorry to hear that. Why are you sad? Do you want to talk about it?</s>
