In [None]:
"""
Post-Processing of Generated output from Hugchat 
1. Extract label, community, rationale and keywords 
2. Create dataset which removes all "-1" tags 
3. Convert labels into: hate, offensive, normal speech. 
4. Convert communities targeted from the list ['African', 'Islam', 'Jewish', 'Homosexual', 'Women', 'Refugee', 'Arab', 'Caucasian', 'Asian', 'Hispanic']
5. Check if keywords are present in the sentence. 
"""

In [1]:
import pandas as pd
import numpy as np
import difflib
import string
from nltk.corpus import stopwords


In [2]:
#### INPUT 
df_rationale_path = '../../data/rationale_extraction/df_train_with_rationales.csv'

### Output 
df_rationale_post_processed_path = '../../data/rationale_extraction/df_rationale_post_processed.csv'

In [3]:
df = pd.read_csv(df_rationale_path)
df.dropna(subset=['hugchat_response'], inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.shape)

(14616, 8)


In [4]:
# Function to extract classification label from hugchat responses
# Split the text response by '\n'. Split sentences by ':'. Check for current phrase in the list to be similar to 'classification'. 
# Return the next phrase

def extract_label(hugchat_response):
    text = hugchat_response.lower()
    text_list = text.split('\n')
    for sentence in text_list:
        phrases = sentence.split(":")
        for i in range(len(phrases)):
            if phrases[i].strip() in ['classification']:
                if i+1<len(phrases):
                    return phrases[i+1]
            elif phrases[i].strip() in ['output part1', 'output part 1', 'output 1', 'output part i', 'part1', 'first part']:
                # check next word is not 'classification'
                if i+1<len(phrases):
                    s = difflib.SequenceMatcher(None, "classification", phrases[i+1].strip())
                    if s.ratio() >= 0.80 and (i+2)<len(phrases):
                        return phrases[i+2]
                    else:
                        return phrases[i+1]
            else:
                s = difflib.SequenceMatcher(None, "classification", phrases[i].strip())
                if (s.ratio()>=0.70) and (i+1)<len(phrases):
                    return phrases[i+1]
                else:
                    s = difflib.SequenceMatcher(None, "output start", phrases[i].strip())
                    t = difflib.SequenceMatcher(None, "start", phrases[i].strip())
                    if (s.ratio()>0.9) and (i+1)<len(phrases):
                        if phrases[i+1].strip() in ['normal', 'normal speech', 'hateful', 'hateful speech', 'hate speech', 'offensive', 'offensive speech']:
                            return phrases[i+1]
                    if (t.ratio()>0.9) and (i+1)<len(phrases):
                        if phrases[i+1].strip() in ['normal', 'normal speech', 'hateful', 'hateful speech', 'hate speech', 'offensive', 'offensive speech']:
                            return phrases[i+1]
            
    return "-1"

In [5]:
# Function to extract targeted communities from hugchat responses
# Split the text response by '\n'. Split sentences by ':'. Check for current phrase in the list to be similar to 'communities targeted'. 
# Return the next phrase

def extract_comm_targeted(hugchat_response):
    text = hugchat_response.lower()
    text_list = text.split('\n')
    for sentence in text_list:
        phrases = sentence.split(":")
        for i in range(len(phrases)):
            if phrases[i].strip() in ['communities targeted', 'community targeted', 'targeted community', 'targeted communities', 'targeted Community(ies)']:
                if i+1<len(phrases):
                    return phrases[i+1]
            elif phrases[i].strip() in ['output part2', 'output part 2', 'output 2', 'output part ii', 'part2', 'second part']:
                # check next word is not communities targeted
                if i+1<len(phrases):
                    s = difflib.SequenceMatcher(None, "communities targeted", phrases[i+1].strip())
                    if s.ratio() >= 0.60 and (i+2)<len(phrases):
                        return phrases[i+2]
                    else:
                        return phrases[i+1]
            else:
                s = difflib.SequenceMatcher(None, "communities targeted", phrases[i].strip())
                t = difflib.SequenceMatcher(None, "targeted communities", phrases[i].strip())
                u = difflib.SequenceMatcher(None, "community targeted", phrases[i].strip())
                v = difflib.SequenceMatcher(None, "targeted community", phrases[i].strip())
                if (max(s.ratio(),t.ratio(), u.ratio(), v.ratio())>=0.60) and (i+1)<len(phrases):
                    return phrases[i+1]
    return "-1"

In [6]:
# Function to extract targeted communities from hugchat responses
# Split the text response by '\n'. Split sentences by ':'. Check for current phrase in the list to be similar to 'explanation'. 
# Return the next phrase

def extract_rationale(hugchat_response):
    text = hugchat_response.lower()
    text_list = text.split('\n')
    #print("Text list: \n", text_list)
    for sentence in text_list:
        #print("Sentence: ", sentence)
        phrases = sentence.split(":")
        for i in range(len(phrases)):
            #print("current phrase: ", phrases[i])
            if phrases[i].strip() in ['explanation', 'justification', '* explanation', '### explanation ###', 'explancement', 'explaining my answer']:
                if i+1<len(phrases):
                    return phrases[i+1]
            elif phrases[i].strip() in ['output part3', 'output part 3', 'output 3', 'output part iii', 'part3', 'third part']:
                # check next word is not explanation
                if i+1<len(phrases):
                    s = difflib.SequenceMatcher(None, "explanation", phrases[i+1].strip())
                    if s.ratio() >= 0.80 and (i+2)<len(phrases):
                        return phrases[i+2]
                    else:
                        return phrases[i+1]
            elif 'explanation' in phrases[i].strip():
                if (i+1)<len(phrases):
                    if len(phrases[i+1].split()) > 20:
                        return phrases[i+1]
            else:
                s = difflib.SequenceMatcher(None, "explanation", phrases[i].strip())
                t = difflib.SequenceMatcher(None, "justification", phrases[i].strip())
                if (max(s.ratio(),t.ratio())>=0.80) and (i+1)<len(phrases):
                    return phrases[i+1]
    return "-1"

In [7]:
# Function to extract keywords from hugchat responses
# Split the text response by '\n'. Split sentences by ':'. Check for current phrase in the list to be similar to 'keywords'. 
# Return the next phrase

def extract_keywords(hugchat_response):
    text = hugchat_response.lower()
    text_list = text.split('\n')
    #print("Text list: \n", text_list)
    for sentence in text_list:
        #print("Sentence: ", sentence)
        phrases = sentence.split(":")
        for i in range(len(phrases)):
            #print("current phrase: ", phrases[i])
            if phrases[i].strip() in ['keywords', 'key word', 'keywords', 'key words']:
                if i+1<len(phrases):
                    return phrases[i+1]
            elif phrases[i].strip() in ['output part4', 'output part 4', 'output 4', 'output part iv', 'part4', 'part 4', 'fourth part']:
                # check next word is not 'keywords'
                if i+1<len(phrases):
                    s = difflib.SequenceMatcher(None, "keywords", phrases[i+1].strip())
                    if s.ratio() >= 0.80 and (i+2)<len(phrases):
                        return phrases[i+2]
                    else:
                        return phrases[i+1]
            elif 'key' in phrases[i].strip():
                if (i+1)<len(phrases):
                    if len(phrases[i+1].split()) < 20:
                        return phrases[i+1]
            else:
                s = difflib.SequenceMatcher(None, "keyword", phrases[i].strip())
                if (s.ratio()>=0.80) and (i+1)<len(phrases):
                    return phrases[i+1]
    return "-1"

In [8]:
# Function to remove all entries which have "-1" in any of the field 
def drop_no_output_rows(df):
    print("Intital shape: ", df.shape)
    df_copy = df.copy()
    print("label -1 rows : ", df_copy[df_copy['hugchat_label']=="-1"].shape[0])
    df_copy = df_copy[df_copy['hugchat_label']!="-1"].copy()
    print("comm targeted -1 : ", df_copy[df_copy['hugchat_comm_targeted']=="-1"].shape[0])
    df_copy = df_copy[df_copy['hugchat_comm_targeted']!="-1"].copy()
    print("keywords -1 : ", df_copy[df_copy['hugchat_keywords']=="-1"].shape[0])
    df_copy = df_copy[df_copy['hugchat_keywords']!="-1"].copy()
    print("explanation -1 : ", df_copy[df_copy['hugchat_explanation']=="-1"].shape[0])
    df_copy = df_copy[df_copy['hugchat_explanation']!="-1"].copy()
    print("Final shape: ",df_copy.shape)
    return df_copy


In [9]:
# Function to convert hugchat labels into "hate", "offensive", "normal". 
# Some responses have hate/offensive. Labels are overindexed on hate speech, prioritizing offensive over hate and normal.
# Priority Order: hate < normal < offensive
def convert_hugchat_label(hugchat_label):
    label = ''
    hugchat_label = hugchat_label.strip()
    if "hate" in hugchat_label:
        label = "hate_speech"
    if "normal" in hugchat_label:
        label = "normal_speech"
    if "offensive" in hugchat_label:
        label = "offensive_speech"
    return label 

In [10]:
# Function to convert hugchat comm targeted into ['African', 'Islam', 'Jewish', 'Homosexual', 'Women', 'Refugee', 'Arab', 'Caucasian', 'Asian', 'Hispanic', 'None'] 
def convert_comm_targeted(hugchat_comm_targeted):
    hugchat_comm_targeted = hugchat_comm_targeted.strip()
    hugchat_comm_targeted = hugchat_comm_targeted.translate(str.maketrans('', '', string.punctuation))
    comm_target = []
    none_list = ['none', 'na', 'not applicable', 'not specified']
    islam_list = ['islam', 'muslim', 'moslem', 'muslims', 'moslems']
    homosexual_list = ['homo','gay','lgbtq', 'lesbian','queer']
    women_list = ['women', 'female', 'feminist']
    refugee_list = ['refugee', 'migrant', 'immigrant']
    african_list = ['african','black']
    asian_list = ['asian', 'india', 'pakistan', 'bangla']
    hispanic_list = ['latin','hispanic']
    
    for item in none_list:
        if item in hugchat_comm_targeted:
            comm_target.append('none')
    
    for item in african_list:
        if item in hugchat_comm_targeted:
            comm_target.append('african')
    
    for item in islam_list:
        if item in hugchat_comm_targeted:
            comm_target.append('islam')
    
    if 'jew' in hugchat_comm_targeted:
        comm_target.append('jewish')
    
    if 'arab' in hugchat_comm_targeted:
        comm_target.append('arab')
    
    if 'white' in hugchat_comm_targeted:
        comm_target.append('caucasian')
    
    for item in asian_list:
        if item in hugchat_comm_targeted:
            comm_target.append('asian')
    
    for item in hispanic_list:
        if item in hugchat_comm_targeted:
            comm_target.append('hispanic')
    
    for item in women_list:
        if item in hugchat_comm_targeted:
            comm_target.append('women')
    
    for item in homosexual_list:
        if item in hugchat_comm_targeted:
            comm_target.append('homosexual')
    
    for item in refugee_list:
        if item in hugchat_comm_targeted:
            comm_target.append('refugee')
    
    if ('none' in comm_target) and (len(comm_target)>1):
        comm_target = [x for x in comm_target if x!='none']
    
    return list(set(comm_target))

In [11]:
# Function to check to return hugchat keywords present in the sentence. 

def convert_hugchat_keywords(sentence, hugchat_keywords):
    kw_list = []
    sentence = sentence.translate(str.maketrans('', '', string.punctuation))
    words = sentence.split()
    words = [word for word in words if word not in stopwords.words('english')]
    kwords = hugchat_keywords.split()
    for kw in kwords:
        kw = kw.strip()
        for word in words:
            word = word.strip()
            s = difflib.SequenceMatcher(None, word, kw)
            if s.ratio()>=0.7:
                kw_list.append(word)
    return list(set(kw_list))

In [12]:
# Get outputs from Hugchat Response
df['hugchat_label'] = df['hugchat_response'].apply(lambda x: extract_label(x))
df['hugchat_comm_targeted'] = df['hugchat_response'].apply(lambda x: extract_comm_targeted(x))
df['hugchat_explanation'] = df['hugchat_response'].apply(lambda x: extract_rationale(x))
df['hugchat_keywords'] = df['hugchat_response'].apply(lambda x: extract_keywords(x))

# Process these outputs
df_filtered = drop_no_output_rows(df)
df_filtered['hugchat_label_processed'] = df_filtered['hugchat_label'].apply(lambda x: convert_hugchat_label(x))
df_filtered['hugchat_comm_targeted_processed'] = df_filtered['hugchat_comm_targeted'].apply(lambda x: convert_comm_targeted(x))
df_filtered['hugchat_keywords_processed'] = df_filtered[['unmasked_sentence','hugchat_keywords']].apply(lambda row: convert_hugchat_keywords(row['unmasked_sentence'], row['hugchat_keywords']), axis=1)


print(df_filtered.shape)

Intital shape:  (14616, 12)
label -1 rows :  164
comm targeted -1 :  151
keywords -1 :  140
explanation -1 :  211
Final shape:  (13950, 12)
(13950, 15)


In [13]:
df_filtered.head(10)

Unnamed: 0,id,annotators,rationales,post_tokens,masked_tokens,unmasked_sentence,masked_sentence,hugchat_response,hugchat_label,hugchat_comm_targeted,hugchat_explanation,hugchat_keywords,hugchat_label_processed,hugchat_comm_targeted_processed,hugchat_keywords_processed
0,23107796_gab,"{'label': [0, 2, 2], 'annotator_id': [203, 204...","[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...","['u', 'really', 'think', 'i', 'would', 'not', ...",[],u really think i would not have been raped by ...,u really think i would not have been raped by ...,Output Part1: Hateful Speech\n\nOutput Part2: ...,hateful speech,"indian, bangladeshi, hindus, muslims, neo nazis",the language used in the sentence suggests an...,"raped, feral, hindus, muslims, neo nazis, sex...",hate_speech,"[asian, islam]","[muslim, rape, neo, nazi, raped, feral, hindu]"
1,9995600_gab,"{'label': [2, 2, 0], 'annotator_id': [27, 6, 4...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","['the', 'uk', 'has', 'threatened', 'to', 'retu...",[],the uk has threatened to return radioactive wa...,the uk has threatened to return radioactive wa...,Output Start:\n\nClassification: Hate Speech\n...,hate speech,british south asian community,this statement contains derogatory language t...,"paki, radioactive waste, migrant.",hate_speech,[asian],"[paki, waste, radioactive, migrants]"
2,1227920812235051008_twitter,"{'label': [2, 2, 2], 'annotator_id': [209, 203...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,...","['if', 'english', 'is', 'not', 'imposition', '...",[],if english is not imposition then hindi is als...,if english is not imposition then hindi is als...,Classification: Hateful\nCommunities Targeted:...,hateful,south asian community (including but not limi...,this sentence contains several derogatory ter...,"shut up, chutiya, retarded, hindi imposition",hate_speech,[asian],"[shut, hindi, imposition, chutiya, retards]"
3,1204931715778543624_twitter,"{'label': [2, 2, 2], 'annotator_id': [235, 222...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 0, ...","['no', 'liberal', 'congratulated', 'hindu', 'r...",[],no liberal congratulated hindu refugees post c...,no liberal congratulated hindu refugees post c...,Classification: Normal Speech\n\nTargeted Comm...,normal speech,none,the statement seems factually correct as ther...,"liberal, hindu, refugees, cab, congratulation",normal_speech,[none],"[liberal, refugees, congratulated, hindus, cab..."
4,1179102559241244672_twitter,"{'label': [2, 2, 2], 'annotator_id': [51, 25, ...","[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,...","['he', 'said', 'bro', 'even', 'your', 'texts',...",['<user>'],he said bro even your texts sound redneck what...,he said bro even your texts sound redneck what...,Output Part 1: Classification: Normal Speech\n...,normal speech,none,this sentence appears to be casual conversati...,"bro, redneck (while potentially derogatory, d...",normal_speech,[none],"[redneck, bro]"
5,18790322_gab,"{'label': [2, 1, 1], 'annotator_id': [203, 202...",[],"['i', 'live', 'and', 'work', 'with', 'many', '...",[],i live and work with many legal mexican immigr...,i live and work with many legal mexican immigr...,Classification: Normal Speech\nCommunities tar...,normal speech,none,this sentence contains no derogatory language...,"legal, mexico, workers",normal_speech,[none],"[legal, illegals, work, mexican]"
6,13459307_gab,"{'label': [0, 2, 0], 'annotator_id': [67, 57, ...","[[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0,...","['in', 'britain', 'the', 'prime', 'minister', ...",[],in britain the prime minister a childless woma...,in britain the prime minister a childless woma...,Classification: Hateful Speech\nCommunities Ta...,hateful speech,"women, lesbians, scottish community",this sentence contains several derogatory ter...,"moslem, invader, fat woman",hate_speech,"[homosexual, women]","[woman, fat, moslem, invader]"
7,1113705567782969344_twitter,"{'label': [1, 2, 1], 'annotator_id': [207, 221...",[],"['<user>', '<user>', 'there', 'shd', 'be', 'a'...","['<user>', '<user>']",Paul Paul there shd be a film called no hindus...,<user> <user> there shd be a film called no hi...,Output:\nStart: Classification: Hate Speech\nC...,hate speech,"indian (caste), islamic",this sentence uses derogatory language toward...,"no, india, hindu, film, perfect, islamic.",hate_speech,"[asian, islam]","[islamic, film, hindus, perfect]"
8,24555628_gab,"{'label': [2, 1, 1], 'annotator_id': [246, 221...",[],"['jews', 'defends', 'jews', 'regardless', 'mus...",[],jews defends jews regardless muslims defend mu...,jews defends jews regardless muslims defend mu...,Output Start:\nClassification: Hate Speech\nCo...,hate speech,"jewish, islamic",this sentence is biased against both jews and...,"christian europeans, judaism, islam, faith di...",hate_speech,"[jewish, islam]","[european, christians, christian]"
9,16382456_gab,"{'label': [0, 0, 0], 'annotator_id': [228, 220...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","['the', 'non', 'partisan', 'congressional', 'b...","['<number>', '<number>']",the non partisan congressional budget office c...,the non partisan congressional budget office c...,Classification: Normal Speech\nCommunities Tar...,normal speech,none,this sentence appears to be providing factual...,"congressional budget office (cbo), sens linds...",normal_speech,[none],"[immigrant, lindsey, act, graham, dream, congr..."
