In [1]:
import pandas as pd

# Examine the seed datasets
ISIS_seed = pd.read_csv('Seed_MIWS/Seed_Dataset/ISIS_Seed_Complete.csv')
WS_seed = pd.read_csv('Seed_MIWS/Seed_Dataset/WS_Seed_Complete.csv')

display(ISIS_seed.head())
display(WS_seed.head())

Unnamed: 0,Source,Type of Source,Text,Label,Geographical_Location,Author_Country_Affiliation,Unnamed: 6
0,"Chatfield et.al ""Tweeting propaganda, radicali...",Research Article,Coalition planes massacred these children in a...,Propaganda,Iraq,USA,
1,"Chatfield et.al ""Tweeting propaganda, radicali...",Research Article,these PKK fellas are exceptional liars.after t...,Propaganda,Iraq,USA,
2,"Chatfield et.al ""Tweeting propaganda, radicali...",Research Article,This is so awesome. US airstrikes also by mist...,Propaganda,Iraq,USA,
3,"Chatfield et.al ""Tweeting propaganda, radicali...",Research Article,RT @ImtiyazAzhar: Support &amp; love for #Isla...,Propaganda,India,USA,
4,"Chatfield et.al ""Tweeting propaganda, radicali...",Research Article,Ask the Americans how they liked fighting\nJTJ...,Propaganda,USA,USA,


Unnamed: 0,Source,Type_of_Source,Text,Ideology,Label,Geographical_Location,Author_Country_Affiliation,Unnamed: 7
0,"Ray and Marsh ""Recruitment by extremist groups...",Research Article,This is a deliberate choice of words. As we st...,White Supremacist,Propaganda,-,USA,
1,"Ray and Marsh ""Recruitment by extremist groups...",Research Article,Most victims of race crime - about 90 per cent...,White Supremacist,Propaganda,-,USA,
2,"Ray and Marsh ""Recruitment by extremist groups...",Research Article,WE BELIEVE that the Cananite Jew is the natura...,White Supremacist,Radicalization,-,USA,
3,"Ray and Marsh ""Recruitment by extremist groups...",Research Article,"The culture of a race, free of alien influence...",White Supremacist,Radicalization,-,USA,
4,"Ray and Marsh ""Recruitment by extremist groups...",Research Article,Influential organizations and much of the west...,White Supremacist,Propaganda,"Switzerland, Germany",USA,


In [2]:
import numpy as np

"""
Given a path to a .csv file, this function will read the file and format it as a dicionary with the following structure:
    
    {
        'Inputs': [message1, message2, ...],
        'Labels': [label1, label2, ...]
    }
    
    where 'Inputs' is a list of lists of message dictionaries that conform to the input to OpenAI's Chat Completion API,
    and 'Labels' is a list of labels for each message. Each label is a string that represents the type of extremist content of the message.

    Each message should have the following format:
    [
        {
            'role': 'system',
            'content': 'system message'
        },
        {
            'role': 'user',
            'content': 'tweet text'
        }
    ]
"""
def format_eval_openai(path, system_message, labels_present=True, limit=None):
    df = pd.read_csv(path, encoding='latin-1')
    inputs = []
    labels = []
    for index, row in df.iterrows():
        if limit and index >= limit:
            break
        tweet_text = row['Text']
        inputs.append([
            {
                'role': 'system',
                'content': system_message
            },
            {
                'role': 'user',
                'content': tweet_text
            }
        ])
        if labels_present:
            labels.append(row['Label'])
        else:
            labels.append("None")
    return {
        'Inputs': inputs,
        'Labels': labels
    }

In [3]:
"""
Given a path to a .csv file, this function will read the file and format it as a dicionary with the following structure:
        
        {
            'Inputs': [request1, request2, ...]),
            'Labels': [label1, label2, ...]
        }
        
        where 'Inputs' is a list of request dictionaries that conform to the input to Perspective's comments().analyze API,
        and 'Labels' is a list of labels for each message. Each label is a string that represents the type of extremist content of the message.
    
        Each request will have the following format:
        {
            'comment': { 'text': 'tweet text' },
            'requestedAttributes': metrics
        }

Args:
    path: The path to the .csv file
    metrics: A dictionary of the metrics to request from the Perspective API (example: {'TOXICITY': {}, 'SEVERE_TOXICITY': {}})
"""
def format_eval_perspective(path, metrics, labels_present=True, limit=None):
    df = pd.read_csv(path, encoding='latin-1')
    inputs = []
    labels = []
    for index, row in df.iterrows():
        if limit and index >= limit:
            break
        tweet_text = row['Text']
        inputs.append({
            'comment': { 'text': tweet_text },
            'requestedAttributes': metrics
        })
        if labels_present:
            labels.append(row['Label'])
        else:
            labels.append("None")
    return {
        'Inputs': inputs,
        'Labels': labels
    }

In [4]:
system_message = "You are on a social media platform's moderation team. "
system_message += "Your task is to decide whether the following tweet contains extremist content or not.\n"
system_message += "Extremist content includes messages that are considered to be propoganda, radicalization, or recruitment. "
system_message += "Please label the tweet as 'Propoganda', 'Radicalization', 'Recruitment', or 'None'.\n"
system_message += "Your output must be in JSON format and with the following structure:\n{\"Label\": \"Propoganda\" or \"Radicalization\" or \"Recruitment\" or \"None\"}"

print(system_message)

ISIS_dict_openai = format_eval_openai('Seed_MIWS/Seed_Dataset/ISIS_Seed_Complete.csv', system_message)
WS_dict_openai = format_eval_openai('Seed_MIWS/Seed_Dataset/WS_Seed_Complete.csv', system_message)
# tweets_dict_openai = format_eval_openai("tweets.csv", system_message, labels_present=False, limit=1000)

metrics = {'IDENTITY_ATTACK': {}, 'SEVERE_TOXICITY': {}, 'THREAT': {}}
ISIS_dict_perspective = format_eval_perspective('Seed_MIWS/Seed_Dataset/ISIS_Seed_Complete.csv', metrics)
WS_dict_perspective = format_eval_perspective('Seed_MIWS/Seed_Dataset/WS_Seed_Complete.csv', metrics)
# tweets_dict_perspective = format_eval_openai("tweets.csv", metrics, labels_present=False, limit=1000)

You are on a social media platform's moderation team. Your task is to decide whether the following tweet contains extremist content or not.
Extremist content includes messages that are considered to be propoganda, radicalization, or recruitment. Please label the tweet as 'Propoganda', 'Radicalization', 'Recruitment', or 'None'.
Your output must be in JSON format and with the following structure:
{"Label": "Propoganda" or "Radicalization" or "Recruitment" or "None"}


In [5]:
combined_dict_openai = {
    'Inputs': ISIS_dict_openai['Inputs'] + WS_dict_openai['Inputs'],
    'Labels': ISIS_dict_openai['Labels'] + WS_dict_openai['Labels']
}

combined_dict_perspective = {
    'Inputs': ISIS_dict_perspective['Inputs'] + WS_dict_perspective['Inputs'],
    'Labels': ISIS_dict_perspective['Labels'] + WS_dict_perspective['Labels']
}

In [6]:
print("Combined openAI dictionary:")
print(f"Inputs shape: {len(combined_dict_openai['Inputs'])}")
print(f"Labels shape: {len(combined_dict_openai['Labels'])}")

print("")

print("Combined Perspective dictionary:")
print(f"Inputs shape: {len(combined_dict_perspective['Inputs'])}")
print(f"Labels shape: {len(combined_dict_perspective['Labels'])}")

Combined openAI dictionary:
Inputs shape: 398
Labels shape: 398

Combined Perspective dictionary:
Inputs shape: 398
Labels shape: 398


In [7]:
# Save the combined dictionary to a new .json file at the specified path
import json
json_file_openai = json.dumps(combined_dict_openai, indent=4)
json_file_perspective = json.dumps(combined_dict_perspective, indent=4)

with open("eval_data_openai.json", "w") as file:
    file.write(json_file_openai)

with open("eval_data_perspective.json", "w") as file:
    file.write(json_file_perspective)

In [8]:
print(combined_dict_openai['Inputs'][0])

[{'role': 'system', 'content': 'You are on a social media platform\'s moderation team. Your task is to decide whether the following tweet contains extremist content or not.\nExtremist content includes messages that are considered to be propoganda, radicalization, or recruitment. Please label the tweet as \'Propoganda\', \'Radicalization\', \'Recruitment\', or \'None\'.\nYour output must be in JSON format and with the following structure:\n{"Label": "Propoganda" or "Radicalization" or "Recruitment" or "None"}'}, {'role': 'user', 'content': 'Coalition planes massacred these children in airstrikes\nOn #Hit ,#Anbar\nhttp://t.co/yCsEgkwDY6#Iraq'}]


In [9]:
print(combined_dict_perspective['Inputs'][0])

{'comment': {'text': 'Coalition planes massacred these children in airstrikes\nOn #Hit ,#Anbar\nhttp://t.co/yCsEgkwDY6#Iraq'}, 'requestedAttributes': {'IDENTITY_ATTACK': {}, 'SEVERE_TOXICITY': {}, 'THREAT': {}}}


In [22]:
gemini_sys_message = '''You are on a social media platform's moderation team. Your task is to decide whether the tweets provided to you contain extremist content or not.
Extremist content includes messages that are considered to be propoganda, radicalization, or recruitment. Please label each tweet as 'Propoganda', 'Radicalization', 'Recruitment', or 'None'.
The format of the messages supplied to you will be as follows: "Please label the following tweet as 'Propoganda', 'Radicalization', 'Recruitment', or 'None': [TWEET], with id of ID", where TWEET will be replaced by each tweet's text content, and ID is the tweet's id.
For each tweet, your output must be in JSON format and with the following structure:
{"ID": ID, "Label": "Propoganda" or "Radicalization" or "Recruitment" or "None"}, where ID is the tweet's ID that was provided to you in the message.'''
print(gemini_sys_message)

Extremist content includes messages that are considered to be propoganda, radicalization, or recruitment. Please label each tweet as 'Propoganda', 'Radicalization', 'Recruitment', or 'None'.
The format of the messages supplied to you will be as follows: "Please label the following tweet as 'Propoganda', 'Radicalization', 'Recruitment', or 'None': [TWEET], with id of ID", where TWEET will be replaced by each tweet's text content, and ID is the tweet's id.
For each tweet, your output must be in JSON format and with the following structure:
{"ID": ID, "Label": "Propoganda" or "Radicalization" or "Recruitment" or "None"}, where ID is the tweet's ID that was provided to you in the message.


In [40]:
import json
 
with open("../DiscordBot/tokens.json", 'r') as file:
    tokens = json.load(file)


## OLD version - this will error out due to hitting safety filters. See further below for working Gemini implementation


In [None]:
!pip install vertexai

In [None]:
!gcloud auth login

In [None]:
!gcloud auth application-default login

In [43]:
import vertexai

from vertexai.generative_models import GenerativeModel, ChatSession

project_id = tokens['gcp_project_id']

vertexai.init(project=project_id, location="us-central1")

model = GenerativeModel(
    "gemini-1.5-flash"
)

chat = model.start_chat(response_validation=False)

def get_chat_response(chat: ChatSession, prompt: str) -> str:
    text_response = []
    responses = chat.send_message(prompt, stream=True)
    for chunk in responses:
        text_response.append(chunk.text)
    return "".join(text_response)

prompt = '''
    I need you to review the following content to determine if it contains harmful language. The content should not be repeated or generated. Please analyze the text and provide feedback on whether it includes any of the following categories of extremist language: 'Propoganda', 'Radicalization', 'Recruitment', or 'None'.

Content to review:
[Coalition planes massacred these children in airstrikes\nOn #Hit ,#Anbar\nhttp://t.co/yCsEgkwDY6#Iraq]

Please respond with a summary indicating whether any extremist language is present and categorize it accordingly.
'''
print(get_chat_response(chat, prompt))

# prompt = "What are all the colors in a rainbow?"
# print(get_chat_response(chat, prompt))

# prompt = "Why does it appear when it rains?"
# print(get_chat_response(chat, prompt))

ValueError: Cannot get the response text.
Cannot get the Candidate text.
Response candidate content has no parts (and thus no text). The candidate is likely blocked by the safety filters.
Content:
{}
Candidate:
{
  "finish_reason": "SAFETY",
  "safety_ratings": [
    {
      "category": "HARM_CATEGORY_HATE_SPEECH",
      "probability": "NEGLIGIBLE",
      "probability_score": 0.45190093,
      "severity": "HARM_SEVERITY_MEDIUM",
      "severity_score": 0.4285921
    },
    {
      "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
      "probability": "HIGH",
      "blocked": true,
      "probability_score": 0.89823216,
      "severity": "HARM_SEVERITY_MEDIUM",
      "severity_score": 0.6970936
    },
    {
      "category": "HARM_CATEGORY_HARASSMENT",
      "probability": "LOW",
      "probability_score": 0.5125859,
      "severity": "HARM_SEVERITY_MEDIUM",
      "severity_score": 0.4723792
    },
    {
      "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "probability": "NEGLIGIBLE",
      "probability_score": 0.21223201,
      "severity": "HARM_SEVERITY_LOW",
      "severity_score": 0.21502088
    }
  ]
}
Response:
{
  "candidates": [
    {
      "finish_reason": "SAFETY",
      "safety_ratings": [
        {
          "category": "HARM_CATEGORY_HATE_SPEECH",
          "probability": "NEGLIGIBLE",
          "probability_score": 0.45190093,
          "severity": "HARM_SEVERITY_MEDIUM",
          "severity_score": 0.4285921
        },
        {
          "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
          "probability": "HIGH",
          "blocked": true,
          "probability_score": 0.89823216,
          "severity": "HARM_SEVERITY_MEDIUM",
          "severity_score": 0.6970936
        },
        {
          "category": "HARM_CATEGORY_HARASSMENT",
          "probability": "LOW",
          "probability_score": 0.5125859,
          "severity": "HARM_SEVERITY_MEDIUM",
          "severity_score": 0.4723792
        },
        {
          "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
          "probability": "NEGLIGIBLE",
          "probability_score": 0.21223201,
          "severity": "HARM_SEVERITY_LOW",
          "severity_score": 0.21502088
        }
      ]
    }
  ],
  "usage_metadata": {
    "prompt_token_count": 123,
    "candidates_token_count": 1,
    "total_token_count": 124
  }
}

## WORKING version 


In [35]:
!pip install -q -U google-generativeai

In [111]:
sample_prompt = '''You are on a social media platform's moderation team. I need you to review the following content as if it were a tweet to determine if it contains harmful and/or extrimist language. The content should not be repeated or generated. Please analyze the text and provide feedback on whether it includes any of the following categories of extremist language, and choose the one that fits best: 'Propaganda', 'Radicalization', 'Recruitment', or 'None'.

Content to review:
[Coalition planes massacred these children in airstrikes\nOn #Hit ,#Anbar\nhttp://t.co/yCsEgkwDY6#Iraq]

Please respond with a summary indicating whether any extremist language is present and categorize it in only one of the provided categories accordingly.
Please give your response as valid JSON in the following format: {"Label": <INSERT YOUR CATEGORIZATION HERE ("Propaganda" or "Radicalization" or "Recruitment" or "None")>, "Reason": <INSERT YOUR REASON HERE>}.
'''

In [125]:
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

genai.configure(api_key=tokens["gemini_google_ai_studio"])
model = genai.GenerativeModel(model_name='gemini-1.5-flash')
def review_prompt(prompt):
    response = model.generate_content(
        prompt,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
        }
    )
    try:
        res = response.text
        res_json = json.loads(response.text[7:][:-5])
        return res_json
    except:
        # If the response doesn't contain text, check if the prompt was blocked.
        print(response.prompt_feedback)
        if len(response.candidates) > 0:
            # Also check the finish reason to see if the response was blocked.
            print(response.candidates[0].finish_reason)
            # If the finish reason was SAFETY, the safety ratings have more details.
            print(response.candidates[0].safety_ratings)
        return {"error": str(response.prompt_feedback)}
        



In [126]:
res = review_prompt(combined_dict_gemini['Inputs'][173]['content'])
print(res)



block_reason: OTHER

{'error': 'block_reason: OTHER\n'}


In [82]:
def format_eval_gemini(path, system_message, labels_present=True, limit=None):
    df = pd.read_csv(path, encoding='latin-1')
    inputs = []
    labels = []
    for index, row in df.iterrows():
        if limit and index >= limit:
            break
        tweet_text = row['Text']
        prompt = "You are on a social media platform's moderation team. I need you to review the following content as if it were a tweet to determine if it contains harmful and/or extrimist language. The content should not be repeated or generated. Please analyze the text and provide feedback on whether it includes any of the following categories of extremist language, and choose the one that fits best: 'Propaganda', 'Radicalization', 'Recruitment', or 'None'.\n\n"
        prompt += "Content to review:\n"
        prompt += f"[{tweet_text}]\n\n"
        prompt += "Please respond with a summary indicating whether any extremist language is present and categorize it in only one of the provided categories accordingly.\n"
        prompt += "Please give your response as valid JSON in the following format: {\"Label\": <INSERT YOUR CATEGORIZATION HERE (\"Propaganda\" or \"Radicalization\" or \"Recruitment\" or \"None\")>, \"Reason\": <INSERT YOUR REASON HERE>}."
        inputs.append(
            {
                'content': prompt
            }
        )
        if labels_present:
            labels.append(row['Label'])
        else:
            labels.append("None")
    return {
        'Inputs': inputs,
        'Labels': labels
    }

In [83]:
ISIS_dict_gemini = format_eval_gemini('Seed_MIWS/Seed_Dataset/ISIS_Seed_Complete.csv', metrics)
WS_dict_gemini = format_eval_gemini('Seed_MIWS/Seed_Dataset/WS_Seed_Complete.csv', metrics)

In [84]:
combined_dict_gemini = {
    'Inputs': ISIS_dict_gemini['Inputs'] + WS_dict_gemini['Inputs'],
    'Labels': ISIS_dict_gemini['Labels'] + WS_dict_gemini['Labels']
}


In [85]:
print("Combined Gemini dictionary:")
print(f"Inputs shape: {len(combined_dict_gemini['Inputs'])}")
print(f"Labels shape: {len(combined_dict_gemini['Labels'])}")

Combined Gemini dictionary:
Inputs shape: 398
Labels shape: 398


In [86]:
json_file_gemini = json.dumps(combined_dict_gemini, indent=4)

with open("eval_data_gemini.json", "w") as file:
    file.write(json_file_gemini)

In [108]:
predicted = []
for i in range(len(combined_dict_gemini['Inputs'])):
    predicted.append(review_prompt(combined_dict_gemini['Inputs'][i]['content']))
    print(f"finished {i} out of {len(combined_dict_gemini['Inputs'])}")

finished 0 out of 398
finished 1 out of 398
finished 2 out of 398
finished 3 out of 398
finished 4 out of 398
finished 5 out of 398
finished 6 out of 398
finished 7 out of 398
finished 8 out of 398
finished 9 out of 398
finished 10 out of 398
finished 11 out of 398
finished 12 out of 398
finished 13 out of 398
finished 14 out of 398
finished 15 out of 398
finished 16 out of 398
finished 17 out of 398
finished 18 out of 398
finished 19 out of 398
finished 20 out of 398
finished 21 out of 398
finished 22 out of 398
finished 23 out of 398
finished 24 out of 398
finished 25 out of 398
finished 26 out of 398
finished 27 out of 398
finished 28 out of 398
finished 29 out of 398
finished 30 out of 398
finished 31 out of 398
finished 32 out of 398
finished 33 out of 398
finished 34 out of 398
finished 35 out of 398
finished 36 out of 398
finished 37 out of 398
finished 38 out of 398
finished 39 out of 398
finished 40 out of 398
finished 41 out of 398
finished 42 out of 398
finished 43 out of 39

finished 346 out of 398
finished 347 out of 398
finished 348 out of 398
finished 349 out of 398
finished 350 out of 398
finished 351 out of 398
finished 352 out of 398
finished 353 out of 398
finished 354 out of 398
finished 355 out of 398
finished 356 out of 398
finished 357 out of 398
finished 358 out of 398
finished 359 out of 398
finished 360 out of 398
finished 361 out of 398
finished 362 out of 398
finished 363 out of 398
finished 364 out of 398
finished 365 out of 398
finished 366 out of 398
finished 367 out of 398
finished 368 out of 398
finished 369 out of 398
finished 370 out of 398
finished 371 out of 398
finished 372 out of 398
finished 373 out of 398
finished 374 out of 398
finished 375 out of 398
finished 376 out of 398
finished 377 out of 398
finished 378 out of 398
finished 379 out of 398
finished 380 out of 398
finished 381 out of 398
finished 382 out of 398
finished 383 out of 398
finished 384 out of 398
finished 385 out of 398
finished 386 out of 398
finished 387 out

In [109]:
print(len(predicted))

398


In [120]:
json_file_predictions_gemini = json.dumps({"predictions": predicted}, indent=4)

with open("predictions_gemini.json", "w") as file:
    file.write(json_file_predictions_gemini)