In [None]:
import pandas as pd
import numpy as np
import sys
import os

import ast
import random

import transformers
from transformers import set_seed

import torch

import json
import re

In [None]:
# Seed every random number generator the notebook touches so runs are repeatable.
random_seed = 42

random.seed(random_seed)        # Python's built-in RNG
torch.manual_seed(random_seed)  # PyTorch RNG
set_seed(random_seed)           # seeding helper imported from transformers

In [None]:
# Checkpoint identifier and the text-generation pipeline built from it,
# placed on device "cuda:0".
model_id = "meta-llama/Llama-3.1-8B-Instruct"

pipeline = transformers.pipeline(task="text-generation", model=model_id, device="cuda:0")

In [None]:
# Load the posts; the first CSV column becomes the DataFrame index.
df = pd.read_csv("Reddit_DVSV_Data.csv", index_col=0)

In [None]:
# Text handed to the model for each post: the title, a newline, then the body.
# fillna('') matters because a missing title or body is NaN after read_csv, and
# `str + NaN` would make the whole entry NaN (a float), which cannot be
# concatenated onto the prompt string later on. The newline keeps the last word
# of the title from running into the first word of the body.
df['content'] = df['title'].fillna('') + '\n' + df['text'].fillna('')
content_list = df['content'].to_list()

In [None]:
# Root directory that receives every CSV written by this notebook.
output_dir = "Output"
os.makedirs(output_dir, exist_ok=True)

def create_folder(feature):
    """Make sure the folder ``<output_dir>/<feature>`` exists.

    Args:
        feature: Name of the feature group; used as the sub-folder name.

    Prints whether the folder was newly created or was already present.
    Returns None.
    """
    feat_dir = os.path.join(output_dir, f'{feature}')
    already_present = os.path.isdir(feat_dir)
    # exist_ok=True: no error if the folder shows up between the check and this call.
    os.makedirs(feat_dir, exist_ok=True)
    if already_present:
        print(f'Folder {feature} already exists')
    else:
        print(f'Created folder {feature}')

# One sub-folder per feature group extracted below.
features = ['Relationship', 'Location', 'Environment', 'Pattern', 'SelfBlame', 'Abuser', 'Characters',
            'AbuseType', 'Coping', 'Intent', 'Impact']

for feature in features:
    create_folder(feature)

## Prompting

In [None]:
def process_post(input_system, input_task, reddit_post):
    """Send one post through the chat pipeline and return the model's reply message.

    Args:
        input_system: System prompt describing the model's role.
        input_task: Task instructions; the post text is appended directly to them.
        reddit_post: Text of the post to analyse.

    Returns:
        The last message of the generated conversation, i.e. the model's reply.
        Callers read its 'content' entry.
    """
    messages = [
        {"role": "system", "content": input_system},
        # The prompt must be sent under the standard "user" role: chat templates
        # are defined around the system/user/assistant roles the model was
        # trained with, and some templates reject any other role outright.
        {"role": "user", "content": input_task + reddit_post},
    ]

    # Sampling (not beam search) at temperature 0.9, at most 1024 new tokens.
    # The EOS token doubles as the padding token id.
    outputs = pipeline(
        messages, num_beams=1, do_sample=True, max_new_tokens=1024, temperature = 0.9,
        pad_token_id = pipeline.tokenizer.eos_token_id
    )

    # "generated_text" holds the whole conversation; the final entry is the reply.
    output = outputs[0]["generated_text"][-1]

    return output

In [None]:
def _extract_feature_value(raw_reply, feature):
    """Parse the model's reply text as a Python/JSON-style dict and return ``dict[feature]``.

    The whole reply is tried first; if that is not a valid literal, the first
    ``{...}`` span is parsed instead so extra chatter around the dict is ignored.
    Raises whatever ``ast.literal_eval`` or the lookup raises when parsing fails.
    """
    text = raw_reply.strip()
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError):
        match = re.search(r'\{.*?\}', text, flags=re.DOTALL)
        if match is None:
            raise
        parsed = ast.literal_eval(match.group(0))
    return parsed[feature]


def process_batches(input_dataset, input_list, feature, feat_path, input_system, input_task, fname, start_idx = 0, batch_size = 10000):
    """Classify every post in ``input_list`` and write one CSV per batch.

    Args:
        input_dataset: DataFrame with 'title' and 'text' columns, positionally
            aligned with ``input_list``.
        input_list: Post texts sent to the model.
        feature: Key expected in the model's dict reply; also the output column name.
        feat_path: Sub-folder of ``Output`` that receives the CSV files.
        input_system: System prompt.
        input_task: Task prompt; the post text is appended to it.
        fname: Suffix appended to each batch file name.
        start_idx: Index of the first post to process (to resume a run).
        batch_size: Number of posts per output file.

    Each post is tried up to four times. On success the parsed value is stored;
    otherwise the raw reply of the last attempt is stored (None if generation
    itself failed) so the row can be inspected by hand.
    """
    max_attempts = 4

    for batch_start in range(start_idx, len(input_list), batch_size):
        data = {'Title': [], 'Text': [], feature : []}
        batch_end = min(batch_start + batch_size, len(input_list))
        print("Processing batch:", batch_start, batch_end)
        batch = input_list[batch_start:batch_end]

        for i, content in enumerate(batch):
            result = None

            for attempt in range(1, max_attempts + 1):
                # Reset per attempt so a failed generation never reports a reply
                # left over from an earlier attempt or an earlier post.
                raw_reply = None
                try:
                    raw_reply = process_post(input_system, input_task, content)['content']
                    # Only the reply is cleaned up; the post text itself is
                    # passed unchanged on every retry.
                    result = _extract_feature_value(raw_reply, feature)
                    break
                except Exception as e:
                    print(f"Attempt {attempt} failed for post {batch_start + i}: {e}")
                    result = raw_reply

            data['Title'].append(input_dataset['title'].iloc[batch_start + i])
            data['Text'].append(input_dataset['text'].iloc[batch_start + i])
            data[feature].append(result)

        batch_df = pd.DataFrame(data)
        batch_filename = f"Output/{feat_path}/{batch_start}_{batch_end}{fname}.csv"
        # Create the target folder on demand so a long batch is not lost at write time.
        os.makedirs(os.path.dirname(batch_filename), exist_ok=True)
        batch_df.to_csv(batch_filename, index=False)
        print(f"Batch {batch_start}_{batch_end} processed and saved to {batch_filename}")

## Relationship Feature Extraction

In [None]:
# Relationship / IntimatePartner: Yes/No on whether the perpetrator is the
# victim's intimate partner, run over every post.
# Adjacent string literals are joined verbatim, so every fragment carries its own
# trailing space or newline to keep sentences from fusing together.
intimate_partner_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator (the person responsible for the harm) is an intimate partner of the victim (the person harmed). "
)

intimate_partner_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator is an intimate partner of the victim. "
    "An intimate partner is someone with whom the victim has or had a close personal relationship involving emotional connection, regular interaction, and potentially romantic, sexual, or cohabiting aspects. "
    "This includes current or former spouses, romantic or sexual partners, and individuals in a dating relationship, regardless of legal status, gender, or duration. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '''{"IntimatePartner": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'IntimatePartner', 'Relationship', intimate_partner_input_system, intimate_partner_task, 'IntimatePartner', batch_size = 1000)


In [None]:
# Relationship / FamilyMember: Yes/No on whether the perpetrator is a family
# member of the victim, run over every post.
# Adjacent string literals are joined verbatim, so every fragment carries its own
# trailing space or newline to keep sentences from fusing together.
family_member_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator (the person responsible for the harm) is a family member of the victim (the person harmed)."
)

family_member_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator is a family member of the victim. "
    "A family member includes relatives by blood, marriage, adoption, or guardianship, such as parents, children, siblings, grandparents, aunts, uncles, cousins, and in-laws. "
    "It also includes step-relatives, foster family members, and partners of family members (e.g., an aunt’s spouse or cousin’s partner). "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '''{"FamilyMember": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'FamilyMember', 'Relationship', family_member_input_system, family_member_task, 'FamilyMember', batch_size=1000)


In [None]:
# Relationship / CloseFriend: Yes/No on whether the perpetrator is a close
# friend of the victim, run over every post.
# Adjacent string literals are joined verbatim, so every fragment carries its own
# trailing space or newline to keep sentences from fusing together.
close_friend_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator (the person responsible for the harm) is a close friend of the victim (the person harmed)."
)

close_friend_task = (
    "Your task is to analyze the following trauma narrative and determine whether the perpetrator is a close friend of the victim. "
    "A close friend is someone with whom the victim shares a deep, trusting bond, provides emotional support, communicates meaningfully, and spends quality time. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Format your output as follows: \n"
    '''{"CloseFriend": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'CloseFriend', 'Relationship', close_friend_input_system, close_friend_task, 'CloseFriend', batch_size = 1000)



In [None]:
# Relationship / Colleague: Yes/No on whether the perpetrator is a colleague
# or classmate of the victim, run over every post.
# Adjacent string literals are joined verbatim, so every fragment carries its own
# trailing space or newline to keep sentences from fusing together.
colleague_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator (the person responsible for the harm) is either a colleague or classmate of the victim (the person harmed)."
)

colleague_task = (
    "Your task is to analyze the following trauma narrative and determine whether the perpetrator is either a colleague or classmate of the victim. "
    "A colleague is someone with whom the victim works in a professional setting, such as an office, business, or other workplace. "
    "A classmate is someone who attends the same school, college, or academic program as the victim. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Format your output as follows: \n"
    '''{"Colleague": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Colleague', 'Relationship', colleague_input_system, colleague_task, 'Colleague', batch_size = 1000)



In [None]:
# Relationship / AuthorityFigure: Yes/No on whether the perpetrator holds
# authority over the victim, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
authority_fig_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to identify whether the perpetrator holds a position of authority or influence over the victim in a professional, institutional, or societal context."
)

authority_fig_task = (
    "Your task is to analyze the following trauma narrative and determine whether the perpetrator is an authority figure of the victim. "
    "An authority figure is an individual who holds power or influence over another person in any capacity, whether professional or societal. "
    "This includes roles such as supervisors, teachers, managers, police officers, government officials, or community leaders, who can enforce rules, make decisions, or provide guidance. "
    "Important: This does NOT include intimate partners or family members, even if they may have power over the victim. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences.\n"
    "Format your output as follows: \n"
    '''{"AuthorityFigure": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'AuthorityFigure', 'Relationship', authority_fig_input_system, authority_fig_task, 'AuthorityFigure', batch_size=1000)


In [None]:
# Relationship / Stranger: Yes/No on whether the perpetrator is a stranger to
# the victim, run over every post.
# Adjacent string literals are joined verbatim, so every fragment carries its own
# trailing space or newline to keep sentences from fusing together.
stranger_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to identify if the perpetrator (the person responsible for the harm) is a stranger to the victim (the person harmed)."
)


stranger_task = (
    "Your task is to analyze the following trauma narrative and determine whether the perpetrator is a stranger to the victim. "
    "A stranger is someone with whom the victim has no prior personal relationship and no direct interaction before the incident. "
    "Important: The victim should not have any relation with the perpetrator. Exclude cases where the perpetrator is not mentioned. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Format your output as follows: \n"
    '''{"Stranger": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Stranger', 'Relationship', stranger_input_system, stranger_task, 'Stranger', batch_size = 1000)


## Location Feature Extraction

In [None]:
# Location / Domestic: Yes/No on whether the violence occurred in a domestic
# space, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
domestic_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred in a domestic space."
)

domestic_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator on the victim occurred in a domestic space. "
    "Domestic spaces are private living environments, such as homes, apartments, and residential areas, where individuals typically reside. "
    "Important: This does NOT include social gatherings occurring in residential areas. "
    "Consider only information explicitly available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"Domestic": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Domestic', 'Location', domestic_input_system, domestic_task, 'Domestic', batch_size=1000)


In [None]:
# Location / Social: Yes/No on whether the violence occurred in a social
# space, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
social_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred in a social space."
)

social_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator (person causing harm) on the victim (person harmed) occurred in a social space. "
    "Social spaces are recreational settings where people gather for leisure and entertainment, such as bars, clubs, parties, and gyms. "
    "Consider only information explicitly available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"Social": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Social', 'Location', social_input_system, social_task, 'Social', batch_size=1000)


In [None]:
# Location / Professional: Yes/No on whether the violence occurred in a
# professional (work or education) space, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
professional_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred in a professional space."
)

professional_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator (person causing harm) on the victim (person harmed) occurred in a professional space. "
    "Professional spaces are environments associated with work or education, such as offices, schools, and colleges, where individuals engage in career or academic activities. "
    "Consider only information explicitly available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"Professional": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Professional', 'Location', professional_input_system, professional_task, 'Professional', batch_size=1000)


In [None]:
# Location / Public: Yes/No on whether the violence occurred in a public
# space, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
public_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred in a public space."
)

public_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator (person causing harm) on the victim (person harmed) occurred in a public space. "
    "Public spaces are open and accessible areas available to the general public, such as streets, parks, shopping centers, and public transportation hubs. "
    "Consider only information explicitly available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"Public": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Public', 'Location', public_input_system, public_task, 'Public', batch_size=1000)


In [None]:
# Location / Cyber: Yes/No on whether the violence occurred in an online
# (cyber) space, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
cyber_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred in a cyber space. "
)

cyber_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator (person causing harm) on the victim (person harmed) occurred in a cyber space. "
    "Cyber spaces are virtual environments where online interactions and activities take place, such as websites, social media, and digital platforms. "
    "Examples of violence in cyber spaces include but are not limited to: cyberbullying, online stalking, revenge porn, doxxing, sending threatening or explicit messages, sending uncomfortable images or links, coercing someone into sending nudes, harassment on social media platforms, abusive behavior in online forums, sharing private or explicit content without consent, impersonating someone online, gaslighting, and spreading rumors or false information. "
    "Additionally, harassment can include sending unsolicited messages, creating fake profiles for manipulation, trolling, and attempting to isolate or intimidate someone online. "
    "Consider only information explicitly available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"Cyber": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Cyber', 'Location', cyber_input_system, cyber_task, 'Cyber', batch_size = 1000)


## Environment Feature Extraction

In [None]:
# Environment / NightTime: Yes/No on whether the violence occurred at
# nighttime, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
night_time_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred at nighttime. "
)

night_time_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator (person causing harm) on the victim (person harmed) occurred at nighttime. "
    "Nighttime is defined as the period between 9:00 PM and 6:00 AM local time, when it is typically dark outside. "
    "Consider only explicit information available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"NightTime": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'NightTime', 'Environment', night_time_input_system, night_time_task, 'NightTime', batch_size=1000)


In [None]:
# Environment / Intoxicated: Yes/No on whether the victim was intoxicated when
# the violence occurred, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
intoxication_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim occurred when the victim was intoxicated. "
)

intoxication_task = (
    "Your task is to analyze the following text and identify if the violence inflicted by the perpetrator (person causing harm) on the victim (person harmed) "
    "occurred when the victim was intoxicated, i.e., under the influence of alcohol or any drugs. "
    "Consider only explicit information available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: "
    '''{"Intoxicated": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Intoxicated', 'Environment', intoxication_input_system, intoxication_task, 'Intoxicated', batch_size=1000)


## Pattern Feature Extraction

In [None]:
# Pattern / Singular: Yes/No on whether the violence was a single, isolated
# incident, run over every post.
singular_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim was a single, isolated incident with no indication of repeated or ongoing harm."
)

singular_task = (
    "Analyze the following text and identify if the violence inflicted by the perpetrator on the victim was a singular event. "
    "A singular event is defined as a one-time incident of violence that did not persist or recur over an extended period (e.g., days, weeks, months, or years). "
    "Exclude cases where the violence stopped momentarily and then resumed. Consider only explicit information available in the text. "
    "Return the result strictly in the following format: \n"
    '{"Singular": "Yes/No"}'
    "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Singular',
    feat_path='Pattern',
    input_system=singular_input_system,
    input_task=singular_task,
    fname='Singular',
    batch_size=1000,
)


In [None]:
# Pattern / Recurring: Yes/No on whether the violence was recurring or
# persistent, run over every post.
# The newline after the JSON template keeps it from running straight into the
# "Return only..." instruction (adjacent string literals are joined verbatim).
recurring_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the violence inflicted by the perpetrator on the victim was recurring or persistent."
)
recurring_task = (
    "Analyze the following text and identify if the violence inflicted by the perpetrator on the victim occurred multiple times or was an ongoing pattern. "
    "Recurring or persistent violence may be indicated by phrases such as 'multiple times', 'repeatedly', 'ongoing', 'continued', or 'over time'. "
    "Consider only explicit information available in the text and avoid making assumptions or inferences. "
    "Format your output as follows: \n"
    '''{"Recurring": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Recurring', 'Pattern', recurring_input_system, recurring_task, 'Recurring', batch_size=1000)


## SelfBlame Feature Extraction

In [None]:
# SelfBlame / NotEndingAbuse: Yes/No on whether the victim blames themselves
# for not ending the abuse sooner, run over every post.
# Adjacent string literals are joined verbatim and the post text is appended
# directly to the task prompt, so the JSON template is followed by a newline and
# the prompt ends with a space.
notendingabuse_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual and domestic violence on social media platforms. "
    "Your goal is to determine whether the text explicitly shows that the victim blames themselves for not ending the abuse inflicted by the perpetrator sooner. "
)

notendingabuse_task = (
    "Your task is to analyze the following text and determine whether the victim self-blames for not ending the abuse sooner. "
    "Follow these steps: "
    "1. Identify any direct statements where the victim expresses guilt, regret, or blame for not acting sooner. "
    "2. Classify the text as 'Yes' if self-blame is explicitly present, or 'No' if there is no clear self-blame. "
    "Examples: "
    "Example 1 (Yes): 'I feel disgusted in myself for ever having been with him, and responsible because I didn’t leave sooner.'\n"
    "Example 2 (Yes): 'I froze. I should have screamed or run away, but I didn’t. I just laid there and let him do it to me.'\n"
    "Example 3 (No): 'It took me a long time to leave, but I know that I did the best I could under those circumstances.'\n"
    "Example 4 (No): 'I didn’t fight back, but that doesn’t mean it was my fault.'\n"
    "Important: ONLY use explicit statements in the text. DO NOT make assumptions or infer anything beyond what is directly stated in the text. "
    "Format your output as follows: \n"
    '''{"NotEndingAbuse": "Yes/No"}''' "\n"
    "Return ONLY this JSON format with no extra text, explanations, or alternative options. Below is the input text: "
)

process_batches(df, content_list, 'NotEndingAbuse', 'SelfBlame', notendingabuse_input_system, notendingabuse_task, 'NotEndingAbuse', batch_size=1000)


In [None]:
# SelfBlame / EnablingAbuser: Yes/No on whether the victim blames themselves
# for enabling, allowing, or excusing the abuser's behavior, run over every post.
# Adjacent string literals are joined verbatim and the post text is appended
# directly to the task prompt, so each fragment carries its own trailing space or
# newline and the prompt ends with a space.
enablingabuser_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual and domestic violence on social media platforms. "
    "DO NOT make assumptions or infer anything beyond what is directly stated in the text."
)

enablingabuser_task = (
    "Your task is to analyze the following text and determine whether the victim expresses self-blame for enabling, allowing, or excusing the abuser’s behavior. "
    "Follow these steps: "
    "1. Identify any direct statements where the victim expresses guilt, regret, or responsibility for allowing the abuse to continue. "
    "2. Classify the text as 'Yes' if explicit self-blame is present, or 'No' if the victim does not blame themselves for enabling the abuse. "
    "Examples: "
    "Example 1 (Yes): 'I said yes to making out, but then he forced himself on me. I should have never let things get that far.'\n"
    "Example 2 (Yes): 'I agreed to go to his place, so I guess it’s my fault for not seeing this coming.'\n"
    "Example 3 (Yes): 'I told him we could do some stuff but not sex. Maybe I led him on by not leaving sooner.'\n"
    "Example 4 (No): 'I told him no, but he didn’t listen. That’s not on me.'\n"
    "Example 5 (No): 'Just because I agreed to one thing doesn’t mean he had the right to do whatever he wanted.'\n"
    "Important: ONLY use explicit statements in the text. Do NOT infer.\n"
    "Format your output as follows:\n"
    '''{"EnablingAbuser": "Yes/No"}''' "\n"
    "Return ONLY this JSON format with no extra text, explanations, or alternative options. Below is the input text: "
)

process_batches(df, content_list, 'EnablingAbuser', 'SelfBlame', enablingabuser_input_system, enablingabuser_task, 'EnablingAbuser', batch_size=1000)


## Abuser Feature Extraction

In [None]:
# Abuser / DetailedAbuserDiscussion: Yes/No on whether the post gives explicit,
# specific details about the abuser, run over every post.
# Adjacent string literals are joined verbatim and the post text is appended
# directly to the task prompt, so the JSON template is followed by a newline and
# the prompt ends with a space.
detailed_abuser_discussion_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual and domestic violence on social media platforms. "
    "Your goal is to determine whether the text provides explicit, specific details about the abuser's actions, behavior, or impact."
)

detailed_abuser_discussion_task = (
    "Analyze the following text and decide if it provides explicit, specific details about the abuser's actions, behavior, or impact. "
    "A 'detailed discussion' requires explicit descriptions of the abuser's: "
    "- Specific actions or behaviors "
    "- Thoughts, feelings, or motivations "
    "- Impact on the victim's life or well-being "
    "Examples of detailed discussions include descriptions of manipulation, control, gaslighting, or other abusive tactics. "
    "Classify the text as 'Yes' if it provides explicit, specific details about the abuser, or 'No' if the mention is brief, vague, or absent. "
    "Only consider explicit information in the text and avoid making inferences. "
    "Format your output as follows:\n"
    '''{"DetailedAbuserDiscussion": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'DetailedAbuserDiscussion', 'Abuser', detailed_abuser_discussion_input_system, detailed_abuser_discussion_task, 'DetailedAbuserDiscussion', batch_size=1000)


## Characters Feature Extraction

In [None]:
# Characters / Supporters: Yes/No on whether the victim mentions anyone who
# supported their recovery, run over every post.
# The post text is appended directly to the task prompt, so the prompt ends with
# a space to keep the post from being glued onto "input text:".
supporters_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual and domestic violence on social media platforms. "
    "Your goal is to determine whether the victim mentions any supporters who helped or aided their recovery from the violence they experienced."
)

supporters_task = (
    "Analyze the following social media post and determine whether the victim is mentioning any supporters. "
    "This includes references to individuals or groups providing emotional, psychological, or physical support, such as friends, family members, "
    "therapists, or support organizations, who help in the victim's recovery process by providing any form of aid. "
    "Consider only the information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '''{"Supporters": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Supporters', 'Characters', supporters_input_system, supporters_task, 'Supporters', batch_size=1000)


In [None]:
# Characters / Antagonists: Yes/No on whether the victim mentions anyone other
# than the perpetrator who added to the harm, run over every post.
# The post text is appended directly to the task prompt, so the prompt ends with
# a space to keep the post from being glued onto "input text:".
antagonists_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual and domestic violence on social media platforms. "
    "Your goal is to identify individuals or groups, excluding the primary perpetrator, who have caused additional harm or hindered the victim's recovery."
)

antagonists_task = (
    "Analyze the following social media post and determine whether the victim mentions any antagonists. "
    "An antagonist is any individual or group, excluding the perpetrator, who has contributed to the victim’s distress or hindered their recovery. "
    "This includes friends, family members, authorities, or community members who engaged in harmful actions such as disbelief, victim-blaming, minimizing the abuse, pressuring the victim into silence, or providing inadequate support. "
    "Only consider information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Important: This does NOT include the perpetrator, i.e., the primary person who inflicted violence on the victim. "
    "Return the result strictly in the following format:\n"
    '''{"Antagonists": "Yes/No"}''' "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(df, content_list, 'Antagonists', 'Characters', antagonists_input_system, antagonists_task, 'Antagonists', batch_size=1000)


## Abuse Type Feature Extraction

In [None]:
# AbuseType / Physical: Yes/No on whether the perpetrator physically assaulted
# the victim, run over every post.
physical_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to violence on social media platforms. "
    "Your goal is to determine whether the perpetrator inflicted physical harm or injury on the victim, excluding sexual assault and non-consensual touching."
)

physical_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator physically assaulted the victim, resulting in bodily harm, pain, or injury. "
    "Physical Assault is defined as intentional acts of violence or force, such as: "
    "- Intentional physical contact (e.g., hitting, slapping, pushing, grabbing) that results in visible injuries or harm "
    "- Restraint or confinement (e.g., holding down, tying up) that leads to physical harm "
    "- Use of objects or weapons to inflict physical harm "
    "Excludes sexual assault, non-consensual touching, and emotional or psychological abuse. "
    "Consider only explicit information stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"Physical": "Yes/No"}'
    "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Physical',
    feat_path='AbuseType',
    input_system=physical_input_system,
    input_task=physical_task,
    fname='Physical',
    batch_size=1000,
)


In [None]:
# AbuseType / Verbal: Yes/No on whether the perpetrator abused the victim
# through non-physical means, run over every post.
verbal_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator verbally abused the victim, i.e., through non-physical means."
)

verbal_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator (the person responsible for the harm) verbally abused the victim (the person harmed) through non-physical means. "
    "Non-physical abuse includes behaviors such as: "
    "- Emotional manipulation or control "
    "- Verbal aggression, threats, or intimidation "
    "- Psychological coercion, gaslighting, or stalking "
    "- Constant criticism, humiliation, or belittling "
    "- Isolation from loved ones or support networks "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"Verbal": "Yes/No"}'
    "\n"
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Verbal',
    feat_path='AbuseType',
    input_system=verbal_input_system,
    input_task=verbal_task,
    fname='Verbal',
    batch_size=1000,
)


In [None]:
# Prompts for the 'Economic' abuse Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
economic_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator economically abused the victim. "
)

# Task prompt: definition, output schema, then the post text.
# The trailing space matters: process_post appends the post directly after this string.
economic_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator (the person responsible for the harm) economically abused the victim (the person harmed). "
    "Economic Abuse is defined as controlling or restricting a person's access to financial resources to create dependence, limit autonomy, or force compliance. "
    "This includes preventing employment, withholding money, or coercing financial decisions. \n"
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"Economic": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Economic',
    feat_path='AbuseType',
    input_system=economic_input_system,
    input_task=economic_task,
    fname='Economic',
    batch_size=1000,
)


In [None]:
# Prompts for the 'Technological' abuse Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
technological_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator technologically abused the victim. "
)

# Task prompt: definition, output schema, then the post text.
# The trailing space matters: process_post appends the post directly after this string.
technological_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator (the person responsible for the harm) technologically abused the victim (the person harmed). "
    "Technological abuse is defined as the use of digital tools to stalk, harass, monitor, or control someone, such as cyberstalking, unauthorized tracking, non-consensual sharing of images, or online harassment. "
    "It often extends real-world abuse into virtual spaces. \n"
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"Technological": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Technological',
    feat_path='AbuseType',
    input_system=technological_input_system,
    input_task=technological_task,
    fname='Technological',
    batch_size=1000,
)


In [None]:
# Prompts for the 'SexualHarassment' (non-physical) Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
sexualharassment_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator sexually harassed the victim through non-physical behaviors."
)

# Task prompt: definition, output schema, then the post text.
# The trailing space matters: process_post appends the post directly after this string.
sexualharassment_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator sexually harassed the victim through non-physical behaviors. "
    "Sexual harassment is defined as unwanted verbal or non-verbal behaviors, such as: "
    "- Unwelcome comments, jokes, or remarks of a sexual nature "
    "- Displaying explicit images or content "
    "- Sending unwanted explicit messages or images "
    "- Making unwanted advances or propositions "
    "- Engaging in online stalking or cyberbullying "
    "Excludes physical contact or assault. Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"SexualHarassment": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SexualHarassment',
    feat_path='AbuseType',
    input_system=sexualharassment_input_system,
    input_task=sexualharassment_task,
    fname='SexualHarassment',
    batch_size=1000,
)


In [None]:
# Prompts for the 'SexualAssault' Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
sexualassault_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the perpetrator sexually assaulted the victim. "
)

# Task prompt: definition, output schema, then the post text.
# The trailing space matters: process_post appends the post directly after this string.
sexualassault_task = (
    "Analyze the following trauma narrative and determine whether the perpetrator (the person responsible for the harm) sexually assaulted the victim (the person harmed). "
    "Sexual Assault is defined as any non-consensual sexual act, ranging from unwanted touching of any body part to penetration, obtained through force, coercion, manipulation, or incapacitation. \n"
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"SexualAssault": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SexualAssault',
    feat_path='AbuseType',
    input_system=sexualassault_input_system,
    input_task=sexualassault_task,
    fname='SexualAssault',
    batch_size=1000,
)


## Coping Feature Extraction

In [None]:
# Prompts for the 'ConfrontAbuser' coping Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
confront_abuser_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim confronted the perpetrator about the violence they inflicted. "
)

# Task prompt: definition, output schema, then the post text.
# The trailing space matters: process_post appends the post directly after this string.
confront_abuser_task = (
    "Analyze the following trauma narrative and determine whether the victim (the person harmed) confronted the perpetrator (the person responsible for the harm) about the violence inflicted. "
    "Confrontation includes direct verbal or written communication in which the victim addresses the abuse, expresses their feelings, or demands accountability. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"ConfrontAbuser": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='ConfrontAbuser',
    feat_path='Coping',
    input_system=confront_abuser_input_system,
    input_task=confront_abuser_task,
    fname='ConfrontAbuser',
    batch_size=1000,
)


In [None]:
# Prompts for the 'Reporting' coping Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
reporting_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim reported the violence inflicted by the perpetrator to any authority figure or institution. "
)

# Task prompt: definition, output schema, then the post text.
# The trailing space matters: process_post appends the post directly after this string.
reporting_task = (
    "Analyze the following trauma narrative and determine whether the victim (the person harmed) reported the violence inflicted by the "
    "perpetrator (the person responsible for the harm) to any authority figure or institution. "
    "Authority reporting includes filing an official complaint, contacting law enforcement or the police, reporting to workplace HR, "
    "school officials (e.g., teachers, principals, counselors), university administration, or any other organizational authority. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format: \n"
    '{"Reporting": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Reporting',
    feat_path='Coping',
    input_system=reporting_input_system,
    input_task=reporting_task,
    fname='Reporting',
    batch_size=1000,
)


In [None]:
# Prompts for the 'SeveringTies' coping Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
severing_ties_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim attempted to or successfully severed ties with the perpetrator. "
)

# Task prompt: definition, output schema, then the post text.
# Fixes: the garbled "avoiding ccontactcutting off communication" now reads as two
# list items, and the prompt ends with a space because process_post appends the
# post directly after this string (otherwise the post is glued to the colon).
severing_ties_task = (
    "Analyze the following trauma narrative and determine whether the victim (the person harmed) attempted to or successfully severed ties with the perpetrator (the person responsible for the harm). "
    "Severing ties means ending the relationship. This includes actions such as avoiding contact, cutting off communication, blocking the perpetrator, moving away, or severing social, financial, or legal connections. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"SeveringTies": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SeveringTies',
    feat_path='Coping',
    input_system=severing_ties_input_system,
    input_task=severing_ties_task,
    fname='SeveringTies',
    batch_size=1000,
)


## Intent Feature Extraction

In [None]:
# Prompts for the 'SeekingLegalClassification' intent Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
seeking_legal_classification_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to identify if the victim is seeking clarification on the legal definition or classification of the violence they experienced."
)

# Task prompt: definition, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
seeking_legal_classification_task = (
    "Analyze the following social media post and determine whether the victim is asking for help understanding the legal implications or classification of their experience. "
    "This includes questions about whether their experience meets the legal definition of a specific crime or form of harm, such as sexual assault, domestic violence, or harassment. "
    "Look for explicit phrases, such as 'Is this considered [crime]?' or 'Does this qualify as [form of harm]?' "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"SeekingLegalClassification": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SeekingLegalClassification',
    feat_path='Intent',
    input_system=seeking_legal_classification_input_system,
    input_task=seeking_legal_classification_task,
    fname='SeekingLegalClassification',
    batch_size=1000,
)


In [None]:
# Prompts for the 'SeekingSupport' intent Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
seeking_support_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim is seeking support from readers on the platform. "
)

# Task prompt: definition, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
seeking_support_task = (
    "Analyze the following social media post and determine whether the victim is seeking support. "
    "This includes but is not limited to expressing distress, sharing their emotions, asking for reassurance, asking for ways to recover from a traumatic and abusive experience, "
    "seeking advice on dealing with the aftermath of violence, seeking comfort and validation, looking for support from people who have had similar experiences, and so on. "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"SeekingSupport": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SeekingSupport',
    feat_path='Intent',
    input_system=seeking_support_input_system,
    input_task=seeking_support_task,
    fname='SeekingSupport',
    batch_size=1000,
)


In [None]:
# Prompts for the 'SeekingLegalAdvice' intent Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
seeking_legal_advice_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim is seeking legal advice about their experience with violence from readers on the platform. "
)

# Task prompt: definition, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
seeking_legal_advice_task = (
    "Analyze the following social media post and determine whether the victim is asking for legal advice. "
    "This includes inquiries about what legal actions to take, whether they should report to the police or other authorities, or how to proceed with legal steps. "
    "For instance, the victim might ask: 'What should I do if I want to press charges?' or 'How do I file for a restraining order?' "
    "Consider only information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"SeekingLegalAdvice": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SeekingLegalAdvice',
    feat_path='Intent',
    input_system=seeking_legal_advice_input_system,
    input_task=seeking_legal_advice_task,
    fname='SeekingLegalAdvice',
    batch_size=1000,
)



## Impact Feature Extraction

In [None]:
# Prompts for the 'Nightmares' impact Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
nightmares_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim experiences nightmares, flashbacks, or intrusive memories as the result of the violence they experienced."
)

# Task prompt: definition, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
nightmares_task = (
    "Analyze the following social media post and determine whether the victim experiences nightmares, flashbacks, or intrusive memories as the result of the violence they experienced. "
    "This includes symptoms such as flashbacks and nightmares. "
    "Consider only the information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"Nightmares": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='Nightmares',
    feat_path='Impact',
    input_system=nightmares_input_system,
    input_task=nightmares_task,
    fname='Nightmares',
    batch_size=1000,
)


In [None]:
# Prompts for the 'PhysicalInjury' impact Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
physical_injury_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim is describing physical injuries they sustained as a result of the violence they experienced."
)

# Task prompt: definition, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
physical_injury_task = (
    "Analyze the following social media post and determine whether the victim is describing physical injuries. "
    "This includes references to bruises, cuts, wounds, broken bones, burns, or any other physical harm inflicted by the perpetrator. "
    "Consider only the information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"PhysicalInjury": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='PhysicalInjury',
    feat_path='Impact',
    input_system=physical_injury_input_system,
    input_task=physical_injury_task,
    fname='PhysicalInjury',
    batch_size=1000,
)


In [None]:
# Prompts for the 'LegalBarriers' impact Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
legal_barriers_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim is describing legal barriers they are facing as a result of the violence they experienced."
)

# Task prompt: definition with examples, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
legal_barriers_task = (
    "Analyze the following social media post and determine whether the victim is describing legal barriers. "
    "This includes challenges in accessing legal resources, facing difficulties in filing reports, dealing with a lack of legal support, or experiencing injustice within the legal system. "
    "For example:\n"
    "- A victim might mention that they cannot afford legal representation and are unable to pursue their case.\n"
    "- A victim might describe being turned away by the police when they tried to file a report.\n"
    "- A victim might express frustration about a lack of legal resources or support available to them.\n"
    "- A victim might discuss how the legal system failed to deliver justice or protect them.\n"
    "Consider only the information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"LegalBarriers": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='LegalBarriers',
    feat_path='Impact',
    input_system=legal_barriers_input_system,
    input_task=legal_barriers_task,
    fname='LegalBarriers',
    batch_size=1000,
)


In [None]:
# Prompts for the 'FinancialInstability' impact Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
financial_instability_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim is describing financial instability as a result of the violence they encountered."
)

# Task prompt: definition with an example, output schema, then the post text.
# Fixes: the example sentence now ends with a space (adjacent literals are joined
# with no separator, so it previously ran into "Consider only ..."), and the prompt
# ends with a space because process_post appends the post directly after it.
financial_instability_task = (
    "Analyze the following social media post and determine whether the victim mentions experiencing 'Financial Instability'. "
    "This includes job loss or increased expenses caused by the abuse. Victims may struggle to regain independence or cover basic needs. "
    "Example: 'I lost my job and can’t afford rent because of the time I took off to recover.' "
    "Consider only the information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"FinancialInstability": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='FinancialInstability',
    feat_path='Impact',
    input_system=financial_instability_input_system,
    input_task=financial_instability_task,
    fname='FinancialInstability',
    batch_size=1000,
)


In [None]:
# Prompts for the 'SelfHarming' impact Yes/No flag.
# System prompt: sets the model's role and the single question it must answer.
self_harming_input_system = (
    "You are an expert NLP model specialized in analyzing trauma narratives related to sexual violence on social media platforms. "
    "Your goal is to determine whether the victim is describing self-harming behaviors as a result of the violence they experienced."
)

# Task prompt: definition, output schema, then the post text.
# The prompt ends with a space because process_post appends the post directly
# after this string; without it the post is glued to the colon.
self_harming_task = (
    "Analyze the following social media post and determine whether the victim is describing self-harming behaviors. "
    "This includes references to cutting, burning, overdosing (OD), or any other form of self-injury as a way of coping with the trauma. "
    "This also includes the thought of self-harming. "
    "Consider only the information explicitly stated in the post. Avoid making assumptions or inferences. "
    "Return the result strictly in the following format:\n"
    '{"SelfHarming": "Yes/No"}\n'
    "Return only this format with no additional text, explanations, or options. Do not provide any other output. Below is the input text: "
)

# Label every post, 1000 posts per batch.
process_batches(
    input_dataset=df,
    input_list=content_list,
    feature='SelfHarming',
    feat_path='Impact',
    input_system=self_harming_input_system,
    input_task=self_harming_task,
    fname='SelfHarming',
    batch_size=1000,
)
