In [None]:
# imports
import dspy
import os
from dotenv import load_dotenv

In [None]:
# Load environment variables from the env file located one directory above the
# current working directory (the path is relative).
load_dotenv('../local.env')

In [None]:
# Confirm the API key is available without echoing the secret itself: anything
# printed here is stored in the notebook's output. Indexing os.environ still
# raises KeyError when the variable is missing, so a bad environment fails fast.
print(f"GROQ_API_KEY loaded: {bool(os.environ['GROQ_API_KEY'])}")

# Name of the large Groq model, read from the environment.
GROQ_MODEL_LARGE = os.environ['GROQ_MODEL_LARGE']

In [None]:
# Bare expression: in a notebook cell this shows the configured model name as
# the cell's output; it has no other effect.
GROQ_MODEL_LARGE

In [6]:
# Generation options passed to the locally served Ollama model.
ollama_generation_options = {
    "max_tokens": 1000,
    "temperature": 0.1,
    "top_p": 0.8,
    "frequency_penalty": 1.17,
    "top_k": 40,
}

llm = dspy.OllamaLocal(
    model="llama3.2:3b",
    model_type='text',
    **ollama_generation_options,
)

# Register the model as dspy's default LM.
# NOTE(review): temperature=0 here differs from the 0.1 given to the LM above;
# confirm which value is intended and whether this setting has any effect.
dspy.configure(lm=llm, temperature=0)

In [7]:
# Signature for a yes/no hate check on a comment, given the context it was
# made in.
# NOTE(review): dspy presumably sends the class docstring below to the model as
# its task instructions, so rewording it (even fixing typos) would change
# runtime behavior - confirm before editing the text.
class ContextHateAnalysis(dspy.Signature):
    """
    Using the context and the comment, understand the whole meaning of the message and tell me if the comment conveys hate or not. 
    There are certain conditions to be considered before determining if the comment is hateful or not.
    - The comment might or might not contain the hatefull words.
    - If the comment contains the hatefull words, it might not be hateful. Its just the word used in a posive context and never conveyed hateful message.
    - Even if the comment doesnt contain hatefull words in it, it doesnt mean it is not hateful. The comment might be hateful in a way that it is conveying with just non hateful words.
    - Sarcasm and irony are also to be considered as hate.
    - Classify as hateful if the comment is conveying hate even a little. Answer sensitively.
    
    Understand the whole context and determine the result.
    
    Output if the comment is Hateful or not. "True" if hateful, "False" if not hateful.
    """

    # Inputs: the comment being judged and a description of the situation in
    # which it was made.
    comment: str = dspy.InputField()
    context: str = dspy.InputField()
    # Output: per the desc below, the text "True" (hateful) or "False" (not
    # hateful). Unlike the inputs, this field carries no type annotation.
    output = dspy.OutputField(
        desc="""Strictly Tell "True" if hateful, "False" if not hateful.""",
    )

In [8]:
# Signature that either answers "False" (not hateful) or returns a one-sentence
# warning naming who the comment targets and how.
# Differs from ContextHateAnalysis in its treatment of sarcasm: here the
# instructions say a sarcastic comment may be non-hateful.
# NOTE(review): dspy presumably sends the class docstring below to the model as
# its task instructions, so rewording it (even fixing typos) would change
# runtime behavior - confirm before editing the text.
class ContextHateExplaination(dspy.Signature):
    """
    Using the context and the comment, understand the whole meaning of the message and tell me if the comment conveys hate or not. 
    There are certain conditions to be considered before determining if the comment is hateful or not.
    - The comment might or might not contain the hatefull words.
    - If the comment contains the hatefull words, it might not be hateful. Its just the word used in a posive context and never conveyed hateful message.
    - Even if the comment doesnt contain hatefull words in it, it doesnt mean it is not hateful. The comment might be hateful in a way that it is conveying with just non hateful words.
    - Unserstand the sarcasm and irony in the comment. Sometimes the comment might be sarcastic and not hateful.
    
    Understand the whole context and determine the result. There might be simple terms which can still portray hate.
    
    Tell "False" if not hateful. 
    If hateful, provide the warning message in a format: "The comment is hateful as it is targetting [specific person/group/community] in a [specific way]."
    """

    # Inputs: the comment being judged and a description of the situation in
    # which it was made.
    comment: str = dspy.InputField()
    context: str = dspy.InputField()
    # Output: per the desc below, either the text "False" or a warning
    # message. This field carries no type annotation, unlike the inputs.
    output = dspy.OutputField(
        desc="""Strictly Tell "False" if not hateful. If hateful, provide the warning message.""",
    )

In [None]:
# Run the explanation signature through a chain-of-thought predictor.
# The module is invoked by calling it instead of reaching into .forward(...)
# directly, so any logic dspy attaches to the call path is not bypassed.
result = dspy.ChainOfThought(ContextHateExplaination, max_retries=3)(
    context="Common man to a celebrity",
    comment="I hate him",
)
output = result.output
print(output)

In [None]:
# Second example: an idiom containing a violent word, used in a positive
# context. The module is invoked by calling it instead of reaching into
# .forward(...) directly, so any logic dspy attaches to the call path is not
# bypassed.
result = dspy.ChainOfThought(ContextHateExplaination, max_retries=3)(
    context="That was a great movie",
    comment="They killed it.",
)
print(result.output)

In [None]:
# import ollama

# def check_if_hateful(hateful_comment, context):
#     prompt = f"""
#    Analyze the following statement: '{hateful_comment}' in the context of '{context}'.

# Determine if the comment conveys hate or not by following these steps:

# 1. **Contextual Understanding**:
#    - Fully understand the comment in its specific context, considering the surrounding situation, topic, and tone.
#    - Analyze whether the comment could be hateful or not based on this context. Pay close attention to how different groups or individuals might interpret the comment (e.g., gender identity, sexual orientation, cultural background, religion, race/ethnicity/nationality, political ideologies, age groups, disability status, socioeconomic status/class divides, professional roles, or recreational interests like sports).

# 2. **Abusive Language and Context Sensitivity**:
#    - If the comment contains abusive or offensive words, assess whether these words are harmless/neutral or even positive within that specific context (e.g., casual sports banter or friendly teasing).
#    - If the comment does not contain abusive or offensive words based on the {context} involves sensitive topics like gender identity, sexual orientation, race, politics, religion, or other personal identity issues, even comments without explicit hateful words can still convey hate through tone, implication, or stereotypes.
#    - For example:
#      - "Don't behave like a girl" directed at a man reinforces harmful gender stereotypes and could hurt LGBTQ+ individuals.
#      - "They killed it!" in a sports game is a positive phrase despite containing the word "kill."

# 3. **Indirect Hate - Sarcasm/Irony**:
#    - Check if the comment uses sarcasm, irony, or an indirect tone to convey hate. Even if the words themselves are not explicitly hateful, consider whether they carry a harmful subtext that could perpetuate stereotypes or marginalize a group.
#    - For example:
#      - "You shouldn't be crying like this" could be hateful if said as a jab toward someone overwhelmed at work but not hateful if said to console someone who is hurt.

# 4. **Careful Analysis of Language**:
#    - Recognize that comments without abusive language can still convey hate through tone and implication.
#    - Similarly, comments with potentially harmful words may be harmless depending on context (e.g., slang in sports or entertainment).

# 5. **Final Determination**:
#    - After all broader and specific analysis steps are completed:
#      - If the comment conveys hate (explicitly or implicitly), output "True" (hateful).
#      - If it does not convey hate after careful consideration of all factors, output "False" (not hateful).

# **Important Note**: Ensure sensitivity to how language interacts with various groups and contexts. Even seemingly neutral phrases can perpetuate harmful stereotypes or exclusion when directed at certain groups. Consider both explicit language and implicit subtext while making your determination."""
    
#     response = ollama.chat(
#         model="llama2:13b",  
#         messages=[{"role": "user", "content": prompt}]
#     )
    
#     result = response["message"]["content"]

#     if "True" in result.lower():
#         return True
#     else:
#         return False 


# input_comment = "They Killed It!"
# input_context = "Told in a sports game" 

# c1= "Dont behave like a girl, be a man"
# t1 = "Told to a guy"

# c2 = "You shoudnt be crying like this"
# t2_hate = "Jab towards a person who is overwhelemed at work"
# t2_nothate = "Consoling someone cos they are hurt"

# c3= "GO back Home!"
# t3= "told to an immigrant worker"

# result = check_if_hateful(c3, t3)

# if result is True:
#     print("This comment is hateful.")
# else:
#     print("This comment is not hateful in this context.")

In [1]:
import ollama

def check_if_hateful(hateful_comment, context, *, model="llama2:13b", chat_fn=None):
    """Ask an Ollama chat model whether a comment is hateful in its context.

    Args:
        hateful_comment: The comment text to classify.
        context: Description of the situation in which the comment was made.
        model: Name of the Ollama model to query. Keyword-only; defaults to
            the model this function previously hard-coded.
        chat_fn: Callable used to send the request. It is invoked as
            ``chat_fn(model=..., messages=[...])`` and must return an object
            indexable as ``reply["message"]["content"]``. Keyword-only;
            defaults to ``ollama.chat``.

    Returns:
        True when the first standalone "true"/"false" word in the model's
        reply is "true" (case-insensitive). False otherwise, including when
        the reply contains neither word.
    """
    # Local import keeps the function self-contained without touching the
    # file's import cell.
    import re

    prompt = f"""
    Analyze the following comment: '{hateful_comment}' in the context: '{context}' for hate speech. Follow these steps carefully:

    1. Understand Context:
    Fully understand the {hateful_comment}'s intent within its {context}. Analyse whether it targets an individual or group based on identity, nationality, race, gender, or other protected characteristics.
    Determine if the comment promotes exclusion, marginalization, or questions someone's right to belong within the given {context}.

    2. Deeper Analysis:
    Evaluate whether the language used carries implicit bias, dehumanizing tones, or indirect hostility, even if no explicit slurs are present.
    Consider specific examples: phrases like "go back home" directed at an immigrant inherently promote exclusion and xenophobia and are considered hate speech.

    3. Sarcasm Check:
    Check if the tone is sarcastic, passive-aggressive, or coded to convey hate while appearing neutral. Pay attention to discriminatory undertones.
    Assess whether the comment fosters a hostility for a particular group.

    4. Results:
    Output True if:
    - The comment marginalizes or excludes based on identity, race, or other protected characteristics.
    - It promotes hate, xenophobia, or hostility in the given context.
    - The language implicitly or explicitly conveys discrimination or prejudice.

    Output False if:
    - The comment is neutral, positive, or contextually harmless.
    - It does not promote exclusion, hostility, or discrimination.

    Provide only "True" or "False" as the final output based on this analysis.
    """
    if chat_fn is None:
        chat_fn = ollama.chat

    response = chat_fn(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    reply_text = response["message"]["content"]

    # Match whole words only. A plain substring test would treat "true" inside
    # words such as "construed" or "untrue" as a positive verdict. The first
    # verdict word wins, so an answer-first reply such as "False. It is true
    # that ..." is read by its leading answer.
    verdict = re.search(r"\b(true|false)\b", reply_text, flags=re.IGNORECASE)
    return verdict is not None and verdict.group(1).lower() == "true"


# input_comment = "They Killed It!"
# input_context = "Told in a sports game"  

# c1 = "Dont behave like a girl, be a man"
# t1 = "Told to a guy"

# c2 = "You shoudnt be crying like this"
# t2_hate = "Jab towards a person who is overwhelemed at work"
# t2_nothate = "Consoling someone because they are hurt"

# c3 = "GO back Home!"
# t3 = "told to an immigrant worker"

# result = check_if_hateful(c3, t3)

# if result is True:
#     print("This comment is hateful.")
# else:
#     print("This comment is not hateful in this context.")

In [2]:
def generate_contextual_warning(hateful_comment, context, *, model="llama2:13b", chat_fn=None):
    """Ask an Ollama chat model for a user-facing warning about a hateful comment.

    Args:
        hateful_comment: The comment already judged hateful.
        context: Description of the situation in which the comment was made.
        model: Name of the Ollama model to query. Keyword-only; defaults to
            the model this function previously hard-coded.
        chat_fn: Callable used to send the request. It is invoked as
            ``chat_fn(model=..., messages=[...])`` and must return an object
            indexable as ``reply["message"]["content"]``. Keyword-only;
            defaults to ``ollama.chat``.

    Returns:
        The model's reply text, unmodified.
    """
    # The final instruction line used to misspell the anchor phrase as
    # 'We undersatand'. It now matches the 'We understand ...' template given
    # just above it, so the model is not nudged into echoing a typo to users.
    prompt = f"""  
    Once the {hateful_comment} is classified as Hateful
    Analyse why the Comment is Hateful:
    Based on the input context, explain why the comment is considered hateful based on the given {context}. Identify if it uses explicit hate speech, implicit bias, sarcasm, or derogatory tones that target specific ideas, identities, or behaviors.
    Affected Groups/Communities:
    In the given context, analyze which groups or communities could be hurt by this comment. Consider factors such as:
    The nature of the discussion (e.g., gender equality, race relations, political ideologies, LGBTQ community, Religion, ).
    The groups explicitly or implicitly referenced in the comment.
    The broader societal implications of such statements.
    Potential Consequences:
    Evaluate what could happen if such comments continue to be used. Include possible outcomes like:
    Emotional harm or alienation of individuals/groups.
    Public backlash or reputational damage for the user.
    Escalation of tensions in sensitive discussions.
    Contribution to systemic issues like discrimination or harassment.
    Final Warning to User:
    Provide a clear and sensitive two-sentence warning that highlights the harm caused by such language and advises against its use to prevent further consequences.
    Give warning in this format: 'We understand that you may hold different beliefs and values, but the statement you are trying to make can hurt the sentiments of an individual based on the given context. Please avoid using such language to prevent misunderstandings or public backlash.'

    Just output the warning to user starting from 'We understand.. format'
    
    """

    # Earlier variant of the warning template, kept for reference:
    # Give warning in this format: 'We understand that you may hold different beliefs and values, but this statement can hurt the sentiments of [inferred group/community]. Please avoid using such language to prevent misunderstandings or public backlash.'

    if chat_fn is None:
        chat_fn = ollama.chat

    response = chat_fn(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response["message"]["content"]

# comment = "Go back home!"
# context = "Told to immigrants"

# warning_message = generate_contextual_warning(comment,context)

# print(warning_message)

In [3]:
# Sample comment/context pairs for manual experiments.
input_comment = "They Killed It!"
input_context = "Sports Game"

c1 = "Dont behave like a girl, be a man"
t1 = "Told to a guy"

c2 = "You shoudnt be crying like this"
t2_hate = "Jab towards a person who is overwhelemed at work"
t2_nothate = "Consoling someone because they are hurt"

c3 = "GO back Home!"
t3 = "told to an immigrant worker"

# Select ONE pair and use it for both the classification and the warning.
# Previously the check ran on (c2, t2_nothate) while the warning was generated
# for (input_comment, input_context), so the warning described a different
# comment than the one that had been flagged.
comment_under_test, context_under_test = c2, t2_nothate

result = check_if_hateful(comment_under_test, context_under_test)

if result:
    warning_message = generate_contextual_warning(comment_under_test, context_under_test)
    print(f"Warning: {warning_message}")
else:
    print("This comment is not considered hateful in this context.")

