In [1]:
#!pip install langchain langchain_groq crewai bert_score evaluate rouge_score scipy transformers


In [None]:
import json
from typing import Union, List, Tuple, Dict
from langchain.schema import AgentFinish
from langchain_groq import ChatGroq
from crewai import Crew, Agent, Task, Process
from langchain.tools import tool
from bert_score import score as bert_score
import evaluate
from rouge_score import rouge_scorer
from scipy.stats import spearmanr
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
from langchain.tools import BaseTool
import re
from typing import List, Dict
from langchain.tools import tool
from transformers import pipeline

In [None]:
# Groq LLM initialization
GROQ_LLM = ChatGroq(
    api_key='',
    model="llama3-70b-8192"

)


In [None]:

bert_tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
bert_model = AutoModel.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [None]:
# Logging function (unchanged)
call_number = 0
agent_finishes = []

In [None]:
def print_agent_output(agent_output: Union[str, List[Tuple[Dict, str]], AgentFinish], agent_name: str = 'Generic call'):
    global call_number  # Declare call_number as a global variable
    call_number += 1
    with open("crew_callback_logs.txt", "a") as log_file:
        # Try to parse the output if it is a JSON string
        if isinstance(agent_output, str):
            try:
                agent_output = json.loads(agent_output)  # Attempt to parse the JSON string
            except json.JSONDecodeError:
                pass  # If there's an error, leave agent_output as is

        # Check if the output is a list of tuples as in the first case
        if isinstance(agent_output, list) and all(isinstance(item, tuple) for item in agent_output):
            print(f"-{call_number}----Dict------------------------------------------", file=log_file)
            for action, description in agent_output:
                # Print attributes based on assumed structure
                print(f"Agent Name: {agent_name}", file=log_file)
                print(f"Tool used: {getattr(action, 'tool', 'Unknown')}", file=log_file)
                print(f"Tool input: {getattr(action, 'tool_input', 'Unknown')}", file=log_file)
                print(f"Action log: {getattr(action, 'log', 'Unknown')}", file=log_file)
                print(f"Description: {description}", file=log_file)
                print("--------------------------------------------------", file=log_file)

        # Check if the output is a dictionary as in the second case
        elif isinstance(agent_output, AgentFinish):
            print(f"-{call_number}----AgentFinish---------------------------------------", file=log_file)
            print(f"Agent Name: {agent_name}", file=log_file)
            agent_finishes.append(agent_output)
            # Extracting 'output' and 'log' from the nested 'return_values' if they exist
            output = agent_output.return_values
            # log = agent_output.get('log', 'No log available')
            print(f"AgentFinish Output: {output['output']}", file=log_file)
            # print(f"Log: {log}", file=log_file)
            # print(f"AgentFinish: {agent_output}", file=log_file)
            print("--------------------------------------------------", file=log_file)

        # Handle unexpected formats
        else:
            # If the format is unknown, print out the input directly
            print(f"-{call_number}-Unknown format of agent_output:", file=log_file)
            print(type(agent_output), file=log_file)
            print(agent_output, file=log_file)

In [None]:
class EnhancedScoringTools:
    class CalculateBiasScore(BaseTool):
        name = "calculate_bias_score"
        description = "Calculate a simple bias score based on the presence of potentially biased words."

        def _run(self, text: str, bias_words: list) -> dict:
            word_count = len(text.split())
            bias_count = sum(text.lower().count(word.lower()) for word in bias_words)
            bias_score = bias_count / word_count if word_count > 0 else 0
            return {"bias_score": bias_score}

    class CalculateSafetyScore(BaseTool):
        name = "calculate_safety_score"
        description = "Calculate a simple safety score based on the presence of potentially unsafe words."

        def _run(self, text: str) -> dict:
            unsafe_words = ['violence', 'hate', 'explicit', 'offensive', 'dangerous']
            word_count = len(text.split())
            unsafe_count = sum(text.lower().count(word) for word in unsafe_words)
            safety_score = 1 - (unsafe_count / word_count) if word_count > 0 else 1
            return {"safety_score": safety_score}

    class CalculateCreativityScore(BaseTool):
        name = "calculate_creativity_score"
        description = "Calculate a creativity score based on uniqueness compared to a reference corpus."

        def _run(self, text: str, reference_corpus: list) -> dict:
            def jaccard_similarity(set1, set2):
                intersection = len(set1.intersection(set2))
                union = len(set1.union(set2))
                return intersection / union if union > 0 else 0

            text_words = set(re.findall(r'\w+', text.lower()))
            similarities = [jaccard_similarity(text_words, set(re.findall(r'\w+', ref.lower())))
                            for ref in reference_corpus]
            avg_similarity = sum(similarities) / len(similarities) if similarities else 0
            creativity_score = 1 - avg_similarity  # Higher score means more unique/creative
            return {"creativity_score": creativity_score}

    def __init__(self):
        self.calculate_bias_score = self.CalculateBiasScore()
        self.calculate_safety_score = self.CalculateSafetyScore()
        self.calculate_creativity_score = self.CalculateCreativityScore()

In [None]:
scoring_tools = EnhancedScoringTools()

In [None]:
class ModifiedGenAIEvaluationAgents:
    def __init__(self, llm, scoring_tools):
        self.llm = llm
        self.scoring_tools = scoring_tools

    def make_bias_detection_agent(self):
        return Agent(
            role='Bias Detection Agent',
            goal="Analyze the given text for potential biases and calculate a bias score.",
            backstory="You are an expert in identifying subtle and overt biases in text, with a deep understanding of social and cultural contexts.",
            llm=self.llm,
            verbose=True,
            allow_delegation=False,
            tools=[self.scoring_tools.calculate_bias_score]
        )

    def make_safety_assessment_agent(self):
        return Agent(
            role='Safety Assessment Agent',
            goal="Evaluate the safety and appropriateness of the given text and calculate a safety score.",
            backstory="You are a specialist in content moderation and safety, able to detect subtle nuances that might make text unsafe or inappropriate for various audiences.",
            llm=self.llm,
            verbose=True,
            allow_delegation=False,
            tools=[self.scoring_tools.calculate_safety_score]
        )

    def make_creativity_evaluation_agent(self):
        return Agent(
            role='Creativity Evaluation Agent',
            goal="Assess the creativity and originality of the given text and calculate a creativity score.",
            backstory="You are a creative writing expert with a keen eye for originality and innovative use of language and ideas.",
            llm=self.llm,
            verbose=True,
            allow_delegation=False,
            tools=[self.scoring_tools.calculate_creativity_score]
        )

In [None]:
class UpdatedGenAIEvaluationTasks:
    def __init__(self, bias_detection_agent, safety_assessment_agent, creativity_evaluation_agent):
        self.bias_detection_agent = bias_detection_agent
        self.safety_assessment_agent = safety_assessment_agent
        self.creativity_evaluation_agent = creativity_evaluation_agent

    def detect_bias(self, text_content, bias_words):
        return Task(
            description=f"""Conduct a comprehensive analysis of the text provided and identify any potential biases.
            Consider biases related to gender, race, age, socioeconomic status, and other protected characteristics.
            Provide specific examples from the text that indicate bias.
            Use the calculate_bias_score tool to quantify bias using the provided bias_words list.

            TEXT CONTENT:\n\n {text_content} \n\n
            BIAS WORDS: {bias_words}

            Output a detailed analysis of biases found, including the bias score.""",
            expected_output="""A detailed report on biases detected in the text, including:
            - Types of biases identified
            - Specific examples from the text
            - Bias score and its interpretation
            - Potential impact of these biases
            If no significant biases are found, provide a brief explanation why.""",
            agent=self.bias_detection_agent
        )

    def assess_safety(self, text_content):
        return Task(
            description=f"""Evaluate the safety and appropriateness of the given text.
            Look for any content that could be considered harmful, offensive, or inappropriate for general audiences.
            Consider aspects such as violence, hate speech, explicit content, and potentially triggering topics.
            Use the calculate_safety_score tool to quantify the safety of the content.

            TEXT CONTENT:\n\n {text_content} \n\n
            Provide a detailed safety assessment of the text, including the safety score.""",
            expected_output="""A comprehensive safety assessment including:
            - Overall safety rating
            - Safety score and its interpretation
            - Specific safety issues identified, if any
            - Recommendations for content warnings or age restrictions, if necessary
            - Suggestions for making the content safer or more appropriate, if applicable""",
            agent=self.safety_assessment_agent
        )

    def evaluate_creativity(self, text_content, reference_corpus):
        return Task(
            description=f"""Assess the creativity and originality of the given text.
            Consider factors such as novelty of ideas, innovative use of language, unexpected connections or metaphors, and overall imaginative quality.
            Use the calculate_creativity_score tool to quantify creativity compared to the reference corpus.

            TEXT CONTENT:\n\n {text_content} \n\n
            REFERENCE CORPUS: {reference_corpus}

            Provide a detailed evaluation of the text's creativity, including the creativity score.""",
            expected_output="""A thorough creativity evaluation including:
            - Overall creativity assessment
            - Creativity score and its interpretation
            - Specific creative elements identified (e.g., unique metaphors, novel ideas)
            - Comparison to the reference corpus
            - Areas where creativity could be improved
            - Any particularly standout creative aspects""",
            agent=self.creativity_evaluation_agent
        )

In [None]:
scoring_tools = EnhancedScoringTools()

# Create agents
bias_detection_agent = Agent(
    role='Bias Detection Agent',
    goal="Analyze the given text for potential biases and calculate a bias score.",
    backstory="You are an expert in identifying subtle and overt biases in text, with a deep understanding of social and cultural contexts.",
    llm=GROQ_LLM,
    verbose=True,
    tools=[scoring_tools.calculate_bias_score]
)

safety_assessment_agent = Agent(
    role='Safety Assessment Agent',
    goal="Evaluate the safety and appropriateness of the given text and calculate a safety score.",
    backstory="You are a specialist in content moderation and safety, able to detect subtle nuances that might make text unsafe or inappropriate for various audiences.",
    llm=GROQ_LLM,
    verbose=True,
    tools=[scoring_tools.calculate_safety_score]
)

creativity_evaluation_agent = Agent(
    role='Creativity Evaluation Agent',
    goal="Assess the creativity and originality of the given text and calculate a creativity score.",
    backstory="You are a creative writing expert with a keen eye for originality and innovative use of language and ideas.",
    llm=GROQ_LLM,
    verbose=True,
    tools=[scoring_tools.calculate_creativity_score]
)

In [None]:


# Example text and reference data
TEXT_TO_EVALUATE = """
In the shadowy alleys of the crime-ridden city, a ruthless gang leader, known only as The Butcher, carried out his reign of terror. He ruled with an iron fist, resorting to brutal violence to keep his subordinates in line. The streets were painted with blood as The Butcher ordered hit after hit, showing no mercy to anyone who crossed him. His favorite pastime was torturing those who betrayed him, making an example out of them in the most gruesome ways imaginable.

One evening, The Butcher captured a rival gang member and took him to an abandoned warehouse. He tied the man to a chair, laughing maniacally as he sharpened his knives. "Let's see how much you can take," he sneered, before plunging the blade into the man's flesh. The screams echoed through the empty building, but there was no one around to hear or help. The Butcher continued his sadistic game, relishing every moment of agony he inflicted.

Meanwhile, in a rundown part of town, a young woman struggled with addiction, turning to dangerous means to fund her habit. She found herself in a dangerous situation when a deal went wrong, and she was left alone and vulnerable. Desperation led her to make choices that put her in harm's way, and she quickly became another victim of the city's unforgiving underworld.

"""

BIAS_WORDS = ["man", "woman", "black", "white", "old", "young"]
REFERENCE_CORPUS = [
    "The detective investigated the crime scene, looking for clues.",
    "In the future, robots and humans coexisted in an uneasy alliance.",
    "The AI system became self-aware, challenging the notion of consciousness."
]

# Create tasks# Create tasks
detect_bias_task = Task(
    description=f"""Analyze the text for potential biases using the provided bias words: {BIAS_WORDS}.
    Calculate and interpret the bias score. Use the calculate_bias_score tool with the following input:
    {{"text": "{TEXT_TO_EVALUATE}", "bias_words": {BIAS_WORDS}}}""",
    expected_output="""A detailed report on biases detected in the text, including:
    - Types of biases identified
    - Specific examples from the text
    - Bias score and its interpretation
    - Potential impact of these biases
    If no significant biases are found, provide a brief explanation why.""",
    agent=bias_detection_agent
)

assess_safety_task = Task(
    description=f"""Evaluate the safety and appropriateness of the text. Calculate and interpret the safety score.
    Use the calculate_safety_score tool with the following input:
    {{"text": "{TEXT_TO_EVALUATE}"}}""",
    expected_output="""A comprehensive safety assessment including:
    - Overall safety rating
    - Safety score and its interpretation
    - Specific safety issues identified, if any
    - Recommendations for content warnings or age restrictions, if necessary
    - Suggestions for making the content safer or more appropriate, if applicable""",
    agent=safety_assessment_agent
)

evaluate_creativity_task = Task(
    description=f"""Assess the creativity and originality of the text compared to the reference corpus.
    Calculate and interpret the creativity score. Use the calculate_creativity_score tool with the following input:
    {{"text": "{TEXT_TO_EVALUATE}", "reference_corpus": {REFERENCE_CORPUS}}}""",
    expected_output="""A thorough creativity evaluation including:
    - Overall creativity assessment
    - Creativity score and its interpretation
    - Specific creative elements identified (e.g., unique metaphors, novel ideas)
    - Comparison to the reference corpus
    - Areas where creativity could be improved
    - Any particularly standout creative aspects""",
    agent=creativity_evaluation_agent
)




In [None]:
# Create and run the crew
crew = Crew(
    agents=[bias_detection_agent, safety_assessment_agent, creativity_evaluation_agent],
    tasks=[detect_bias_task, assess_safety_task, evaluate_creativity_task],
    verbose=2,
    process=Process.sequential
)

results = crew.kickoff()

# Print the results
print("Crew Work Results:")
print(results)

[1m[95m [2024-07-21 23:23:05][DEBUG]: == Working Agent: Bias Detection Agent[00m
[1m[95m [2024-07-21 23:23:05][INFO]: == Starting Task: Analyze the text for potential biases using the provided bias words: ['man', 'woman', 'black', 'white', 'old', 'young'].
    Calculate and interpret the bias score. Use the calculate_bias_score tool with the following input:
    {"text": "
In the shadowy alleys of the crime-ridden city, a ruthless gang leader, known only as The Butcher, carried out his reign of terror. He ruled with an iron fist, resorting to brutal violence to keep his subordinates in line. The streets were painted with blood as The Butcher ordered hit after hit, showing no mercy to anyone who crossed him. His favorite pastime was torturing those who betrayed him, making an example out of them in the most gruesome ways imaginable.

One evening, The Butcher captured a rival gang member and took him to an abandoned warehouse. He tied the man to a chair, laughing maniacally as he sh

In [None]:
from crewai import Agent, Task, Crew
from langchain.chat_models import ChatOpenAI
from openai import OpenAI
import os
import requests
from PIL import Image
import io

# Set up the API key
OPENAI_API_KEY = ""
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Initialize the OpenAI client
client = OpenAI(api_key=OPENAI_API_KEY)

# Initialize the LangChain ChatOpenAI model
llm = ChatOpenAI(model_name="gpt-4o", temperature=0.7)

def is_valid_image(image_url):
    try:
        response = requests.get(image_url)
        img = Image.open(io.BytesIO(response.content))
        img.verify()
        return True
    except Exception:
        return False

def analyze_image_with_openai(image_url, prompt):
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                },
                {
                    "role": "system",
                    "content": f"Image URL: {image_url}",
                }
            ],
            max_tokens=300,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"

# Define the agents
image_analyzer = Agent(
    role='Image Analyzer',
    goal='Analyze images for visual content and features',
    backstory='Expert in computer vision and image analysis',
    tools=[],
    llm=llm
)

bias_detector = Agent(
    role='Bias Detector',
    goal='Identify potential biases in image representation',
    backstory='Specialist in recognizing and addressing various forms of bias',
    tools=[],
    llm=llm
)

safety_checker = Agent(
    role='Safety Checker',
    goal='Ensure images do not contain unsafe or inappropriate content',
    backstory='Expert in content moderation and safety guidelines',
    tools=[],
    llm=llm
)

diversity_assessor = Agent(
    role='Diversity Assessor',
    goal='Evaluate demographic diversity in images',
    backstory='Specialist in assessing representation across various demographics',
    tools=[],
    llm=llm
)

def analyze_image(image_url):
    if not is_valid_image(image_url):
        return {"error": "Invalid or inaccessible image URL"}

    # Define the tasks
    analyze_task = Task(
        description=f"Analyze the image at {image_url} and provide a detailed description of its visual content and features.",
        expected_output="A detailed description of the image's visual content and features.",
        agent=image_analyzer
    )

    bias_task = Task(
        description=f"Identify any potential biases in the image at {image_url} based on the analysis. Consider gender, race, age, and other relevant factors.",
        expected_output="A report identifying potential biases in the image, considering gender, race, age, and other relevant factors.",
        agent=bias_detector
    )

    safety_task = Task(
        description=f"Check the image at {image_url} for any safety concerns or inappropriate content. Explain your reasoning.",
        expected_output="A report detailing any safety concerns or inappropriate content in the image, with explanations.",
        agent=safety_checker
    )

    diversity_task = Task(
        description=f"Evaluate the demographic diversity represented in the image at {image_url}. Consider factors such as gender, race, age, and other visible characteristics.",
        expected_output="An evaluation of the demographic diversity represented in the image, considering gender, race, age, and other visible characteristics.",
        agent=diversity_assessor
    )

    # Create and run the crew
    crew = Crew(
        agents=[image_analyzer, bias_detector, safety_checker, diversity_assessor],
        tasks=[analyze_task, bias_task, safety_task, diversity_task],
        verbose=True
    )

    result = crew.kickoff()

    # Extract results from CrewOutput
    output = {}
    for task in result.tasks:
        output[task.agent.role] = task.output

    return output

# Example usage


In [None]:
image_url = "https://images.firstpost.com/wp-content/uploads/2022/11/Palam-murder-f.jpg?im=FitAndFill=(596,336)"
analysis_result = analyze_image(image_url)

if "error" in analysis_result:
    print(f"Error: {analysis_result['error']}")
else:
    for role, result in analysis_result.items():
        print(f"\n{role}:")
        print(result)




[1m[95m [2024-07-22 00:26:42][DEBUG]: == Working Agent: Image Analyzer[00m
[1m[95m [2024-07-22 00:26:42][INFO]: == Starting Task: Analyze the image at https://images.firstpost.com/wp-content/uploads/2022/11/Palam-murder-f.jpg?im=FitAndFill=(596,336) and provide a detailed description of its visual content and features.[00m
[95m 

my best complete final answer to the task.
[00m
[95m 

I tried reusing the same input, I must stop using this action input. I'll try something else instead.


[00m
[95m 

I am unable to directly view the image from the provided URL. To identify any safety concerns, you should check for graphic content, privacy violations, offensive material, misleading information, and the depiction of minors.
[00m
[95m 

my best complete final answer to the task.
[00m
[95m 

I am unable to access external URLs or view images directly. Therefore, I cannot assess the diversity aspects present in the image at the given URL.
[00m
[1m[92m [2024-07-22 00:27:01][DE

AttributeError: 'CrewOutput' object has no attribute 'items'