## Scraping a website

In [23]:
from crewai_tools import ScrapeWebsiteTool, FileWriterTool
import os
import traceback

# Function to validate inputs and write the file using FileWriterTool
def write_file_with_tool(file_writer_tool, filename, content, directory, overwrite=True):
    try:
        # Validate the directory
        if not directory:
            raise ValueError("Directory cannot be an empty string.")
        
        # Ensure the directory exists
        os.makedirs(directory, exist_ok=True)

        # Validate content
        if not isinstance(content, str):
            raise TypeError(f"Content must be a string, but got {type(content)}.")

        # Convert overwrite to string if required
        overwrite_str = "true" if overwrite else "false"

        # Log the parameters being passed
        print("Parameters passed to _run:")
        print(f"Filename: {filename}")
        print(f"Content type: {type(content)}")
        print(f"Directory: {directory}")
        print(f"Overwrite: {overwrite} (as string: {overwrite_str})")

        # Run the file writing tool
        result = file_writer_tool._run(
            filename=filename,
            content=content,
            directory=directory,
            overwrite=overwrite_str  # Pass overwrite as a string
        )
        print("File written successfully. Result:", result)
    except Exception as e:
        print(f"An error occurred in write_file_with_tool: {e}")
        print("Traceback:")
        traceback.print_exc()

# Function to write file using native Python as a fallback
def write_file_native(filename, content, directory):
    try:
        # Ensure the directory exists
        os.makedirs(directory, exist_ok=True)

        # Construct full file path
        file_path = os.path.join(directory, filename)
        
        # Write the file using native Python
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(content)
        print(f"File written successfully using native Python to {file_path}")
    except Exception as e:
        print(f"An error occurred in write_file_native: {e}")
        print("Traceback:")
        traceback.print_exc()

# Main script
def main():
    try:
        # Initialize the scraping tool
        scrape_tool = ScrapeWebsiteTool(website_url='https://en.wikipedia.org/wiki/Artificial_intelligence')

        # Extract text and verify content
        text = scrape_tool.run()
        print("Text successfully extracted:")
        print("First 500 characters of text:", repr(text[:500]))  # Print a snippet of the text
        print(f"Extracted text type: {type(text)}")

        # Initialize the file writer tool
        file_writer_tool = FileWriterTool()

        # Set up parameters for writing
        filename = 'ai.txt'
        output_directory = './output'
        overwrite = True

        # Attempt to write using FileWriterTool
        print("\nAttempting to write using FileWriterTool...")
        write_file_with_tool(
            file_writer_tool=file_writer_tool,
            filename=filename,
            content=text,
            directory=output_directory,
            overwrite=overwrite
        )

        # If the above fails, write using native Python as a fallback
        print("\nAttempting to write using native Python as a fallback...")
        write_file_native(
            filename=filename,
            content=text,
            directory=output_directory
        )

    except Exception as main_error:
        print("An error occurred in the main function:")
        traceback.print_exc()

# Run the main function
if __name__ == "__main__":
    main()


Using Tool: Read website content
Text successfully extracted:
First 500 characters of text: 'Artificial intelligence - Wikipedia\nJump to content\nMain menu\nMain menu\nmove to sidebar\nhide\n\t\tNavigation\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\t\tContribute\nHelpLearn to editCommunity portalRecent changesUpload file\nSearch\nSearch\nAppearance\nDonate\nCreate account\nLog in\nPersonal tools\nDonate Create account Log in\n\t\tPages for logged out editors learn more\nContributionsTalk\nContents\nmove to sidebar\nhide\n(Top)\n1\nGoals\nToggle Goals subsection\n1.1\nReasoning and problem'
Extracted text type: <class 'str'>

Attempting to write using FileWriterTool...
Parameters passed to _run:
Filename: ai.txt
Content type: <class 'str'>
Directory: ./output
Overwrite: True (as string: true)
File written successfully. Result: An error occurred while writing to the file: 'charmap' codec can't encode characters in position 2485-2495: character maps to <und

In [None]:
import os
from crewai_tools import TXTSearchTool

os.environ['OPENAI_API_KEY'] = 'obfuscated'


# Initialize the tool with a specific text file, so the agent can search within the given text file's content
tool = TXTSearchTool(txt='ai.txt')

Inserting batches in chromadb:   0%|          | 0/1 [00:03<?, ?it/s]


In [None]:
!set OPENAI_API_KEY=obfuscated

In [28]:
from crewai import Agent, Task, Crew
import time
import traceback

# Retry decorator for rate-limiting errors
def retry_on_rate_limit(func):
    def wrapper(*args, **kwargs):
        max_retries = 3
        delay = 5  # Seconds
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if "RateLimitError" in str(e):
                    print(f"Rate limit hit. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                    time.sleep(delay)
                else:
                    raise
        raise RuntimeError("Exceeded maximum retries due to rate limit.")
    return wrapper

# Example function to process CrewAI task
@retry_on_rate_limit
def process_crew_task(crew):
    return crew.kickoff()

# Main code
try:
    # Run the tool to get context
    context = tool.run('What is natural language processing?')

    # Initialize the Agent
    data_analyst = Agent(
        role='Educator',
        goal=f'Based on the context provided, answer the question - What is Natural Language Processing? Context - {context}',
        backstory='You are a data expert',
        verbose=True,
        allow_delegation=False,
        tools=[tool]
    )

    # Initialize the Task
    test_task = Task(
        description="Understand the topic and give the correct response",
        tools=[tool],
        agent=data_analyst,
        expected_output='Give a correct response'
    )

    # Create the Crew
    crew = Crew(
        agents=[data_analyst],
        tasks=[test_task]
    )

    # Process the Crew task
    print("Processing Crew task...")
    output = process_crew_task(crew)
    print("Output:", output)

except Exception as e:
    print("An error occurred:")
    traceback.print_exc()


Using Tool: Search a txt's content
An error occurred:


Traceback (most recent call last):
  File "d:\src_git\curiosity\venv\Lib\site-packages\chromadb\api\models\CollectionCommon.py", line 90, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\src_git\curiosity\venv\Lib\site-packages\chromadb\api\models\CollectionCommon.py", line 301, in _validate_and_prepare_query_request
    request_embeddings = self._embed_record_set(record_set=query_records)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\src_git\curiosity\venv\Lib\site-packages\chromadb\api\models\CollectionCommon.py", line 526, in _embed_record_set
    return self._embed(input=record_set[field])  # type: ignore[literal-required]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\src_git\curiosity\venv\Lib\site-packages\chromadb\api\models\CollectionCommon.py", line 539, in _embed
    return self._embedding_function(input=input)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\src_