Initializing the UniProt tool

In [1]:
import os
import requests
import urllib.parse
from pydantic import BaseModel
from crewai import Agent, Task, Crew, LLM
from crewai.tools.structured_tool import CrewStructuredTool
def get_uniprot(function_keyword: str, timeout: float = 30.0):
    """Fetch the first UniProtKB entry whose function comment matches a keyword.

    Sends one search request to the UniProt REST API, restricted to the
    ``cc_function`` field, asking for JSON and a single result.

    Args:
        function_keyword: Term to look for in protein function comments.
            Leading and trailing whitespace is ignored.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        body cannot be decoded, or the search yields no entries.

    Raises:
        ValueError: If ``function_keyword`` is empty or only whitespace.
    """
    # Whitespace-only input would otherwise produce an empty phrase query.
    if not function_keyword or not str(function_keyword).strip():
        raise ValueError("Function keyword must be a non-empty string.")
    keyword = str(function_keyword).strip()

    # Build and encode the query
    query = f'((cc_function:"{keyword}"))'
    encoded_query = urllib.parse.quote(query)
    url = f"https://rest.uniprot.org/uniprotkb/search?format=json&query={encoded_query}&size=1"
    print("Requesting URL:", url)

    try:
        # Without a timeout a stalled connection would block the agent forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/HTTP failures; ValueError covers a
        # body that is not valid JSON on requests versions that raise it.
        print("Error querying UniProt:", e)
        return None

    # A search with no hits is expected to come back as a dict with an empty
    # "results" list, which is truthy, so check the list rather than the dict.
    # Any non-dict payload keeps the plain truthiness check.
    results = data.get("results") if isinstance(data, dict) else data
    if not results:
        print("No UniProt entries found for function:", function_keyword)
        return None
    return data

# Input schema for the UniProt tool, declared with Pydantic.
# It has one required string field and is passed as args_schema when the tool is built.
class UniprotInput(BaseModel):
    # Keyword handed to uniprot_tool_wrapper under the same parameter name
    function_keyword: str

# Updated wrapper that accepts a parameter matching the schema field
def uniprot_tool_wrapper(function_keyword: str) -> dict:
    """Call ``get_uniprot`` and always hand back a dict.

    Args:
        function_keyword: Keyword forwarded unchanged to ``get_uniprot``.

    Returns:
        Whatever ``get_uniprot`` returned when that is not ``None``; otherwise
        a dict with a single ``"error"`` key describing the failure.
    """
    payload = get_uniprot(function_keyword)
    if payload is not None:
        return payload
    # get_uniprot signals both "no data" and "request failed" with None.
    return {"error": "No data found or an error occurred while querying UniProt."}

# Create and return the structured tool for UniProt querying
def create_uniprot_tool():
    """Build the structured tool that exposes the UniProt lookup.

    Returns:
        The object produced by ``CrewStructuredTool.from_function`` for
        ``uniprot_tool_wrapper`` with ``UniprotInput`` as its argument schema.
    """
    tool_spec = {
        "name": "UniProt Fetcher",
        "description": "Fetches UniProt entries based on a function keyword using the UniProt REST API.",
        "args_schema": UniprotInput,
        "func": uniprot_tool_wrapper,
    }
    return CrewStructuredTool.from_function(**tool_spec)

# Build the UniProt fetcher once at module level; the query-generator agent
# defined in a later cell receives it through its tools list.
uniprot_tool = create_uniprot_tool()


Initializing the Crew and running the query-generation task

In [3]:

import pydantic
from crewai import Agent, Task, Crew, LLM
import os



from getpass import getpass

from IPython.display import Markdown

# Take the Groq key from the environment rather than hard-coding it, and only
# prompt when it is missing. Assigning "" here would overwrite a key that was
# already exported and leave the LLM without credentials.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("GROQ API key: ")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Shared LLM handle used by both agents below.
llm = LLM(
    model="groq/gemma2-9b-it",
    temperature=0.7
)

# NOTE(review): planner is not listed in the Crew below and its backstory is
# empty; kept in case other cells rely on the name.
planner = Agent(
    role="Content Planner",
    goal="Plan series of steps and procedure to create a protein based on given input : accurate content on {userinput}",
    backstory="",
    allow_delegation=False,
    verbose=True,
    llm=llm
)

# Agent that turns the user's description into a UniProt lookup and may call
# the UniProt fetcher tool while doing so.
query_generator = Agent(
    role="uniprot_query_generator",
    # Adjacent string literals are concatenated as-is, so the separator after
    # the placeholder has to be spelled out explicitly.
    goal="Generates a UniProt query from a given protein function: {userinput}. "
         "Ensure the query retrieves relevant proteins",
    backstory=
            "Designed as a highly specialized bioinformatics assistant."
            " A bioinformatics assistant trained to construct precise UniProt queries based on a protein function description: {userinput}. "
            " this agent leverages natural language processing (NLP) techniques to transform textual descriptions of protein functions into precise UniProt queries."
            " With an in-depth understanding of protein ontology, biochemical pathways, and molecular interactions, the agent ensures that every generated query retrieves highly relevant protein data. Whether working with simple function descriptions or complex multi-functional proteins, the agent efficiently extracts key terms, maps them to appropriate UniProt search fields, and optimizes query parameters for high recall and precision."
            " This capability aids researchers, bioinformaticians, and computational biologists in quickly identifying proteins of interest, expediting research workflows in structural biology, drug discovery, and synthetic biology.",
    tools=[uniprot_tool],
    verbose=True,
    llm=llm,
    # NOTE(review): output_pydantic is documented as a Task option in CrewAI;
    # confirm it has any effect when passed to Agent.
    output_pydantic = UniprotInput
)

# The single task the crew runs; {userinput} is filled in from kickoff inputs.
plan = Task(
    description=(
        "1. Extract key biological terms from a given protein function description. "
            " {userinput}.\n"
        "2. Map extracted terms to UniProt search fields  "
            "and controlled vocabularies.\n"
        "3. Generate a structured UniProt query optimized  "
            "for accuracy and recall.\n"
        "4. Validate and refine the query to ensure relevant search results."
    ),
    expected_output="Uniprot query for the given protein function description",
    agent=query_generator
)

crew = Crew(
    agents=[query_generator],
    tasks=[plan],
    verbose=True
)

result = crew.kickoff(inputs={"userinput": "I want a protein that can help in the digestion of food."})

# Last expression of the cell: render the crew's final answer as Markdown.
Markdown(str(result))

Overriding of current TracerProvider is not allowed


[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Task:[00m [92m1. Extract key biological terms from a given protein function description.  I want a protein that can help in the digestion of food..
2. Map extracted terms to UniProt search fields  and controlled vocabularies.
3. Generate a structured UniProt query optimized  for accuracy and recall.
4. Validate and refine the query to ensure relevant search results.[00m
Requesting URL: https://rest.uniprot.org/uniprotkb/search?format=json&query=%28%28cc_function%3A%22digestion%22%29%29&size=1


[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Using tool:[00m [92mUniProt Fetcher[00m
[95m## Tool Input:[00m [92m
"{\"function_keyword\": \"digestion\"}"[00m
[95m## Tool Output:[00m [92m
{'results': [{'entryType': 'UniProtKB reviewed (Swiss-Prot)', 'primaryAccession': 'P80111', 'uniProtkbId': 'ANTR_AQUCT', 'entryAudit': {'firstPublicDate': '1992-08-01', 'lastAnnotationUpdateDate': '2024-05-29

{"function_keyword": "digestion"}