Initializing the UniProt tool

In [1]:
import os
import json
import requests
import urllib.parse
from pydantic import BaseModel
from crewai.tools.structured_tool import CrewStructuredTool
def get_uniprot(function_keyword: str, timeout: float = 30.0):
    """Search UniProtKB for entries whose function annotation matches a keyword.

    Queries the UniProt REST search endpoint on the ``cc_function`` field,
    asking for a single result (``size=1``). When something is found, the
    decoded JSON response is written to ``uniprot_<keyword>.json`` in the
    current working directory and returned.

    Args:
        function_keyword: Free-text phrase to look for. Double quotes are
            dropped and runs of whitespace are collapsed before use.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response, or ``None`` when the request failed, the
        body was not valid JSON, or the search matched no entries.

    Raises:
        ValueError: If ``function_keyword`` is not a string or contains no
            usable text.
    """
    if not isinstance(function_keyword, str):
        raise ValueError("Function keyword must be a non-empty string.")

    # The keyword is embedded in a double-quoted phrase below. An embedded quote
    # would close that phrase early and let the caller rewrite the query, so
    # quotes are removed; whitespace is normalised at the same time.
    phrase = " ".join(function_keyword.replace('"', " ").split())
    if not phrase:
        raise ValueError("Function keyword must be a non-empty string.")

    # Build and encode the query
    query = f'((cc_function:"{phrase}"))'
    encoded_query = urllib.parse.quote(query)
    url = f"https://rest.uniprot.org/uniprotkb/search?format=json&query={encoded_query}&size=1"
    print("Requesting URL:", url)
    try:
        # Without a timeout a stalled connection would block the agent forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print("Error querying UniProt:", e)
        return None

    # A search without hits still returns a (truthy) object with an empty
    # "results" list, so check the list itself rather than only the payload.
    no_hits = isinstance(data, dict) and "results" in data and not data["results"]
    if not data or no_hits:
        print("No UniProt entries found for function:", function_keyword)
        return None

    # Keep only filename-safe characters so the keyword cannot introduce path
    # separators, and cap the length because keywords may be whole sentences.
    safe_stem = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in phrase)[:100]
    filename = f"uniprot_{safe_stem}.json"

    # Save the data to a new JSON file with pretty formatting
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

    print(f"Data saved to {filename}")
    return data

# Define the schema for the tool's input using Pydantic
class UniprotInput(BaseModel):
    """Input schema for the UniProt tool: a single required string field."""
    # Keyword or phrase that the tool wrapper forwards to the UniProt lookup.
    function_keyword: str

# Updated wrapper that accepts a parameter matching the schema field
def uniprot_tool_wrapper(function_keyword: str) -> dict:
    """Run the UniProt lookup and always hand a dict back to the calling tool.

    Args:
        function_keyword: Keyword or phrase forwarded to ``get_uniprot``.

    Returns:
        The data returned by ``get_uniprot``, or a dict with a single
        ``"error"`` key when the keyword was rejected, nothing was found, or
        the lookup failed.
    """
    try:
        result = get_uniprot(function_keyword)
    except ValueError as exc:
        # get_uniprot raises for an empty keyword; report it in the same
        # error-dict form as every other failure instead of letting it escape.
        return {"error": f"Invalid function keyword: {exc}"}
    if result is None:
        return {"error": "No data found or an error occurred while querying UniProt."}
    return result

# Create and return the structured tool for UniProt querying
def create_uniprot_tool():
    """Wrap ``uniprot_tool_wrapper`` as a CrewAI structured tool.

    Returns:
        Whatever ``CrewStructuredTool.from_function`` produces for the
        name, description, argument schema and callable given below.
    """
    tool_spec = {
        "name": "UniProt Fetcher",
        "description": "Fetches UniProt entries based on a function keyword using the UniProt REST API.",
        "args_schema": UniprotInput,
        "func": uniprot_tool_wrapper,
    }
    return CrewStructuredTool.from_function(**tool_spec)

# Instantiate the UniProt fetcher tool once at module level; the agents defined
# in the following cell list this object in their `tools`.
uniprot_tool = create_uniprot_tool()


Initializing Crew

In [None]:
from crewai import Agent, Task, Crew, LLM
import os
# Credentials must never be written into the notebook: anything saved here ends
# up in version control and in shared copies. The Groq key is taken from the
# environment and, only if it is missing, requested interactively without echo.
from getpass import getpass

if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter GROQ_API_KEY: ")
GROQ_API_KEY = os.environ["GROQ_API_KEY"]  # name kept for any code that reads it

# Language model shared by every agent defined in this cell.
llm = LLM(
    model="groq/gemma2-9b-it",
    temperature=0.7
)

# Alternative provider (disabled). To switch to Gemini, provide GOOGLE_API_KEY
# through the environment in the same way as above and use:
# llm = LLM(
#     model="gemini/gemini-2.0-flash"
# )
# Planning agent.
# NOTE(review): this agent is not listed in the Crew assembled later in the
# notebook and its backstory is empty — confirm whether it is still needed.
planner = Agent(
    llm=llm,
    role="Content Planner",
    goal="Plan series of steps and procedure to create a protein based on given input : accurate content on {userinput}",
    backstory="",
    verbose=True,
    allow_delegation=False,
)
# Agent that turns the user's protein-function description into a UniProt query.
# The goal and backstory are assembled from adjacent string literals, so every
# fragment must carry its own separating space/punctuation.
query_generator = Agent(
    role="uniprot_query_generator",
    goal=(
        "Generates a UniProt query from a given protein function: {userinput} "
        "Ensure the query retrieves relevant proteins."
    ),
    backstory=(
        "Designed as a highly specialized bioinformatics assistant trained to construct precise UniProt queries based on a protein function description: {userinput}. "
        "This agent leverages natural language processing (NLP) techniques to transform textual descriptions of protein functions into precise UniProt queries. "
        "With an in-depth understanding of protein ontology, biochemical pathways, and molecular interactions, the agent ensures that every generated query retrieves highly relevant protein data. Whether working with simple function descriptions or complex multi-functional proteins, the agent efficiently extracts key terms, maps them to appropriate UniProt search fields, and optimizes query parameters for high recall and precision. "
        "This capability aids researchers, bioinformaticians, and computational biologists in quickly identifying proteins of interest, expediting research workflows in structural biology, drug discovery, and synthetic biology."
    ),
    tools=[uniprot_tool],
    verbose=True,
    llm=llm,
    # NOTE(review): `output_pydantic` is documented as a Task option; confirm the
    # Agent honours it here, otherwise move it to the query-generation task.
    output_pydantic=UniprotInput,
)

# Reviewer agent: checks the query produced by `query_generator` against the
# user's request. It has access to the same UniProt tool as the generator.
uniprot_query_assurance_agent = Agent(
    llm=llm,
    tools=[uniprot_tool],
    verbose=True,
    role="query_assurance_agent",
    goal=(
        "Ensures the generated UniProt query is accurate and relevant to the protein function: {userinput}. "
        "Verify that the query retrieves the correct proteins and aligns with the intended function."
    ),
    backstory=(
        "This agent acts as a quality control specialist for bioinformatics queries, "
        "particularly focusing on UniProt search queries. "
        "It ensures that the query generated by the 'uniprot_query_generator' agent is accurate, "
        "targeting the right proteins based on the user's input function. "
        "The agent uses domain knowledge in molecular biology, protein ontology, and database query optimization "
        "to validate if the query retrieves the intended set of proteins. "
        "It cross-checks query terms with established protein families, functions, and molecular interactions "
        "to reduce false positives and increase query precision. "
        "In cases of ambiguity or overly broad queries, it provides feedback to optimize the query "
        "for better specificity and recall. "
        "This ensures that researchers spend minimal time sifting through irrelevant data."
    ),
)


LLM value is already an LLM object
LLM value is already an LLM object
LLM value is already an LLM object


In [3]:
# Query-generation task, carried out by `query_generator`. `{userinput}` is the
# placeholder filled from the inputs passed to `crew.kickoff`. One literal per
# numbered step; the text is unchanged.
plan = Task(
    agent=query_generator,
    expected_output="Uniprot query for the given protein function description",
    description=(
        "1. Extract key biological terms from a given protein function description.  {userinput}.\n"
        "2. Map extracted terms to UniProt search fields  and controlled vocabularies.\n"
        "3. Generate a structured UniProt query optimized  for accuracy and recall.\n"
        "4. Validate and refine the query to ensure relevant search results."
    ),
)

# Review task, carried out by `uniprot_query_assurance_agent`.
# The procedure and acceptance criteria are written into `description` because
# Task only documents `description` and `expected_output` as prompt text; the
# former `expected_input`, `steps` and `acceptance_criteria` keyword arguments
# are not documented Task attributes, so their content is placed here instead.
query_review = Task(
    description=(
        "Review the UniProt query generated by the uniprot_query_generator agent.\n"
        "Input: a UniProt query string generated from a protein function description, "
        "along with the original user input: {userinput}\n"
        "Steps:\n"
        "1. Receive the generated UniProt query and the original protein function description.\n"
        "2. Submit the query to the UniProt database using the UniProt Fetcher tool.\n"
        "3. Analyze the retrieved proteins and compare their functions to the intended protein function.\n"
        "4. Identify mismatches or broad/irrelevant results.\n"
        "5. If the query is highly accurate, approve it.\n"
        "6. If the query is inaccurate or overly broad, provide suggestions for improvement.\n"
        "7. Ensure the suggestions are aligned with the intended function.\n"
        "Acceptance criteria:\n"
        "- The query retrieves proteins that strongly match the intended function.\n"
        "- The query does not produce irrelevant or overly broad results.\n"
        "- Suggestions for improvement are practical and enhance query precision.\n"
        "- The validation report clearly explains why the query is accepted or rejected."
    ),
    expected_output="A validation report stating whether the query is accurate and retrieves relevant proteins. "
                    "If the query is suboptimal, provide suggestions to improve it.",
    agent=uniprot_query_assurance_agent
)


In [4]:
from IPython.display import Markdown

# Assemble the two-agent crew: the generator task is listed first, the review
# task second.
crew = Crew(
    tasks=[plan, query_review],
    agents=[query_generator, uniprot_query_assurance_agent],
    verbose=True,
)

# The "userinput" value fills the {userinput} placeholders used in the agent
# and task prompts.
result = crew.kickoff(
    inputs={"userinput": "I want a protein that can help in the digestion of food."}
)

# Last expression of the cell: render the crew's final answer as Markdown.
Markdown(str(result))

[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Task:[00m [92m1. Extract key biological terms from a given protein function description.  I want a protein that can help in the digestion of food..
2. Map extracted terms to UniProt search fields  and controlled vocabularies.
3. Generate a structured UniProt query optimized  for accuracy and recall.
4. Validate and refine the query to ensure relevant search results.[00m


ERROR:root:LiteLLM call failed: litellm.AuthenticationError: geminiException - {
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      },
      {
        "@type": "type.googleapis.com/google.rpc.LocalizedMessage",
        "locale": "en-US",
        "message": "API key not valid. Please pass a valid API key."
      }
    ]
  }
}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.AuthenticationError: geminiException - {
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      },
      {
        "@type": "type.googleapis.com/google.rpc.LocalizedMessage",
        "locale": "en-US",
        "message": "API key not valid. Please pass a valid API key."
      }
    ]
  }
}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.AuthenticationError: geminiException - {
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      },
      {
        "@type": "type.googleapis.com/google.rpc.LocalizedMessage",
        "locale": "en-US",
        "message": "API key not valid. Please pass a valid API key."
      }
    ]
  }
}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.AuthenticationError: geminiException - {
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      },
      {
        "@type": "type.googleapis.com/google.rpc.LocalizedMessage",
        "locale": "en-US",
        "message": "API key not valid. Please pass a valid API key."
      }
    ]
  }
}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.AuthenticationError: geminiException - {
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      },
      {
        "@type": "type.googleapis.com/google.rpc.LocalizedMessage",
        "locale": "en-US",
        "message": "API key not valid. Please pass a valid API key."
      }
    ]
  }
}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.AuthenticationError: geminiException - {
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      },
      {
        "@type": "type.googleapis.com/google.rpc.LocalizedMessage",
        "locale": "en-US",
        "message": "API key not valid. Please pass a valid API key."
      }
    ]
  }
}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



KeyboardInterrupt: 