In [1]:
import os
import json
import requests
import urllib.parse
from pydantic import BaseModel
from crewai.tools.structured_tool import CrewStructuredTool
from crewai import Agent, Task, Crew, LLM

# -----------------------------------------------
# Part 1: UniProt Query Generation Workflow
# -----------------------------------------------

def get_uniprot(function_keyword: str, *, timeout: float = 30.0):
    """Search UniProtKB for entries whose function annotation matches a keyword.

    Sends the query ``((cc_function:"<keyword>"))`` to the UniProt REST search
    endpoint (JSON format, a single result), writes the decoded response to
    ``uniprot_<keyword>.json`` in the current working directory and returns it.

    Args:
        function_keyword: Free-text protein function to search for.
        timeout: Seconds to wait for UniProt before giving up on the request.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        body is not valid JSON, or the search matched no entries.

    Raises:
        ValueError: If ``function_keyword`` is empty or only whitespace.
    """
    if not function_keyword or not function_keyword.strip():
        raise ValueError("Function keyword must be a non-empty string.")
    # Build and encode the query
    query = f'((cc_function:"{function_keyword}"))'
    encoded_query = urllib.parse.quote(query)
    url = f"https://rest.uniprot.org/uniprotkb/search?format=json&query={encoded_query}&size=1"
    print("Requesting URL:", url)
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP failures and undecodable bodies are reported, not raised.
        print("Error querying UniProt:", e)
        return None
    # The payload wraps its hits in a "results" list, so a search without hits
    # is still a truthy dict; test the list rather than the payload itself.
    results = data.get("results") if isinstance(data, dict) else data
    if not results:
        print("No UniProt entries found for function:", function_keyword)
        return None
    # The keyword may be produced by an LLM: replace anything that is not a
    # plain filename character so it cannot point outside the working directory.
    safe_keyword = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in function_keyword
    )
    # Save the data to a new JSON file
    filename = f"uniprot_{safe_keyword}.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
    print(f"Data saved to {filename}")
    return data

# Define the schema for the tool's input
class UniprotInput(BaseModel):
    # Input schema registered as ``args_schema`` of the UniProt tool.
    # The single field carries the free-text protein function keyword and has
    # the same name as the parameter of the function the tool wraps.
    function_keyword: str

# Wrapper for the UniProt tool
def uniprot_tool_wrapper(function_keyword: str) -> dict:
    """Run a UniProt search on behalf of an agent and always return a dict.

    Args:
        function_keyword: Free-text protein function to search for.

    Returns:
        The decoded UniProt response on success, otherwise a dict with a
        single ``"error"`` key describing what went wrong.
    """
    try:
        result = get_uniprot(function_keyword)
    except ValueError as exc:
        # get_uniprot rejects an empty keyword by raising; hand that back as
        # ordinary tool output so the failure has the same shape as the others.
        return {"error": str(exc)}
    if result is None:
        return {"error": "No data found or an error occurred while querying UniProt."}
    return result

# Create the structured tool for UniProt querying
def create_uniprot_tool():
    """Build the structured tool that exposes the UniProt search wrapper."""
    # Registration details gathered in one mapping and unpacked in a single call.
    tool_settings = {
        "func": uniprot_tool_wrapper,
        "args_schema": UniprotInput,
        "name": "UniProt Fetcher",
        "description": "Fetches UniProt entries based on a function keyword using the UniProt REST API.",
    }
    uniprot_fetcher = CrewStructuredTool.from_function(**tool_settings)
    return uniprot_fetcher

uniprot_tool = create_uniprot_tool()

# Read the Groq API key from the environment instead of embedding it here.
# SECURITY: the key that used to be hard-coded on this line has been published
# together with this notebook and must be revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment before running this cell."
    )

llm = LLM(
    model="groq/gemma2-9b-it",
    temperature=0.7
)

# Agent to generate a UniProt query from a protein function description
query_generator = Agent(
    role="uniprot_query_generator",
    goal="Generates a UniProt query from a given protein function: {userinput}. Ensure the query retrieves relevant proteins",
    backstory="Designed as a highly specialized bioinformatics assistant to construct precise UniProt queries.",
    tools=[uniprot_tool],
    verbose=True,
    llm=llm,
)

# Agent to validate and refine the generated UniProt query
uniprot_query_assurance_agent = Agent(
    role="query_assurance_agent",
    goal="Ensures the generated UniProt query is accurate and relevant to the protein function: {userinput}. Verify that the query retrieves the correct proteins and aligns with the intended function.",
    backstory="This agent acts as a quality control specialist for bioinformatics queries, ensuring that the query targets the right proteins.",
    tools=[uniprot_tool],
    verbose=True,
    llm=llm,
)

# Define tasks for the UniProt query workflow
plan = Task(
    description=(
        "1. Extract key biological terms from the protein function description: {userinput}.\n"
        "2. Map these terms to UniProt search fields and controlled vocabularies.\n"
        "3. Generate a structured UniProt query optimized for accuracy and recall.\n"
        "4. Validate and refine the query to ensure relevant search results."
    ),
    expected_output="UniProt query for the given protein function description",
    agent=query_generator,
    # Structured output is a Task option; it was previously passed to the
    # Agent constructor, where it had no effect.
    output_pydantic=UniprotInput,
)

# The review procedure and acceptance criteria are part of the description:
# Task has no expected_input/steps/acceptance_criteria fields, so passing them
# as keyword arguments never reached the model.
query_review = Task(
    description=(
        "Review the UniProt query generated by the uniprot_query_generator agent.\n"
        "Expected input: A UniProt query string generated from a protein function description, along with the original user input.\n"
        "Steps:\n"
        "1. Receive the generated UniProt query and the original protein function description.\n"
        "2. Submit the query to the UniProt database using uniprot_tool.\n"
        "3. Analyze the retrieved proteins and compare their functions to the intended protein function.\n"
        "4. Identify mismatches or overly broad results.\n"
        "5. Approve the query if highly accurate, otherwise provide improvement suggestions.\n"
        "Acceptance criteria:\n"
        "- The query retrieves proteins that strongly match the intended function.\n"
        "- The query does not produce irrelevant or overly broad results.\n"
        "- Suggestions for improvement are practical and enhance query precision.\n"
        "- The validation report clearly explains the decision."
    ),
    expected_output=("A validation report stating whether the query is accurate and retrieves relevant proteins. "
                     "If the query is suboptimal, provide suggestions to improve it."),
    agent=uniprot_query_assurance_agent
)

# -----------------------------------------------
# Part 2: RF Diffusion Control Workflow
# -----------------------------------------------

# Disable CrewAI telemetry if necessary
os.environ["CREWAI_DISABLE_TELEMETRY"] = "true"

# Define the tool to fetch protein site information from cache
from crewai.tools import tool

@tool("get_protein__site_info")
def get_protein__site_info(protein_id: str) -> str:
    """Fetches the protein site information from cache."""
    # Reads cache/uniprot/<protein_id>.json and returns the "features" list of
    # its first result as a string. Every failure is reported as a
    # "No data found ..." string so the declared ``str`` return type holds and
    # the calling agent gets readable tool output instead of an exception.

    # The id is chosen by the LLM: refuse anything containing a path separator
    # so the lookup cannot leave the cache directory.
    if not protein_id or "/" in protein_id or "\\" in protein_id:
        print("No data found")
        return f"No data found: invalid protein id {protein_id!r}."
    # os.path.join keeps the original Windows path and also works elsewhere.
    loc = os.path.join("cache", "uniprot", f"{protein_id}.json")
    try:
        with open(loc, 'r', encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # Missing/unreadable file, or a cache file that is not valid JSON.
        print("No data found")
        return f"No data found for protein id {protein_id!r}: {exc}"
    print("used")
    try:
        return str(data['results'][0]['features'])
    except (KeyError, IndexError, TypeError):
        print("No data found")
        return f"No data found: cached entry for {protein_id!r} has no features."

# Load RF Diffusion manual context from file
# (os.path.join yields the same path on Windows and a valid one elsewhere.)
rf_diff_context_path = os.path.join("Protein-Designing-With-Agents", "config", "RF_diff_context.txt")
with open(rf_diff_context_path, 'r') as f:
    RF_Dif_manual = f.read()

# The Gemini API key must come from the environment.
# SECURITY: the key that used to be hard-coded here has been published
# together with this notebook and must be revoked.
if not os.environ.get("GEMINI_API_KEY"):
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment before running this cell."
    )
gemini_llm = LLM(
    model="gemini/gemini-2.0-flash",
    temperature=0.7
)

# Agent for protein scaffolding analysis based on UniProt features
protein_expert_agent = Agent(
    role="Protein Scaffolding Specialist",
    goal="Assist in creating protein scaffolds by identifying which motifs should be masked or preserved based on UniProt features.",
    backstory="Expert in computational protein design with experience in analyzing protein structural and functional data.",
    llm=gemini_llm,
    # Was misspelled ``tool=``, so the agent was created without the cache tool.
    tools=[get_protein__site_info],
    verbose=True
)

# Agent for translating protein analysis into RF Diffusion configuration strategies
RF_Diffusion_Expert = Agent(
    role="RF Diffusion Expert",
    goal=("Translate the protein scaffolding requirements into specific RF Diffusion implementation strategies, "
          "specifying which motifs to mask versus preserve and providing technical details."),
    backstory="Expert in diffusion-based generative modeling for protein design. Manual context: {RF_Dif_manual}".format(RF_Dif_manual=RF_Dif_manual),
    llm=gemini_llm,
    verbose=True
)

# Task for analyzing protein UniProt features and determining motif preservation/masking.
# Updated to use the "session" input key instead of separate "protein" and "function" keys.
protein_analysis_task = Task(
    description=("Analyze the provided UniProt features in JSON format, identifying all functional sites (active sites, binding sites, metal-binding sites, etc.). "
                 "Interpret the user's desired protein function based on the session identifier: {session}. "
                 "Determine which motifs should be preserved as anchors and which regions masked for redesign. "
                 "Identify the most appropriate scaffolding approach and provide clear reasoning for your decisions, including motif positions. "
                 "The session identifier should be used to look up the corresponding protein details."),
    expected_output=("A detailed JSON report containing an analysis of UniProt features, "
                     "the identified motifs to be preserved or masked with justification, "
                     "and the recommended scaffolding approach."),
    agent=protein_expert_agent
)

# Task for generating the RF Diffusion configuration script based on the protein analysis
RF_Diffusion_configuration_task = Task(
    description=("Using the analysis from the previous task, generate a configuration script for running the RF Diffusion process. "
                 "Include model settings, diffusion parameters, input file paths, output directory locations, "
                 "and any optional parameters (such as contigmap)."),
    expected_output="A valid terminal script containing the RF Diffusion configuration with all necessary parameters.",
    agent=RF_Diffusion_Expert,
    context=[protein_analysis_task]
)

# -----------------------------------------------
# Combine all Agents and Tasks into a Single Crew
# -----------------------------------------------

combined_crew = Crew(
    agents=[query_generator, uniprot_query_assurance_agent, protein_expert_agent, RF_Diffusion_Expert],
    tasks=[plan, query_review, protein_analysis_task, RF_Diffusion_configuration_task],
    verbose=True
)

# Kickoff the combined crew workflow with the updated inputs
combined_inputs = {
    "userinput": "I want a protein that can help in DNA binding.",
    "session": "01"
}

result = combined_crew.kickoff(inputs=combined_inputs)
print(result)


LLM value is already an LLM object
LLM value is already an LLM object
LLM value is already an LLM object
LLM value is already an LLM object
[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Task:[00m [92m1. Extract key biological terms from the protein function description: I want a protein that can help in DNA binding..
2. Map these terms to UniProt search fields and controlled vocabularies.
3. Generate a structured UniProt query optimized for accuracy and recall.
4. Validate and refine the query to ensure relevant search results.[00m
Requesting URL: https://rest.uniprot.org/uniprotkb/search?format=json&query=%28%28cc_function%3A%22DNA%20binding%22%29%29&size=1
Data saved to uniprot_DNA_binding.json


[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Using tool:[00m [92mUniProt Fetcher[00m
[95m## Tool Input:[00m [92m
"{\"function_keyword\": \"DNA binding\"}"[00m
[95m## Tool Output:[00m [92m
{'results': [{'entryType': 'UniProtKB unrevi

ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 14190, Requested 3048. Please try again in 8.952s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13998, Requested 3048. Please try again in 8.183999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13836, Requested 3048. Please try again in 7.534s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13667, Requested 3048. Please try again in 6.858s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13510, Requested 3048. Please try again in 6.23s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13356, Requested 3048. Please try again in 5.615s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13170, Requested 3048. Please try again in 4.87s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 13005, Requested 3048. Please try again in 4.211999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 12827, Requested 3048. Please try again in 3.497s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 12627, Requested 3048. Please try again in 2.697s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 12457, Requested 3048. Please try again in 2.017999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 12273, Requested 3048. Please try again in 1.284s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 12107, Requested 3048. Please try again in 619ms. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Using tool:[00m [92mUniProt Fetcher[00m
[95m## Tool Input:[00m [92m
"{\"function_keyword\": \"DNA binding\"}"[00m
[95m## Tool Output:[00m [92m
I tried reusing the same input, I must stop using this action input. I'll try something else instead.

[00m


ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 17162, Requested 3119. Please try again in 21.125s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 17009, Requested 3119. Please try again in 20.515s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 16850, Requested 3119. Please try again in 19.876s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 16653, Requested 3119. Please try again in 19.088s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ERROR:root:LiteLLM call failed: litellm.RateLimitError: RateLimitError: GroqException - {"error":{"message":"Rate limit reached for model `gemma2-9b-it` in organization `org_01jnzbwaptee1s9tnfbn21hknk` service tier `on_demand` on tokens per minute (TPM): Limit 15000, Used 16493, Requested 3119. Please try again in 18.45s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing","type":"tokens","code":"rate_limit_exceeded"}}





LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



KeyboardInterrupt: 

In [None]:
import os
import json
import requests
import urllib.parse
from pydantic import BaseModel
from crewai.tools.structured_tool import CrewStructuredTool
from crewai import Agent, Task, Crew, LLM

# -----------------------------------------------
# Part 1: UniProt Query Generation Workflow
# -----------------------------------------------

def get_uniprot(function_keyword: str, *, timeout: float = 30.0):
    """Search UniProtKB for entries whose function annotation matches a keyword.

    Sends the query ``((cc_function:"<keyword>"))`` to the UniProt REST search
    endpoint (JSON format, a single result), writes the decoded response to
    ``uniprot_<keyword>.json`` in the current working directory and returns it.

    Args:
        function_keyword: Free-text protein function to search for.
        timeout: Seconds to wait for UniProt before giving up on the request.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        body is not valid JSON, or the search matched no entries.

    Raises:
        ValueError: If ``function_keyword`` is empty or only whitespace.
    """
    if not function_keyword or not function_keyword.strip():
        raise ValueError("Function keyword must be a non-empty string.")
    # Build and encode the query
    query = f'((cc_function:"{function_keyword}"))'
    encoded_query = urllib.parse.quote(query)
    url = f"https://rest.uniprot.org/uniprotkb/search?format=json&query={encoded_query}&size=1"
    print("Requesting URL:", url)
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP failures and undecodable bodies are reported, not raised.
        print("Error querying UniProt:", e)
        return None
    # The payload wraps its hits in a "results" list, so a search without hits
    # is still a truthy dict; test the list rather than the payload itself.
    results = data.get("results") if isinstance(data, dict) else data
    if not results:
        print("No UniProt entries found for function:", function_keyword)
        return None
    # The keyword may be produced by an LLM: replace anything that is not a
    # plain filename character so it cannot point outside the working directory.
    safe_keyword = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in function_keyword
    )
    # Save the data to a new JSON file
    filename = f"uniprot_{safe_keyword}.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
    print(f"Data saved to {filename}")
    return data

# Define the schema for the tool's input
class UniprotInput(BaseModel):
    # Input schema registered as ``args_schema`` of the UniProt tool.
    # The single field carries the free-text protein function keyword and has
    # the same name as the parameter of the function the tool wraps.
    function_keyword: str

# Wrapper for the UniProt tool
def uniprot_tool_wrapper(function_keyword: str) -> dict:
    """Run a UniProt search on behalf of an agent and always return a dict.

    Args:
        function_keyword: Free-text protein function to search for.

    Returns:
        The decoded UniProt response on success, otherwise a dict with a
        single ``"error"`` key describing what went wrong.
    """
    try:
        result = get_uniprot(function_keyword)
    except ValueError as exc:
        # get_uniprot rejects an empty keyword by raising; hand that back as
        # ordinary tool output so the failure has the same shape as the others.
        return {"error": str(exc)}
    if result is None:
        return {"error": "No data found or an error occurred while querying UniProt."}
    return result

# Create the structured tool for UniProt querying
def create_uniprot_tool():
    """Build the structured tool that exposes the UniProt search wrapper."""
    # Registration details gathered in one mapping and unpacked in a single call.
    tool_settings = {
        "func": uniprot_tool_wrapper,
        "args_schema": UniprotInput,
        "name": "UniProt Fetcher",
        "description": "Fetches UniProt entries based on a function keyword using the UniProt REST API.",
    }
    uniprot_fetcher = CrewStructuredTool.from_function(**tool_settings)
    return uniprot_fetcher

uniprot_tool = create_uniprot_tool()

# Instantiate Gemini LLM (used for both parts)
gemini_llm = LLM(
    model="gemini/gemini-2.0-flash",
    temperature=0.7
)

# Agent to generate a UniProt query from a protein function description using Gemini LLM
query_generator = Agent(
    role="uniprot_query_generator",
    goal="Generates a UniProt query from a given protein function: {userinput}. Ensure the query retrieves relevant proteins",
    backstory="Designed as a highly specialized bioinformatics assistant to construct precise UniProt queries.",
    tools=[uniprot_tool],
    verbose=True,
    llm=gemini_llm,
)

# Agent to validate and refine the generated UniProt query using Gemini LLM
uniprot_query_assurance_agent = Agent(
    role="query_assurance_agent",
    goal="Ensures the generated UniProt query is accurate and relevant to the protein function: {userinput}. Verify that the query retrieves the correct proteins and aligns with the intended function.",
    backstory="This agent acts as a quality control specialist for bioinformatics queries, ensuring that the query targets the right proteins.",
    tools=[uniprot_tool],
    verbose=True,
    llm=gemini_llm,
)

# Define tasks for the UniProt query workflow
plan = Task(
    description=(
        "1. Extract key biological terms from the protein function description: {userinput}.\n"
        "2. Map these terms to UniProt search fields and controlled vocabularies.\n"
        "3. Generate a structured UniProt query optimized for accuracy and recall.\n"
        "4. Validate and refine the query to ensure relevant search results."
    ),
    expected_output="UniProt query for the given protein function description",
    agent=query_generator,
    # Structured output is a Task option; it was previously passed to the
    # Agent constructor, where it had no effect.
    output_pydantic=UniprotInput,
)

# The review procedure and acceptance criteria are part of the description:
# Task has no expected_input/steps/acceptance_criteria fields, so passing them
# as keyword arguments never reached the model.
query_review = Task(
    description=(
        "Review the UniProt query generated by the uniprot_query_generator agent.\n"
        "Expected input: A UniProt query string generated from a protein function description, along with the original user input.\n"
        "Steps:\n"
        "1. Receive the generated UniProt query and the original protein function description.\n"
        "2. Submit the query to the UniProt database using uniprot_tool.\n"
        "3. Analyze the retrieved proteins and compare their functions to the intended protein function.\n"
        "4. Identify mismatches or overly broad results.\n"
        "5. Approve the query if highly accurate, otherwise provide improvement suggestions.\n"
        "Acceptance criteria:\n"
        "- The query retrieves proteins that strongly match the intended function.\n"
        "- The query does not produce irrelevant or overly broad results.\n"
        "- Suggestions for improvement are practical and enhance query precision.\n"
        "- The validation report clearly explains the decision."
    ),
    expected_output=("A validation report stating whether the query is accurate and retrieves relevant proteins. "
                     "If the query is suboptimal, provide suggestions to improve it."),
    agent=uniprot_query_assurance_agent
)

# -----------------------------------------------
# Part 2: RF Diffusion Control Workflow
# -----------------------------------------------

# Disable CrewAI telemetry if necessary
os.environ["CREWAI_DISABLE_TELEMETRY"] = "true"

# Define the tool to fetch protein site information from cache
from crewai.tools import tool

@tool("get_protein__site_info")
def get_protein__site_info(protein_id: str) -> str:
    """Fetches the protein site information from cache."""
    # Reads <cache dir>/<protein_id>.json and returns the "features" list of
    # its first result as a string. Every failure is reported as a
    # "No data found ..." string so the declared ``str`` return type holds and
    # the calling agent gets readable tool output instead of an exception.

    # The id is chosen by the LLM: refuse anything containing a path separator
    # so the lookup cannot leave the cache directory.
    if not protein_id or "/" in protein_id or "\\" in protein_id:
        print("No data found")
        return f"No data found: invalid protein id {protein_id!r}."
    cache_dir = r"C:\Users\91900\Documents\Engg\Sem-4\IBS\Projects\Protein-Designing-With-Agents\cache\uniprot"
    loc = os.path.join(cache_dir, f"{protein_id}.json")
    try:
        with open(loc, 'r', encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # Missing/unreadable file, or a cache file that is not valid JSON.
        print("No data found")
        return f"No data found for protein id {protein_id!r}: {exc}"
    print("used")
    try:
        return str(data['results'][0]['features'])
    except (KeyError, IndexError, TypeError):
        print("No data found")
        return f"No data found: cached entry for {protein_id!r} has no features."

# Load RF Diffusion manual context from file
rf_diff_context_path = r"C:\Users\91900\Documents\Engg\Sem-4\IBS\Projects\Protein-Designing-With-Agents\config\RF_diff_context.txt"
with open(rf_diff_context_path, 'r') as f:
    RF_Dif_manual = f.read()

# Agent for protein scaffolding analysis based on UniProt features using Gemini LLM
protein_expert_agent = Agent(
    role="Protein Scaffolding Specialist",
    goal="Assist in creating protein scaffolds by identifying which motifs should be masked or preserved based on UniProt features.",
    backstory="Expert in computational protein design with experience in analyzing protein structural and functional data.",
    llm=gemini_llm,
    # Was misspelled ``tool=``, so the agent was created without the cache tool.
    tools=[get_protein__site_info],
    verbose=True
)

# Agent for translating protein analysis into RF Diffusion configuration strategies using Gemini LLM
# The {RF_Dif_manual} placeholder in the backstory is filled at kickoff from
# the "RF_Dif_manual" entry of combined_inputs below.
RF_Diffusion_Expert = Agent(
    role="RF Diffusion Expert",
    goal=("Translate the protein scaffolding requirements into specific RF Diffusion implementation strategies, "
          "specifying which motifs to mask versus preserve and providing technical details."),
    backstory="Expert in diffusion-based generative modeling for protein design. Manual context: {RF_Dif_manual}",
    llm=gemini_llm,
    verbose=True
)

# Task for analyzing protein UniProt features and determining motif preservation/masking.
# Updated to use the "session" input key instead of separate "protein" and "function" keys.
protein_analysis_task = Task(
    description=("Analyze the provided UniProt features in JSON format, identifying all functional sites (active sites, binding sites, metal-binding sites, etc.). "
                 "Interpret the user's desired protein function based on the session identifier: {session}. "
                 "Determine which motifs should be preserved as anchors and which regions masked for redesign. "
                 "Identify the most appropriate scaffolding approach and provide clear reasoning for your decisions, including motif positions. "
                 "The session identifier should be used to look up the corresponding protein details."),
    expected_output=("A detailed JSON report containing an analysis of UniProt features, "
                     "the identified motifs to be preserved or masked with justification, "
                     "and the recommended scaffolding approach."),
    agent=protein_expert_agent
)

# Task for generating the RF Diffusion configuration script based on the protein analysis
RF_Diffusion_configuration_task = Task(
    description=("Using the analysis from the previous task, generate a configuration script for running the RF Diffusion process. "
                 "Include model settings, diffusion parameters, input file paths, output directory locations, "
                 "and any optional parameters (such as contigmap)."),
    expected_output="A valid terminal script containing the RF Diffusion configuration with all necessary parameters.",
    agent=RF_Diffusion_Expert,
    context=[protein_analysis_task]
)

# -----------------------------------------------
# Combine all Agents and Tasks into a Single Crew
# -----------------------------------------------

combined_crew = Crew(
    agents=[query_generator, uniprot_query_assurance_agent, protein_expert_agent, RF_Diffusion_Expert],
    tasks=[plan, query_review, protein_analysis_task, RF_Diffusion_configuration_task],
    verbose=True,
)

# Kickoff the combined crew workflow with the updated inputs
combined_inputs = {
    "userinput": "I want a protein that can help in DNA binding.",
    "session": "01",
    # Supplies the manual text for the {RF_Dif_manual} placeholder; without
    # this entry the placeholder was never replaced by the file contents.
    "RF_Dif_manual": RF_Dif_manual,
}

result = combined_crew.kickoff(inputs=combined_inputs)
print(result)