In [12]:
import os
import json
from pydantic import BaseModel
from crewai.tools.structured_tool import CrewStructuredTool
from crewai import Agent, Task, Crew, LLM
import requests
import urllib.parse
from pydantic import BaseModel, Field
from typing import List, Literal
from pathlib import Path
from crewai.tools import tool


In [13]:
from storage_manager import StorageManager
from tools.uniprot_extended_tool import toolset
from tools.rossetta_tool import PyRosettaWrapper
from tools.rf_dif_tool import run_rf_diffusion
from config.rf_diff_script_format import  RFDiffusionScriptConfig

In [14]:


llm = LLM(
    model="groq/gemma2-9b-it",
    temperature=0.7
)
os.environ["GEMINI_API_KEY"] = "AIzaSyDQSOPPaW8BVWXny1ycBfO_tF9jJiYiuag"
os.environ["MEM0_API_KEY"] = "m0-3wFDlHPJEB4GMP6HKSsHpGlm3Kt1s6xmvYQi5IuP"
gemini_llm = LLM(
    model="gemini/gemini-2.0-flash",
    temperature=0.7
)

In [15]:

rf_diff_context_path = r"config/RF_diff_context.txt"
with open(rf_diff_context_path, 'r') as f:
    RF_Dif_manual = f.read()

In [16]:

storage = StorageManager()
current_path = os.path.join(os.getcwd(), str(storage.get_session_path()))

print(current_path)

/home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430


TOOLS

In [17]:


crewtool = toolset(current_path)
Rosettatool = PyRosettaWrapper(current_path)
@tool("uniprot_fetch_tool")
def uniprot_fetch_tool(query: str) -> str:
    """This tool provides an acces to fetch data from UniProtKB using the UniProt REST API.
    the input has to be strictly a string """
    return crewtool.uniprot_fetch_tool(query=query)


@tool("get_protein__site_info")
def get_protein__site_info(protein_id: str) -> str:
    """Fetches the protein site information from cache."""
    return crewtool.get_protein__site_info(protein_id=protein_id)

@tool("get_protein__function_info")
def get_protein__function_info(protein_id: str) -> str:
    """Fetches the protein function information from cache."""
    return crewtool.get_all_function()

'''@tool("rf_diff_tool")
def rf_diff_tool(script:dict,protein_id: str,) -> str:
    """This tool provides an access to fetch data from RF_diff
    the input has to be strictly a RFDiffusionScriptConfig first is the script and the second is the protein_id, the tool will automatically access the pdb file """
    print(RFDiffusionScriptConfig.get_script_with_dict(script))
    return crewtool.rfdifcom(script=RFDiffusionScriptConfig.get_script_with_dict(script), pdb_id=protein_id)'''

@tool("rf_diff_tool")
def rf_diff_tool(script:dict,protein_id: str,) -> str:
    """This tool provides an access to fetch data from RF_diff
    the input has to be strictly a RFDiffusionScriptConfig first is the script and the second is the protein_id, the tool will automatically access the pdb file """
    print(RFDiffusionScriptConfig.get_script_with_dict(script,dir=current_path))
    return run_rf_diffusion(script_str=RFDiffusionScriptConfig.get_script_with_dict(script,dir=current_path), pdb_file_path= current_path+"/pdb/"+protein_id+".pdb")

@tool("rosseta_tool")
def rosseta_tool(pdb_id: str) -> str:
    """This tool provides an access to fetch data from Rosetta
    the input has to be strictly a string first is the script and the second is the protein_id, the tool will automatically access the pdb file """
    return Rosettatool.run(pdb_file=pdb_id)


PyRosettaWrapper initialized with directory: /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430


AGENTS

In [18]:

query_generator = Agent(
    role="uniprot_query_generator",
    goal="Generates a UniProt query from a given protein function: {userinput}. Ensure the query retrieves relevant proteins",
    backstory="Designed as a highly specialized bioinformatics assistant to construct precise UniProt queries.",
    
    verbose=True,
    llm=gemini_llm
)

uniprot_query_assurance_agent = Agent(
    role="query_assurance_agent",
    goal="Ensures the generated UniProt query is accurate and relevant to the protein function: {userinput}. Verify that the query retrieves the correct proteins and aligns with the intended function."
          "RUN the uniprot_fetch_tool, if it doesn't work then the query is suboptimal, provide suggestions to improve it."
          "uniprot_fetch_tool only works with a parathesis query in string format"
          "Query rules:"
          "-the query has to be string with paranthesis and semicolon and characters no other special characters allowed"
          "-avoid metioning \"query\" as it is unnecessary",
    backstory="This agent acts as a quality control specialist for bioinformatics queries, ensuring that the query targets the right proteins.",
    tools=[uniprot_fetch_tool],
    verbose=True,
    llm=gemini_llm,
)
protein_selector_agent = Agent(
    role="Protein Selection Expert",
    goal="Select the best protein that matches the given function requirement from a provided list. Access the list of proteins from the tool.",
    backstory="You are an expert in protein biochemistry and bioinformatics, with a keen ability to match protein functions to desired roles.",
    verbose=True,
    llm=gemini_llm,
    tools=[get_protein__function_info]
)

protein_expert_agent = Agent(
    role="Protein Scaffolding Specialist",
    goal="Assist in creating protein scaffolds by identifying which motifs should be masked or preserved based on UniProt features.",
    backstory="Expert in computational protein design with experience in analyzing protein structural and functional data.",
    llm=gemini_llm,
    tool=[get_protein__site_info],
    tools_verbose=True,
    verbose=True
)
RF_Diffusion_Expert = Agent(
    role="RF Diffusion Expert",
    goal=("Translate the protein scaffolding requirements into specific RF Diffusion implementation strategies, "
          "specifying which motifs to mask versus preserve and providing technical details."),
    backstory="Expert in diffusion-based generative modeling for protein design. Manual context: {RF_Dif_manual}",
    llm=gemini_llm,
    verbose=True
)
RF_Diffusion_Script_Verifier = Agent(
    role="RF Diffusion Script Verifier",
    goal=(
        "Verify the generated RF Diffusion configuration script for correctness, "
        "remove any unnecessary or redundant lines, ensure all parameters and file paths "
        "are valid, then execute the script via the RF Diffusion tool and confirm a successful run."
    ),
    backstory="Expert in RF Diffusion process scripts, code review, and pipeline execution. Manual context: {RF_Dif_manual}",
    llm=gemini_llm,
    tools=[rf_diff_tool],     # <-- add your RF Diffusion execution tool here
    verbose=True
)



TASKS

In [19]:
# Define tasks for the UniProt query workflow
plan = Task(
    description=(
        "1. Extract key biological terms from the protein function description: {userinput}.\n"
        "2. Map these terms to UniProt search fields and controlled vocabularies.\n"
        "3. Generate a string within paranthesis UniProt query optimized for accuracy and recall.\n"
        "4. Validate and refine the query to ensure relevant search results."
    ),
    expected_output="UniProt query for the given protein function description",
    agent=query_generator
)

query_review = Task(
    description="Review the UniProt query generated by the uniprot_query_generator agent.",
    expected_input="A UniProt query string generated from a protein function description, along with the original user input.",
    expected_output=(
                     "If the query is suboptimal, provide suggestions to improve it."
                     "After a successful query return the protein ID"),
    steps=[
        "Receive the generated UniProt query and the original protein function description.",
        "Submit the query to the UniProt database using uniprot_tool.",
        "Analyze the retrieved proteins and compare their functions to the intended protein function.",
        "resolve the query using the uniprot_fetch_tool unitil the problem is fixed",
        "Approve the query if it runs successfully and retrieves relevant proteins.",
    ],
    acceptance_criteria=[
        "The query retrieves proteins that strongly match the intended function.",
        "The query does not produce irrelevant or overly broad results.",
        "Suggestions for improvement are practical and enhance query precision.",
        "The validation report clearly explains the decision."
    ],
    agent=uniprot_query_assurance_agent
)
selection_task = Task(
    description="Choose the best protein from the provided list that performs the target function:{function}. try to choose only limited proteins",
    expected_output="A JSON object detailing the selected protein and its function.",
    agent=protein_selector_agent
)
protein_analysis_task = Task(
    description=("Analyze the provided UniProt features in JSON format, identifying all functional sites (active sites, binding sites, metal-binding sites, etc.). "
                 "Interpret the user's desired protein function described in natural language and determine which motifs should be preserved as anchors and which regions masked for redesign. "
                 "Identify the most appropriate scaffolding approach and provide clear reasoning for your decisions, including motif positions. "
                 "unction to be preserved: {function}. use the protein id from previous task"),
    expected_output=("A detailed JSON report containing an analysis of UniProt features, the identified motifs to be preserved or masked with justification, "
                     "and the recommended scaffolding approach."),
    agent=protein_expert_agent,
    context=[selection_task]
    #context=[protein_analysis_task]
)

# Task for generating the RF Diffusion configuration script based on the protein analysis
RF_Diffusion_configuration_task = Task(
    description=("Using the analysis from the previous task, generate a configuration script for running the RF Diffusion process. "
                 "Include model settings, diffusion parameters, input file paths, output directory locations, and any optional parameters (such as contigmap)."),
    expected_output="A valid terminal script containing the RF Diffusion configuration with all necessary parameters."
     "You should make sure sure that the pdb file should be the uniprot ID which will be given from the previous task",
    agent=RF_Diffusion_Expert,
    context=[protein_analysis_task],
    output_pydantic=RFDiffusionScriptConfig,
)

RF_Diffusion_script_verification_task = Task(
    description=(
        "1) Take the output configuration script from the previous task.  "
        "2) Check syntax and parameter correctness, remove any unnecessary or redundant lines.  "
        "3) Run the cleaned script using rf_diffusion_tool.run(script_path).  "
        "4) Capture and verify the execution logs to ensure it completes without errors."
        "5) PDB file is avilable and will be automatically fetched by the tool don't worry about it. just provide the id"
        "6)If the script fails then modify it and rerun until it succeeds. "
        "7) If the error persists for more than 10 tries, provide a detailed explanation of the issue and suggest potential solutions."
    ),
    expected_output=(
        "— A cleaned and validated RF Diffusion configuration script, ready to run.  \n"
        "— Execution logs showing successful completion (no errors). if error is persists show that"
    ),
    agent=RF_Diffusion_Script_Verifier,
    context=[RF_Diffusion_configuration_task],
    output_pydantic=RFDiffusionScriptConfig
    
)



CREW

In [20]:
combined_crew = Crew(
    agents=[query_generator, uniprot_query_assurance_agent,protein_selector_agent, protein_expert_agent, RF_Diffusion_Expert,RF_Diffusion_Script_Verifier],
    tasks=[plan, query_review,selection_task, protein_analysis_task, RF_Diffusion_configuration_task,RF_Diffusion_script_verification_task],
    verbose=True,
)

RUN

In [21]:
combined_inputs = {
    "userinput": "I want a protein that can help in the DNA binding.",
    "function": "I want a protein that can help in the DNA binding.",
    "RF_Dif_manual":RF_Dif_manual
}

result = combined_crew.kickoff(inputs=combined_inputs)
print(result)

[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Task:[00m [92m1. Extract key biological terms from the protein function description: I want a protein that can help in the DNA binding..
2. Map these terms to UniProt search fields and controlled vocabularies.
3. Generate a string within paranthesis UniProt query optimized for accuracy and recall.
4. Validate and refine the query to ensure relevant search results.[00m




[1m[95m# Agent:[00m [1m[92muniprot_query_generator[00m
[95m## Final Answer:[00m [92m
(go:"GO:0003677")[00m




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Task:[00m [92mReview the UniProt query generated by the uniprot_query_generator agent.[00m


https://rest.uniprot.org/uniprotkb/search?query=%28go%3A%22GO%3A0003677%22%29&format=json&size=5




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThe goal is to ensure the UniProt query accurately targets proteins involved in DNA binding based on the provided GO term. I need to use the `uniprot_fetch_tool` to test the query and analyze the results. If the results are not satisfactory, I will refine the query and re-test until the correct proteins are retrieved.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go:\\\"GO:0003677\\\")\"}"[00m
[95m## Tool Output:[00m [92m
[][00m


https://rest.uniprot.org/uniprotkb/search?query=%28go%3A%22GO%3A0003677%22%29+AND+%28reviewed%3Ayes%29&format=json&size=5
Error during UniProt API request: 400 Client Error: Bad Request for url: https://rest.uniprot.org/uniprotkb/search?query=%28go%3A%22GO%3A0003677%22%29+AND+%28reviewed%3Ayes%29&format=json&size=5




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThought: The `uniprot_fetch_tool` returned an empty list. This indicates that the query `(go:"GO:0003677")` is not effectively retrieving proteins associated with DNA binding. I need to refine the query to improve its accuracy. I will try adding `AND reviewed:yes` to filter for manually reviewed entries.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go:\\\"GO:0003677\\\") AND (reviewed:yes)\"}"[00m
[95m## Tool Output:[00m [92m
Error during UniProt API request: 400 Client Error: Bad Request for url: https://rest.uniprot.org/uniprotkb/search?query=%28go%3A%22GO%3A0003677%22%29+AND+%28reviewed%3Ayes%29&format=json&size=5[00m


https://rest.uniprot.org/uniprotkb/search?query=%28go_id%3AGO_0003677%29+AND+%28reviewed%3Atrue%29&format=json&size=5
Error during UniProt API request: 400 Client Error: Bad Request for url: https://rest.uniprot.org/uniprotkb/search?query=%28go_id%3AGO_0003677%29+AND+%28reviewed%3Atrue%29&format=json&size=5




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92m00
Details: Invalid syntax: expecting end of string but found ':' at position 4
Thought: The query `(go:"GO:0003677") AND (reviewed:yes)` resulted in a "Bad Request" error, indicating a syntax error in the query. The error message "expecting end of string but found ':' at position 4" suggests that the issue might be with the GO term syntax or the way the `AND` operator is used. Let's try `(go_id:GO_0003677) AND (reviewed:true)`.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go_id:GO_0003677) AND (reviewed:true)\"}"[00m
[95m## Tool Output:[00m [92m
Error during UniProt API request: 400 Client Error: Bad Request for url: https://rest.uniprot.org/uniprotkb/search?query=%28go_id%3AGO_0003677%29+AND+%28reviewed%3Atrue%29&format=json&size=5


You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:

[91m Received None or empty response from LLM call.[00m
[91m An unknown error occurred. Please check the details below.[00m
[91m Error details: Invalid response from LLM call - None or empty.[00m
[91m An unknown error occurred. Please check the details below.[00m
[91m Error details: Invalid response from LLM call - None or empty.[00m


[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Task:[00m [92mReview the UniProt query generated by the uniprot_query_generator agent.[00m




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThe user wants me to validate the UniProt query (go:"GO:0003677") and ensure it accurately retrieves proteins involved in DNA binding. I need to use the uniprot_fetch_tool to execute the query and analyze the results. If the query is suboptimal, I will suggest improvements.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go:\\\"GO:0003677\\\")\"}"[00m
[95m## Tool Output:[00m [92m
[][00m


https://rest.uniprot.org/uniprotkb/search?query=%28go%3A%22GO%3A0003677%22%29+AND+%28reviewed%3Atrue%29&format=json&size=5




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThought:The query returned no results. This suggests the query might be too specific or there might be an issue with the way the Gene Ontology term "GO:0003677" (DNA binding) is being used in the query. I need to refine the query to broaden the search and ensure it effectively captures proteins with DNA-binding function. A more general approach might involve searching for proteins annotated with the "DNA binding" GO term directly or exploring related GO terms.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go:\\\"GO:0003677\\\") AND (reviewed:true)\"}"[00m
[95m## Tool Output:[00m [92m
[][00m


https://rest.uniprot.org/uniprotkb/search?query=%28%28keyword%3A%22DNA+binding%22%29+AND+%28reviewed%3Atrue%29%29&format=json&size=5




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThought:The query still returns no results even with the reviewed filter. Let me refine the query to include more general terms and filters.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"((keyword:\\\"DNA binding\\\") AND (reviewed:true))\"}"[00m
[95m## Tool Output:[00m [92m
[]


You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:

Tool Name: uniprot_fetch_tool
Tool Arguments: {'query': {'description': None, 'type': 'str'}}
Tool Description: This tool provides an acces to fetch data from UniProtKB using the UniProt REST API.
    the input has to be strictly a string 

IMPORTANT: Use the following format in your response:

```
Thought: you should always think about what to do
Action: the action to take, only one name of [uniprot_fetch_tool], just the name, exactly as it's written.
Action I

https://rest.uniprot.org/uniprotkb/search?query=%28go%3AGO%3A0003677%29&format=json&size=5
Error during UniProt API request: 400 Client Error: Bad Request for url: https://rest.uniprot.org/uniprotkb/search?query=%28go%3AGO%3A0003677%29&format=json&size=5




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThought:The query still returns no results, even with the keyword "DNA binding". It seems there may be an issue with how the UniProt API is interpreting the query. Let me try a simpler query with just the GO term ID, without the quotes.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go:GO:0003677)\"}"[00m
[95m## Tool Output:[00m [92m
Error during UniProt API request: 400 Client Error: Bad Request for url: https://rest.uniprot.org/uniprotkb/search?query=%28go%3AGO%3A0003677%29&format=json&size=5[00m




[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92m00
Details: Message: Invalid syntax. Expecting quoted string for field 'go' but got 'GO'. at position: 4
Thought:The error message indicates that the GO term should be enclosed in quotes. Let me try that again.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(go:\\\"GO:0003677\\\")\"}"[00m
[95m## Tool Output:[00m [92m
[][00m


https://rest.uniprot.org/uniprotkb/search?query=%28keyword%3ADNA-binding%29&format=json&size=5
Data saved to /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430/uniprot/A0A0C5B5G6.json
Data saved to /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430/uniprot/A1A519.json
Data saved to /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430/uniprot/A6NCS4.json
Data saved to /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430/uniprot/O00409.json
Data saved to /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-183717_112e8430/uniprot/O00712.json
Starting to search PDB file from AlphaFoldDB...
PDB file downloaded successfully.
Starting to search PDB file from AlphaFoldDB...
PDB file downloaded successfully.
Startin



[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Thought:[00m [92mThought:Still no results. It appears there might be a problem with the UniProt database or the way the GO terms are indexed. Let me try a broader search using the keyword "DNA-binding" without any GO terms.[00m
[95m## Using tool:[00m [92muniprot_fetch_tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"(keyword:DNA-binding)\"}"[00m
[95m## Tool Output:[00m [92m
['A0A0C5B5G6', 'A1A519', 'A6NCS4', 'O00409', 'O00712']


You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:

Tool Name: uniprot_fetch_tool
Tool Arguments: {'query': {'description': None, 'type': 'str'}}
Tool Description: This tool provides an acces to fetch data from UniProtKB using the UniProt REST API.
    the input has to be strictly a string 

IMPORTANT: Use the following format in your response:

```
Thought: you should always think about what to do
Action: the action to take,



[1m[95m# Agent:[00m [1m[92mquery_assurance_agent[00m
[95m## Final Answer:[00m [92m
The initial query (go:"GO:0003677") was suboptimal as it returned no results. A better query is "(keyword:DNA-binding)", which returns the following protein IDs: ['A0A0C5B5G6', 'A1A519', 'A6NCS4', 'O00409', 'O00712'].[00m




[1m[95m# Agent:[00m [1m[92mProtein Selection Expert[00m
[95m## Task:[00m [92mChoose the best protein from the provided list that performs the target function:I want a protein that can help in the DNA binding.. try to choose only limited proteins[00m




[1m[95m# Agent:[00m [1m[92mProtein Selection Expert[00m
[95m## Thought:[00m [92mI need to select the best protein from the list ['A0A0C5B5G6', 'A1A519', 'A6NCS4', 'O00409', 'O00712'] that is involved in DNA binding. To do this, I need to get the function information for each protein.[00m
[95m## Using tool:[00m [92mget_protein__function_info[00m
[95m## Tool Input:[00m [92m
"{\"protein_id\": \"A0A0C5B5G6\"}"[00m
[95m## Tool Output:[00m [92m
{'O00409': ['Acts as a transcriptional repressor. May be involved in DNA damage-inducible cell cycle arrests (checkpoints)'], 'O00712': ["Transcriptional activator of GFAP, essential for proper brain development (PubMed:30388402). Recognizes and binds the palindromic sequence 5'-TTGGCNNNNNGCCAA-3' present in viral and cellular promoters and in the origin of replication of adenovirus type 2. These proteins are individually capable of activating transcription and replication"], 'A0A0C5B5G6': ["Regulates insulin sensitivity and met



[1m[95m# Agent:[00m [1m[92mProtein Selection Expert[00m
[95m## Thought:[00m [92mThought: I have the function information for A0A0C5B5G6, A6NCS4, O00409 and O00712. Now I need the function information for A1A519.[00m
[95m## Using tool:[00m [92mget_protein__function_info[00m
[95m## Tool Input:[00m [92m
"{\"protein_id\": \"A1A519\"}"[00m
[95m## Tool Output:[00m [92m
{'O00409': ['Acts as a transcriptional repressor. May be involved in DNA damage-inducible cell cycle arrests (checkpoints)'], 'O00712': ["Transcriptional activator of GFAP, essential for proper brain development (PubMed:30388402). Recognizes and binds the palindromic sequence 5'-TTGGCNNNNNGCCAA-3' present in viral and cellular promoters and in the origin of replication of adenovirus type 2. These proteins are individually capable of activating transcription and replication"], 'A0A0C5B5G6': ["Regulates insulin sensitivity and metabolic homeostasis (PubMed:25738459, PubMed:33468709). Inhibits the folate cy



[1m[95m# Agent:[00m [1m[92mProtein Selection Expert[00m
[95m## Final Answer:[00m [92m
{"protein_id": "O00712", "function": "Transcriptional activator of GFAP, essential for proper brain development (PubMed:30388402). Recognizes and binds the palindromic sequence 5'-TTGGCNNNNNGCCAA-3' present in viral and cellular promoters and in the origin of replication of adenovirus type 2. These proteins are individually capable of activating transcription and replication"}[00m




[1m[95m# Agent:[00m [1m[92mProtein Scaffolding Specialist[00m
[95m## Task:[00m [92mAnalyze the provided UniProt features in JSON format, identifying all functional sites (active sites, binding sites, metal-binding sites, etc.). Interpret the user's desired protein function described in natural language and determine which motifs should be preserved as anchors and which regions masked for redesign. Identify the most appropriate scaffolding approach and provide clear reasoning for your decisions, including motif positions. unction to be preserved: I want a protein that can help in the DNA binding.. use the protein id from previous task[00m




LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



ERROR:root:LiteLLM call failed: litellm.RateLimitError: litellm.RateLimitError: VertexAIException - {
  "error": {
    "code": 429,
    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "location": "global",
              "model": "gemini-2.0-flash"
            },
            "quotaValue": "15"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini AP

[91m Error during LLM call: litellm.RateLimitError: litellm.RateLimitError: VertexAIException - {
  "error": {
    "code": 429,
    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "location": "global",
              "model": "gemini-2.0-flash"
            },
            "quotaValue": "15"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini API q

RateLimitError: litellm.RateLimitError: litellm.RateLimitError: VertexAIException - {
  "error": {
    "code": 429,
    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "location": "global",
              "model": "gemini-2.0-flash"
            },
            "quotaValue": "15"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini API quotas",
            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.RetryInfo",
        "retryDelay": "47s"
      }
    ]
  }
}


In [None]:
from rosseta.rosseta_run import PyRosettaWrapper
ros_obj = PyRosettaWrapper(current_path)
ros_obj.run(pdb_file="pdb/output.pdb")

PyRosettaWrapper initialized with directory: /home/bharath-sooryaa-m/Documents/BIO/proj/Protein-Designing-With-Agents/cache/session_20250408-182640_87724bea


NameError: name 'initialize_pyrosetta' is not defined