In [1]:
from crewai_tools import ScrapeWebsiteTool
import os
# Turn CrewAI telemetry off (works around SSL certificate errors).
os.environ.update(CREWAI_DISABLE_TELEMETRY="true")

# Example scraping tool; the URL used here is the Wikipedia article on
# scaffold proteins.
uniprot_scraper = ScrapeWebsiteTool(
    website_url="https://en.wikipedia.org/wiki/Scaffold_protein",
)



In [2]:
from crewai.tools import tool
import json
@tool("get_protein__site_info")
def get_protein__site_info(protein_id: str) -> str:
    """Fetches the protein site information (UniProt features) from the local cache.

    Args:
        protein_id: Identifier used as the cache file name, e.g. "P49593".

    Returns:
        The string form of the ``features`` entry of the first cached result,
        or a short "No data found ..." message when the id is not a plain file
        name, the cache file is missing/unreadable, or the JSON does not have
        the expected ``results[0]['features']`` structure.
    """
    # The id is supplied by the LLM: normalise it and refuse anything that
    # could point outside the cache directory.
    protein_id = str(protein_id).strip()
    if not protein_id or any(sep in protein_id for sep in ("/", "\\", ":")):
        print("No data found")
        return "No data found: invalid protein id {!r}".format(protein_id)

    loc = r"D:\PROJECT\PROTEIN_MAKER\Protein-Designing-With-Agents\cache\uniprot\{protein_id}.json".format(protein_id=protein_id)
    try:
        # JSON is read explicitly as UTF-8 rather than the platform default encoding.
        with open(loc, 'r', encoding='utf-8') as f:
            data = json.load(f)
        features = data['results'][0]['features']
    except (OSError, ValueError, KeyError, IndexError, TypeError) as exc:
        # OSError: file missing/unreadable; ValueError: invalid JSON or bad
        # encoding; the rest: unexpected JSON structure. Return a message
        # (never None) so the calling agent always receives a string.
        print("No data found")
        return "No data found for protein id {!r} ({}: {})".format(
            protein_id, type(exc).__name__, exc
        )

    print("used")
    return str(features)

KNOWLEDGE

In [3]:
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Raw string: the Windows path contains backslashes (\P, \c, \R) that must not
# be interpreted as escape sequences.
loc = r"D:\PROJECT\PROTEIN_MAKER\Protein-Designing-With-Agents\config\RF_diff_context.txt"
# Read the whole text file; its content is inserted into the RF Diffusion
# agent's backstory as the manual.
with open(loc, 'r') as f:
    data = f.read()
RF_Dif_manual = data
    

In [4]:
import os
from crewai import Agent, Task, Crew, LLM

# The Google Gemini API key is taken from the GEMINI_API_KEY environment
# variable; if it is not set, prompt for it without echoing.
# SECURITY: a literal key used to be embedded in this cell. Treat that key as
# leaked and revoke/rotate it; never commit credentials to a notebook.
if not os.environ.get("GEMINI_API_KEY"):
    from getpass import getpass  # only needed when we actually have to prompt

    os.environ["GEMINI_API_KEY"] = getpass("GEMINI_API_KEY: ")

# Use CrewAI's native LLM integration
gemini_llm = LLM(
    model="gemini/gemini-2.0-flash",  # Specify the exact model
    temperature=0.7
)

# Create your protein expert agent.
# Adjacent string literals are concatenated with no separator, so every piece
# that is followed by another one ends with an explicit space.
protein_expert_agent = Agent(
    role="You are a Protein Scaffolding Specialist with expertise in computational protein design, "
         "specifically in identifying and manipulating functional sites for protein engineering. You analyze protein structural "
         "and functional data to create optimized scaffolds for desired biochemical functions.",
    goal="Your goal is to assist users in creating protein scaffolds by identifying which motifs "
         "should be masked or preserved based on UniProt features (active sites, binding sites, etc.) "
         "scaffolding for implementing the protein design workflow. "
         "Utilize get_protein__site_info to fetch the protein site information from cache",
    backstory="You’ve spent years at the intersection of structural biology and computational "
              "design, working with cutting-edge deep learning approaches for protein engineering. Your "
              "expertise began with traditional side-chain and backbone grafting methods but evolved to "
              "incorporate modern neural network-based approaches like constrained hallucination and "
              "inpainting. You’ve successfully designed numerous functional proteins, including enzymes, "
              "receptor traps, and protein-binding scaffolds. Your knowledge spans both classical protein "
              "design principles and state-of-the-art deep learning methods for scaffolding "
              "functional sites.",
    llm=gemini_llm,
    # The Agent keyword is `tools` (plural); with `tool=` the cache lookup tool
    # was not registered under the documented parameter.
    tools=[get_protein__site_info],
    verbose=True
)

# Agent that turns the scaffolding analysis into an RFdiffusion setup.
# The text read into RF_Dif_manual is appended to the backstory.
RF_Diffusion_Expert = Agent(
    role="You are an RF Diffusion Expert specializing in protein design through diffusion-based "
         "generative modeling. You provide detailed guidance on implementing RFdiffusion for "
         "protein scaffolding, with a specific focus on motif masking strategies and diffusion "
         "parameter optimization.",
    goal="Your goal is to translate the protein scaffolding requirements identified by the "
         "previous agent into specific RF Diffusion implementation strategies, clearly "
         "specifying which motifs should be masked versus preserved, and providing technical "
         "details on diffusion parameters and constraints to achieve the desired protein "
         "scaffold.",
    # .format() applies to the whole concatenated literal; the parentheses make
    # that explicit. A space now separates "catalysts." from the next sentence.
    backstory=(
        "You've been at the forefront of developing and applying diffusion-based models "
        "for protein design since the inception of RFdiffusion. Your expertise spans the "
        "technical aspects of the diffusion process, including noise scheduling, "
        "conditioning strategies, and motif-preserving constraints. You've successfully "
        "applied RFdiffusion to design novel enzymes, protein binders, and functional "
        "scaffolds across diverse applications from therapeutics to industrial catalysts. "
        "There are also given manual that you can use: {RF_Dif_manual}"
    ).format(RF_Dif_manual=RF_Dif_manual),
    llm=gemini_llm,
    verbose=True
)

LLM value is already an LLM object
LLM value is already an LLM object


In [5]:
from crewai import Task

# First task: analyse the cached UniProt features for the requested function.
# {protein} and {function} are placeholders filled from the `inputs` passed to
# Crew.kickoff(). Adjacent literals are joined with no separator, so each
# sentence ends with a space and the list-like lines end with a newline.
protein_analysis_task = Task(
    description="You analyze the provided UniProt features in JSON format, identifying all functional sites (active sites, binding sites, metal-binding sites, etc.). "
                "You interpret the user's desired protein function described in natural language. "
                "You determine which functional motifs should be preserved as anchors and which regions should be masked for redesign based on the relationship between existing features and the desired function. "
                "You identify the most appropriate scaffolding approach (side-chain grafting, backbone grafting, constrained hallucination, or inpainting) based on the complexity of the functional site and desired modifications. "
                "You provide clear reasoning for motif selection and masking decisions, referencing the specific functional requirements. "
                "mention the position of the motif to be masked or preserved\n"
                "Given protein id:{protein}\n"
                "Function to be preserved:{function}",
    expected_output="You deliver a detailed report containing:\n"
                    "-Your analysis of the provided UniProt features and their relevance to the desired function.\n"
                    "-Your identification of specific motifs to be preserved or masked, with justification.\n"
                    "-Your recommended scaffolding approach, with an explanation.\n"
                    "-Output should be in a json format",
    agent=protein_expert_agent
)


# Second task: produce the RFdiffusion run script. It receives the output of
# protein_analysis_task as context.
RF_Diffusion_configuration_task = Task(
    agent=RF_Diffusion_Expert,
    context=[protein_analysis_task],
    description=(
        "You are provided with the necessary configuration details "
        "to run the rfdiffusion process. "
        "Your objective is to interpret the user's desired settings and "
        "generate a configuration script that includes all required parameters. "
        "This script should specify model settings, diffusion parameters, "
        "input file paths, output directory locations, and any optional parameters. "
    ),
    expected_output=(
        "Your output must be a valid terminal script containing rf diffusion "
        "script with all the necessary parameters like contigmap"
    ),
)


In [6]:
from crewai import Crew

# Bundle both agents with their tasks. The task objects themselves are passed
# (not protein_expert_agent.tasks), analysis first, configuration second.
protein_crew = Crew(
    tasks=[
        protein_analysis_task,
        RF_Diffusion_configuration_task,
    ],
    agents=[
        protein_expert_agent,
        RF_Diffusion_Expert,
    ],
)

# Run the crew; `inputs` supplies the values for the {protein} and {function}
# placeholders in the analysis task, then the result is printed.
result = protein_crew.kickoff(
    inputs={
        "protein": "P49593",
        "function": "DNA binding",
    }
)
print(result)


[1m[95m# Agent:[00m [1m[92mYou are a Protein Scaffolding Specialist with expertise in computational protein design, specifically in identifying and manipulating functional sites for protein engineering. You analyze protein structural and functional data to create optimized scaffolds for desired biochemical functions.[00m
[95m## Task:[00m [92mYou analyze the provided UniProt features in JSON format, identifying all functional sites (active sites, binding sites, metal-binding sites, etc.).You interpret the user's desired protein function described in natural language.You determine which functional motifs should be preserved as anchors and which regions should be masked for redesign based on the relationship between existing features and the desired function.You identify the most appropriate scaffolding approach (side-chain grafting, backbone grafting, constrained hallucination, or inpainting) based on the complexity of the functional site and desired modifications.You provide cl

In [7]:
# Print the text form of `result` (the value returned by protein_crew.kickoff) again.
print(result)

```bash
#!/bin/bash

./scripts/run_inference.py \
  'contigmap.contigs=[1-13/A14-25/26-99/A100-102/103-149/A150-153/154-200]' \
  'contigmap.inpaint_seq=[A100-102/A150-153]' \
  inference.input_pdb=input.pdb \
  inference.output_prefix=rfdiffusion_output/design \
  inference.num_designs=10
```


In [8]:
import os
# Disable CrewAI telemetry to avoid SSL certificate errors
# NOTE(review): the same variable is already set in the first cell, and this
# cell sits after the crew run — presumably redundant here; confirm and remove.
os.environ["CREWAI_DISABLE_TELEMETRY"] = "true"
