<a href="https://colab.research.google.com/github/oluwafemidiakhoa/MLprject/blob/main/Bioinformatics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install necessary libraries:

In [None]:
!pip install jupyter groq biopython


Collecting jupyter
  Downloading jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)
Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting qtconsole (from jupyter)
  Downloading qtconsole-5.5.2-py3-none-any.whl.metadata (5.1 kB)
Collecting qtpy>=2.4.0 (from qtconsole->jupyter)
  Downloading QtPy-2.4.1-py3-none-any.whl.metadata (12 kB)
Collecting jedi>=0.16 (from ipython>=5.0.0->ipykernel->jupyter)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m75.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading qtconsole-5.5.2-py3-none-any.whl (123 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.4/123.4 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0

## Initialize Required Libraries

In [None]:
import os
import json
from groq import Groq
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction
from Bio import Entrez, SeqIO


## Set API Key and Model

In [None]:
api_key = "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"
client = Groq(api_key=api_key)
model = "llama3-groq-70b-8192-tool-use-preview"


## Define Functions

In [21]:
def analyze_sequence(sequence: str):
    """Return basic analysis of a genetic sequence"""
    seq_obj = Seq(sequence)
    return {
        'length': len(seq_obj),
        'gc_content': gc_fraction(seq_obj) * 100  # Convert to percentage
    }

def identify_gene_function(gene_id: str):
    """Fetch gene function from NCBI with error handling"""
    try:
        Entrez.email = "your.email@example.com"
        handle = Entrez.efetch(db="gene", id=gene_id, rettype="gb", retmode="text")
        record = SeqIO.read(handle, "genbank")
        handle.close()
        return {
            'gene_id': gene_id,
            'function': record.annotations.get('comment', 'No function available')
        }
    except ValueError:
        return {
            'gene_id': gene_id,
            'function': 'Gene function not found. Please verify the gene ID.'
        }

def predict_protein_structure(sequence: str):
    """Dummy function to simulate protein structure prediction"""
    return f"Predicted structure for protein with sequence: {sequence[:10]}... (truncated)"


## Define Messages and Tools

In [22]:
messages = [
    {"role": "system", "content": "You are a helpful assistant that provides bioinformatics analyses."},
    {
        "role": "user",
        "content": "Analyze the sequence 'ATGCGTACGTAGCTAGCTAGCTAGCTA', identify gene function for gene ID '7157', and predict protein structure for sequence 'MVHLTPEEKSAVTALWGKVNVDEVGGEAL'."
    },
]

tools = [
    {
        "type": "function",
        "function": {
            "name": "analyze_sequence",
            "description": "Analyzes a given genetic sequence",
            "parameters": {
                "type": "object",
                "properties": {"sequence": {"type": "string", "description": "The genetic sequence"}},
                "required": ["sequence"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "identify_gene_function",
            "description": "Identifies gene function for a given gene ID",
            "parameters": {
                "type": "object",
                "properties": {"gene_id": {"type": "string", "description": "The gene ID"}},
                "required": ["gene_id"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "predict_protein_structure",
            "description": "Predicts protein structure for a given sequence",
            "parameters": {
                "type": "object",
                "properties": {"sequence": {"type": "string", "description": "The protein sequence"}},
                "required": ["sequence"]
            }
        }
    }
]


## Run the Completion Request

In [23]:
response = client.chat.completions.create(
    model=model, messages=messages, tools=tools, tool_choice="auto", max_tokens=4096
)

response_message = response.choices[0].message


## Process Tool Calls

In [24]:
tool_calls = response_message.tool_calls

messages.append(
    {
        "role": "assistant",
        "tool_calls": [
            {"id": tool_call.id, "function": {"name": tool_call.function.name, "arguments": tool_call.function.arguments}, "type": tool_call.type}
            for tool_call in tool_calls
        ]
    }
)

available_functions = {
    "analyze_sequence": analyze_sequence,
    "identify_gene_function": identify_gene_function,
    "predict_protein_structure": predict_protein_structure
}

for tool_call in tool_calls:
    function_name = tool_call.function.name
    function_to_call = available_functions[function_name]
    function_args = json.loads(tool_call.function.arguments)
    function_response = function_to_call(**function_args)

    messages.append({"role": "tool", "content": json.dumps(function_response), "tool_call_id": tool_call.id})


## Display Final Results

In [25]:
response = client.chat.completions.create(
    model=model, messages=messages, tools=tools, tool_choice="auto", max_tokens=4096
)

print(response.choices[0].message.content)


The analysis of the sequence 'ATGCGTACGTAGCTAGCTAGCTAGCTA' shows a GC content of 48.15% and a length of 27 nucleotides. For the gene ID '7157', the function could not be found. Please verify the gene ID. The protein structure for the sequence 'MVHLTPEEKSAVTALWGKVNVDEVGGEAL' has been predicted.
