In [9]:
import subprocess
import zipfile
import os

def run_fastqc(input_file, output_dir="."):
    """
    Runs FastQC on the given input file.

    Parameters:
        input_file (str): Path to the input FASTQ file.
        output_dir (str, optional): Directory to save the FastQC report. It is
            created (including missing parents) when it does not exist yet.
            If not passed, uses current directory. An empty value omits the
            ``-o`` option altogether.

    Returns:
        int: Return code from the FastQC command.
    """
    command = ["fastqc", input_file]
    if output_dir:
        # FastQC refuses to run when the -o directory is missing (it never
        # creates it), so make sure it exists before launching the tool.
        os.makedirs(output_dir, exist_ok=True)
        command += ["-o", output_dir]
    result = subprocess.run(command)
    return result.returncode

def unzip_fastqc_report(zip_path, extract_to=None):
    """
    Unzips a FastQC .zip report file.

    Parameters:
        zip_path (str): Path to the FastQC .zip file.
        extract_to (str, optional): Directory to extract files to. If None, extracts to the same directory as the zip file.

    Returns:
        str: Directory the archive was extracted into, so callers can locate
        the extracted report files.
    """
    if extract_to is None:
        # dirname() is "" for a bare filename; use "." so the destination is
        # explicit both for extraction and in the message below.
        extract_to = os.path.dirname(zip_path) or "."
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
    print(f"Extracted {zip_path} to {extract_to}")
    return extract_to



In [10]:
def parse_basic_statistics(fastqc_data_path):
    """
    Parses the 'Basic Statistics' section from a FastQC fastqc_data.txt file.

    Only lines between the '>>Basic Statistics' header and the next
    '>>END_MODULE' marker are considered. Lines starting with '#', blank
    lines and lines without a tab separator are skipped.

    Parameters:
        fastqc_data_path (str): Path to the fastqc_data.txt file.

    Returns:
        dict: Dictionary of basic statistics, mapping each measure name to its
        value as a string. A measure with nothing after the tab maps to "".
        Empty if the section is not present.
    """
    stats = {}
    in_section = False
    # Stream the file so reading stops as soon as the section ends.
    with open(fastqc_data_path, 'r') as f:
        for line in f:
            if line.startswith(">>Basic Statistics"):
                in_section = True
                continue
            if not in_section:
                continue
            if line.startswith(">>END_MODULE"):
                break
            if line.startswith("#") or line.strip() == "":
                continue
            # Trim only the line ending before splitting: stripping all
            # trailing whitespace first would swallow the tab of an
            # empty-valued row and make the key/value split fail.
            key, sep, value = line.lstrip().rstrip("\r\n").partition("\t")
            if sep:
                stats[key] = value.rstrip()
    return stats


In [None]:
#run_fastqc("1_control_18S_2019_minq7.fastq")

In [None]:
# Disabled walkthrough of the helpers defined above: the whole example is
# wrapped in a string literal, so none of the statements inside it execute.
# NOTE(review): as the last expression of the cell, the string itself is what
# the notebook will echo as output. To actually run the example, remove the
# surrounding triple quotes.
"""
# Example usage and testing of the FastQC utility functions

# 1. Run FastQC on a sample file (uncomment and set your file path)
# fastq_file = "1_control_18S_2019_minq7.fastq"
# run_fastqc(fastq_file)

# 2. Unzip the FastQC report (uncomment and set your zip path)
zip_path = "1_control_18S_2019_minq7_fastqc.zip"
unzip_fastqc_report(zip_path)

# 3. Parse Basic Statistics from fastqc_data.txt
fastqc_data_path = "1_control_18S_2019_minq7_fastqc/fastqc_data.txt"
basic_stats = parse_basic_statistics(fastqc_data_path)
print("Basic Statistics:")
for k, v in basic_stats.items():
    print(f"{k}: {v}")
"""


In [None]:
# Import libraries
from dotenv import load_dotenv
from agents import Runner, trace, function_tool, Agent, OpenAIChatCompletionsModel, SQLiteSession
from openai import AsyncOpenAI
import os, requests, time, re
from bs4 import BeautifulSoup
from datetime import datetime

# Load environment variables
load_dotenv(override=True)

# Define LLM model

# Define the OpenAI client pointing to a local LLM server
client = AsyncOpenAI(base_url="http://localhost:11434/v1")

# Define model using the local LLM server client
# use gpt-oss which is better for tool usage and reasoning even though it takes time
model = OpenAIChatCompletionsModel(model = "gpt-oss",openai_client= client)

#model = "gpt-4.1-mini"

fastqc_agent_instructions = (
        "You are a specialized agent for Fastqc. You are provided with tools to run fastqc, unzip the output, read the summary and provide output."
    )

fastqc_agent = Agent(
        name="fastqc_agent",
        instructions=fastqc_agent_instructions,
        model=model,
        tools=[function_tool(run_fastqc), function_tool(unzip_fastqc_report), function_tool(parse_basic_statistics)],
    )

query = "Fastqc quality of my input fastq file: 1_control_18S_2019_minq7.fastq."
result = await Runner.run(fastqc_agent, query)
result.final_output

SyntaxError: unterminated string literal (detected at line 24) (3046090527.py, line 24)