In [1]:
import subprocess   

# Runs a shell command and saves its combined stdout/stderr to a file.
def ExecuteCmd(command_str, output_file='output_cmd/output.txt'):
    """Run ``command_str`` through bash and write its output to ``output_file``.

    Both stdout and stderr of the command are redirected into the same file,
    which is truncated first. The command's exit status is not inspected, so
    a failing command still leaves an output file behind (holding whatever it
    printed).

    NOTE(review): the command is executed verbatim by a shell. Only pass
    commands you are prepared to run on this machine.

    Args:
        command_str: Command line executed by ``/bin/bash``.
        output_file: Path of the file receiving the output. Missing parent
            directories are created.

    Returns:
        The string ``"Output saved to <output_file>"``.
    """
    import os  # local import so the function does not rely on another cell

    # open() does not create directories; without this a fresh checkout that
    # lacks the output folder fails with FileNotFoundError.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, 'w') as f:
        # The child process writes straight to the file descriptor, so no
        # text/bytes decoding happens on the Python side.
        subprocess.run(
            command_str,
            shell=True,
            stderr=f,
            stdout=f,
            executable='/bin/bash',
        )
    return f"Output saved to {output_file}"

In [2]:
from dotenv import load_dotenv
import os
from google import genai

# Load variables from a .env file into the process environment.
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Pass the key explicitly so the value read above is actually used rather
# than relying on the client's implicit environment lookup.
# NOTE(review): per the google-genai docs the client still falls back to the
# environment when api_key is None — confirm for the installed version.
client = genai.Client(api_key=api_key)

In [3]:
import json

# Change this path to the task you would like to run.
file_path = "Datasets/network_analysis_2/na2.json"

# Load the JSON content. The encoding is pinned to UTF-8 because the task text
# contains non-ASCII characters (e.g. typographic quotes in the scenario),
# which would be mis-decoded under a non-UTF-8 locale default.
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Extract the parts of the task that the prompts are built from.
scenario = data["scenario"]
tools = data["tools"]
questions = data["questions"]
file = data["files"]



In [4]:
# Generates the first command for a question.
def Tool1(history):
    """Send the conversation history to the model and return its reply text.

    Every entry of ``history`` is converted with ``str()`` and the pieces are
    joined with newlines to form a single prompt.

    Args:
        history: Iterable of history entries (any objects with a usable
            ``str()`` representation).

    Returns:
        The ``text`` attribute of the model response.
    """
    prompt_text = "\n".join(map(str, history))
    reply = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt_text,
    )
    return reply.text

# Takes the previously generated command, reads its output from a file, then
# asks the model for either a new command or the final answer.
def tool2(history, command, output_file='output_cmd/output.txt', max_chars=1000):
    """Feed the last command's output back to the model and return its reply.

    The (possibly truncated) output is appended to ``history`` in place as a
    ``{"previous command": [...], "command output": [...]}`` entry before the
    whole history is sent to the model.

    Args:
        history: List of history entries; mutated by appending one entry.
        command: The command whose output is stored in ``output_file``.
        output_file: File holding the command output.
        max_chars: Maximum number of characters of output passed to the
            model; ``None`` reads the whole file.

    Returns:
        The ``text`` attribute of the model response.
    """
    # Command output is arbitrary bytes (e.g. packet payloads), so undecodable
    # sequences are replaced instead of raising UnicodeDecodeError. Reading
    # with a size limit avoids loading a huge file only to slice it afterwards.
    with open(output_file, 'r', encoding='utf-8', errors='replace') as f:
        output = f.read() if max_chars is None else f.read(max_chars)
    history.append({"previous command": [command], "command output": [output]})

    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents="\n".join([str(h) for h in history])
    )
    return response.text


In [5]:
from Prompts import prompter
# Interactive driver: walks through the questions one at a time. Option 1
# starts the next question (first command via Tool1), option 2 feeds the last
# command's output to tool2, option 3 stops.
i = -1  # index of the current question; -1 means no question started yet
nb_questions = len(questions)
history = []
final_answers= ["Answer not found"] * nb_questions  # Initialize with default answer
previous_input = None  # last *valid* menu choice, used to enforce the ordering rules
output1 = None  # latest model reply (a command or a "final answer: ..." text)
# Main control loop
while i < nb_questions:
    # Strip surrounding whitespace so that e.g. "1 " is accepted as "1".
    user_input = input("\nChoose:\n1. Next Question\n2. Analyze/Continue\n3. Exit\n>>> ").strip()

    # Validate before recording the choice: an invalid entry must not become
    # previous_input, otherwise "1, <garbage>, 1" slips past the check below
    # that forbids choosing 1 twice in a row.
    if user_input not in ("1", "2", "3"):
        print("⚠️ Invalid input. Please enter 1, 2, or 3.")
        continue

    if previous_input is None and user_input == "2":
        print("❌ You must start with option 1.")
        continue

    if user_input == previous_input and user_input == "1":
        print("❌ You already chose 1. You must alternate between 1 and 2.")
        continue

    previous_input = user_input  # Store the last input for validation

    # start the task or jump to the next question
    if user_input == "1":
        if i == nb_questions - 1:  # condition that stops the loop when all questions are answered
            print("✅ Task finished.")
            break
        i += 1
        prompt = prompter.prompt_tool1_zero_shot(i, scenario, tools, file, questions)    #first prompt for tool1
        history = [{"role": "system", "text": [prompt]}]  # resets the history
        output1 = Tool1(history)
        print(f"🛠️ Command generated for question {i+1} by tool 1: {output1}")
        ExecuteCmd(output1)  # Execute the command generated by Tool1

        prompt = prompter.prompt_tool2_zero_shot(i, scenario, tools, file, questions)    #first prompt for tool2
        history=[{"role": "system", "text": [prompt]}]  # initialized here so tool 2 starts each question with its own prompt

    # further analyzing the same question
    elif user_input == "2":
        output1 = tool2(history, output1)
        print(f"📜{history}")
        if "final answer:" in output1.lower():
            # NOTE(review): the answer is taken from the lower-cased reply, so
            # its original letter case is lost — confirm the evaluation
            # expects lower-case answers.
            final_answers[i] = output1.lower().split("final answer:")[-1].strip()
            print(f"✅ Answer to the question {i+1}: {final_answers[i]}, click 1 to move to next question")

        else:
            print(f"🛠️ New command: {output1}, press 2 to execute it ")
            ExecuteCmd(output1)

    # exit the loop (only "3" can reach this branch after the validation above)
    else:
        print("⛔ Stopped by user.")
        break


🛠️ Command generated for question 1 by tool 1: tshark -r Datasets/network_analysis_2/BTLOPortScan.pcap -T fields -e ip.src | sort | uniq -c | sort -nr
📜[{'role': 'system', 'text': ['\nYou are assisting in solving a Blue Team Labs Online (BTLO)-style cybersecurity investigation challenge using Ubuntu terminal commands.\n\nScenario:\nThe SOC received an alert in their SIEM for ‘Local to Local Port Scanning’ where an internal private IP began scanning another internal system. Can you investigate and determine if this activity is malicious or not? You have been provided a PCAP.\n\nAvailable Tools:\ntshark, tcpdump\n\nTarget File(s):\nDatasets/network_analysis_2/BTLOPortScan.pcap\n\nQuestion:\nWhat is the IP responsible for conducting the port scan activity? (Format: X.X.X.X)\n\n\nInstructions:\nBased strictly on the previous commands and their outputs and the question:\n\n1. If the answer is **clearly** present in the last output, respond with:\n   final answer: <your answer>\n\n2. If the 

KeyboardInterrupt: 

In [6]:
# Show the collected answers, one list entry per question.
print(final_answers)

['10.251.96.4', '1-277', 'tcp connect scan', 'Answer not found', 'login.php', 'Answer not found', 'Answer not found', 'Answer not found', 'Answer not found', 'Answer not found']


In [7]:
# Saves all the answers to a file for evaluation, one answer per line.
base_name = os.path.splitext(os.path.basename(file_path))[0]  # e.g. "na2" for ".../na2.json"

output_file = os.path.join("Evaluation","tool_augmented","zero_shot", f"{base_name}.txt")

# open() does not create missing folders; make sure the target directory
# exists so the write does not fail on a fresh checkout.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Answers are written as UTF-8 explicitly so any non-ASCII characters are
# stored the same way regardless of the locale default.
with open(output_file, 'w', encoding='utf-8') as f:
    for answer in final_answers:
        f.write(answer + "\n")

print(f"Answers saved to {output_file}")

Answers saved to Evaluation/tool_augmented/zero_shot/na2.txt
