In [19]:
from dotenv import load_dotenv
import os
from google import genai
from google.genai import types
import re
import subprocess

# Pull variables from a local .env file into the process environment.
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Hand the key to the client explicitly so the value read above is the one
# actually used; when it is unset, fall back to the SDK's own environment
# lookup exactly as before.
client = genai.Client(api_key=api_key) if api_key else genai.Client()

In [20]:
import json

# Path to the JSON file describing the challenge
file_path = "Datasets/file_analysis_2/fa2.json"

# Load the JSON content. The encoding is given explicitly so the result does
# not depend on the platform's locale default.
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Extract the parts of the challenge used by the later cells
scenario = data["scenario"]    # interpolated into the system prompt
tools = data["tools"]          # joined with ', ' in the system prompt
questions = data["questions"]  # indexed one at a time by next_question()
file = data["files"]           # joined with ', ' in the system prompt



In [21]:
def ExecuteCmd(command_str) -> str:
    """Execute a shell command and save its output to a file.

    The command is run through ``/bin/bash`` with both stdout and stderr
    redirected to ``output_cmd/output.txt``; the file is overwritten on
    every call. The output directory is created if it does not exist.

    Args:
        command_str: The shell command to execute as a string.

    Returns:
        A message indicating that the output was saved in a file.
    """
    output_file = 'output_cmd/output.txt'
    # Without this the very first call fails with FileNotFoundError on a
    # fresh checkout where the output directory has not been created yet.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as f:
        # SECURITY NOTE: shell=True executes command_str verbatim. The caller
        # in this notebook passes model-generated text, so only run this in a
        # disposable / sandboxed environment.
        subprocess.run(
            command_str,
            shell=True,
            stderr=f,
            stdout=f,
            text=True,
            executable='/bin/bash'
        )
    return "Output saved in a file"


# Tool 1: get the next question from a list
def next_question() -> str:
    """This tool reads and returns the next question and can be used to get the first question.

    Reads the module-level ``questions`` list at the module-level index ``i``
    and advances ``i`` by one. Once every question has been handed out it
    returns a short notice instead of raising IndexError, and leaves ``i``
    unchanged so callers that compare ``i`` with ``len(questions)`` still work.

    Returns:
        The next question, or a notice that no questions are left.
    """
    global i, questions
    if i >= len(questions):
        return "No more questions available."
    question = questions[i]
    i += 1
    return question

# Tool 2: read the command output file
def read_output(max_chars: int = 500) -> str:
    """This tool reads the file where command outputs are stored and returns its content(may be truncated if output is very long).

    Args:
        max_chars: Maximum number of characters returned before the
            truncation marker is appended. Defaults to 500.

    Returns:
        The stripped file content, cut to ``max_chars`` characters plus a
        "...[truncated]..." marker when longer, or a short notice when no
        output file exists yet.
    """
    output_file = 'output_cmd/output.txt'
    try:
        # errors='replace' keeps undecodable bytes from raising
        # UnicodeDecodeError when a command printed non-text data.
        with open(output_file, 'r', errors='replace') as f:
            # One character past the limit is enough to detect truncation
            # without loading an arbitrarily large file into memory.
            output = f.read(max_chars + 1)
    except FileNotFoundError:
        return "No command output found. Run a command with ExecuteCmd first."
    if len(output) > max_chars:
        output = output[:max_chars] + "\n...[truncated]..."
    return output.strip()


In [25]:
# System prompt for the chat session. It is an f-string, so the values of
# `scenario`, `tools` and `file` loaded from the challenge JSON are
# interpolated when this cell runs (`tools` and `file` are joined with ', ').
# The text describes a Thought / Action / Observation loop and the three
# callable tools (ExecuteCmd, next_question, read_output); the names written
# here are the ones the driver loop later parses out of the model's replies.
# The 500-character figure quoted for read_output() mirrors that function's
# truncation limit.
prompt= f"""
You are tasked with solving a cybersecurity investigation challenge composed of multiple sequential questions. Use only Ubuntu terminal commands to analyze the provided file and answer each question in order.

Scenario:
{scenario}

Available Command-line tools you can use:
{', '.join(tools)}

Target File:
{', '.join(file)} 

You run in a loop of Thought, Action,  Observation:

- Use Thought to describe your reasoning about the task you have been given.
- Provide thought after each prompt in this format: Thought: <Your reasoning and thinking>
- Use Action to run one of the actions available to you- then output.
- Observation will be the result of running those actions.
- At the end of the loop you output an Answer.

RULE(must obey):
- ALWAYS output Thought before Action in separate interactions
- Observation will be given to you, you only output Thought and Action


You have access to these tools: 
    
    ExecuteCmd:
    e.g. ExecuteCmd("ls -l /home/user")
    Executes a shell command and saves its output to a file.
    Takes command as an argument. 
    The output may be truncated if too long. Returns a confirmation message.

    next_question():
    Doesn't take arguments
    Retrieves the next question from the list of questions in order. 
    This can also be used to get the first question if none have been retrieved yet.

    read_output():
    Doesn't take arguments
    Reads the most recent command output from 'output_cmd/output.txt'. 
    If the content is very long, it returns only the first 500 characters followed by "...[truncated]...".

Example loop:

You will be called with task: Take the next question and solve it by providing a linux command

Thought: I don’t have the question yet, so I need to retrieve it first.(REQUIRED)

Action: next_question()

You will be called again with this:
Observation: "What is the value of the secret token stored in config.txt?"

Thought: I can use cat to read the file and find the secret token value.
Action: ExecuteCmd("cat config.txt")

You will be called again Task by the user: Analyze the previous command's output and generate a new command or return the final answer

Thought: I need to see the contents of the file to extract the secret token.(REQUIRED)
Action: read_output()

You will be called again with this:

Observation: "db_user=admin  db_pass=SuperSecure123  secret_token=FLAG-9f3a1c2b7e"

Thought: I found the secret token value in the file — I can now return the final answer no need to provide another command.
Answer: FLAG-9f3a1c2b7e

You will be called again with this:
Task by the user: Take the next question and solve it by providing a linux command 
""".strip()
# Show the fully rendered prompt so the interpolated values can be checked.
print (prompt)

You are tasked with solving a cybersecurity investigation challenge composed of multiple sequential questions. Use only Ubuntu terminal commands to analyze the provided file and answer each question in order.

Scenario:
A suspicious Pwershell script in a .txt file was found on one of our endpoints, can you work out what it does?

Available Command-line tools you can use:
grep, cat, awk, sed, sha256sum, base64, cut, sort, head, tail, find, strings

Target File:
Datasets/file_analysis_2/HDWallpaperEngine.txt 

You run in a loop of Thought, Action,  Observation:

- Use Thought to describe your reasoning about the task you have been given.
- Provide thought after each prompt in this format: Thought: <Your reasoning and thinking>
- Use Action to run one of the actions available to you- then output.
- Observation will be the result of running those actions.
- At the end of the loop you output an Answer.

RULE(must obey):
- ALWAYS output Thought before Action in separate interactions
- Observ

In [27]:
# Create a chat session; `prompt` is installed as the system instruction.
chat = client.chats.create(
    history=[],
    model="gemini-2.5-flash",
    config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(
            # -1 requests a dynamic thinking budget (see the Gemini API docs).
            thinking_budget=-1
        ),
        # Native function declarations are disabled; tool calls are parsed
        # out of the model's text reply in the loop below instead.
        #tools=[types.Tool(function_declarations=[ExecuteCmd_dec, next_question_dec, read_output_dec])],
        system_instruction=prompt  # system prompt
    )
)
i = 0  # number of questions handed out so far; advanced by next_question()
answer_list = ["answer not found"] * len(questions)

# Loop invariants: per-turn cap on model round-trips and callable tool names.
MaxAttempts = 10
available_tools = ["ExecuteCmd", "next_question", "read_output"]

while i < len(questions):

    # The operator chooses what to tell the model: 1, 2, or 3 to stop.
    raw_choice = input("Enter 1 to solve the next question, 2 to further analyse the question or 3 to quit: ")
    try:
        user_input = int(raw_choice)
    except ValueError:
        # Non-numeric input is handled as an invalid choice instead of crashing.
        user_input = None

    if user_input == 1:
        query = "Take the next question and solve it by providing a linux command "
    elif user_input == 2:
        query = "Analyze the previous command's output and generate a new command or return the final answer"
    elif user_input == 3:
        break
    else:
        print("Invalid input. Please enter 1, 2 or 3.")
        # Ask again rather than sending an undefined or stale query.
        continue

    # Inner loop: exchange messages with the model until it either runs a
    # shell command (control returns to the operator), gives an answer, or
    # the attempt budget is used up.
    j = 0
    next_input = query
    while j < MaxAttempts:
        j += 1
        # This is the request that triggers Gemini.
        response = chat.send_message(message=next_input)

        parts = response.candidates[0].content.parts[0]

        # `text` can be None for non-text parts; treat that as an empty reply.
        result = parts.text or ""
        print(result)

        # Case-insensitive detection, consistent with the split pattern; the
        # text after the last "Action:" marker is taken as the tool call.
        pieces = re.split(r"(?i)action\s*:\s*", result)
        if len(pieces) > 1:
            action = pieces[-1].strip()
            name_match = re.match(r"([a-zA-Z_][a-zA-Z0-9_]*)\s*\(", action)
            tool_name = name_match.group(1) if name_match else None

            if tool_name in available_tools:
                try:
                    # SECURITY NOTE: eval() runs model-generated text as
                    # Python. Only the leading identifier is checked against
                    # available_tools, so the rest of the expression is
                    # unrestricted. Run this notebook in a sandbox, or
                    # replace eval with an explicit argument parser and
                    # dispatch table.
                    result_tool = eval(action)
                except Exception as exc:
                    # Report the failure to the model instead of aborting
                    # the whole session on a malformed call.
                    next_input = f"Observation: Tool call failed: {exc}"
                    print(next_input)
                    continue
                next_input = f"Observation: {result_tool}"
                print(next_input)
                # After a shell command, hand control back to the operator,
                # except on the last question, where leaving the inner loop
                # would also end the outer loop (i == len(questions)).
                if tool_name == "ExecuteCmd" and i < len(questions):
                    break
                else:
                    continue

            else:
                next_input = "Observation: Tool not found"
                print(next_input)

        match = re.search(r"(?i)answer\s*:\s*(.*)", result)
        if match:
            # Record the text after 'Answer:' for the current question.
            # With i == 0 no question has been fetched yet, and index -1
            # would overwrite the last slot, so nothing is stored then.
            if i > 0:
                answer_list[i-1] = match.group(1).strip()

            break

Action: next_question()
Observation: What is the SHA256 hash value for the PowerShell script file?
Thought: The question asks for the SHA256 hash of the PowerShell script file. The `sha256sum` command is perfect for this. I need to specify the path to the file.
Action: ExecuteCmd("sha256sum Datasets/file_analysis_2/HDWallpaperEngine.txt")
Observation: Output saved in a file
Thought: I have executed the `sha256sum` command. Now I need to read its output to get the hash value and provide it as the answer.
Action: read_output()
Observation: e0b7a2ad2320ac32c262aeb6fe2c6c0d75449c6e34d0d18a531157c827b9754e  Datasets/file_analysis_2/HDWallpaperEngine.txt
Thought: I have the SHA256 hash value from the `read_output()` command. I can now provide the answer.
Answer: e0b7a2ad2320ac32c262aeb6fe2c6c0d75449c6e34d0d18a531157c827b9754e
Action: next_question()
Observation: What email address is used to send and receive emails? (Format: Mailbox@domain.tld)
Thought: I need to find an email address in the

In [45]:
# Quick sanity check of the two regexes used to parse a tool call out of a
# model reply: take the text after the last "Action:" marker, then capture
# the identifier that precedes the opening parenthesis.
result = "Action: next_question()"
action_splitter = re.compile(r"(?i)action\s*:\s*")
action = action_splitter.split(result)[-1].strip()
name_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")
tool_name = name_pattern.match(action).group(1)
print(tool_name)

next_question
