In [1]:
from dotenv import load_dotenv
import os
from google import genai
from google.genai import types
import re
import subprocess

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Hand the key to the client explicitly so the value read above is actually
# used. When it is None the SDK still falls back to its own environment lookup.
client = genai.Client(api_key=api_key)

In [2]:
import json

# Path to the JSON file describing the challenge
file_path = "Datasets/file_analysis_2/fa2.json"

# Load the JSON content. JSON is UTF-8 by specification, so state the encoding
# explicitly instead of relying on the platform's default locale encoding.
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Extract the parts used later to build the prompt and drive the question loop
scenario = data["scenario"]
tools = data["tools"]
questions = data["questions"]
file = data["files"]



In [3]:
def ExecuteCmd(command_str: str) -> str:
    """Execute a shell command and save its output to a file.

    The command is run through ``/bin/bash``; both stdout and stderr are
    redirected into ``output_cmd/output.txt``, which is overwritten on every
    call. The command's exit status is not inspected.

    Args:
        command_str: The shell command to execute as a string.

    Returns:
        A message indicating that the output was saved in a file.
    """
    output_file = 'output_cmd/output.txt'
    # Create the output directory on first use; without it open() raises
    # FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as f:
        # NOTE(security): command_str is produced by the model and is executed
        # verbatim by a shell. Only run this notebook in a sandboxed environment.
        subprocess.run(
            command_str,
            shell=True,
            stderr=f,
            stdout=f,
            text=True,
            executable='/bin/bash'
        )
    return "Output saved in a file"


# Tool 1: get the next question from a list
def next_question() -> str:
    """Return the next question and advance the global question counter.

    Reads the module-level list ``questions`` at index ``i`` and then
    increments the module-level counter ``i``. The first call (with ``i == 0``)
    returns the first question.

    Returns:
        The next question, or a short notice when every question has already
        been handed out. In that case ``i`` is left unchanged.
    """
    global i
    # Guard before touching the counter: an extra call from the model must not
    # raise IndexError or push ``i`` past the end of the list.
    if i >= len(questions):
        return "No more questions are available."
    current = questions[i]
    i += 1
    return current

# Tool 2: read the command output file
def read_output(max_chars: int = 500) -> str:
    """Return the stored command output, truncated if it is very long.

    Reads ``output_cmd/output.txt``. Bytes that are not valid UTF-8 are
    replaced rather than raising, since inspected files may be binary.

    Args:
        max_chars: Maximum number of characters kept before the truncation
            marker is appended. Defaults to 500.

    Returns:
        The (possibly truncated) output with surrounding whitespace stripped,
        or a short notice when no output file exists yet.
    """
    output_file = 'output_cmd/output.txt'
    try:
        with open(output_file, 'r', encoding='utf-8', errors='replace') as f:
            output = f.read()
    except FileNotFoundError:
        # No command has been executed yet, so there is nothing to read.
        return "No command output is available yet."
    if len(output) > max_chars:
        output = output[:max_chars] + "\n...[truncated]..."
    return output.strip()


In [None]:
# Function declarations handed to the LLM; the model picks which tool to call
# based on these names, descriptions and parameter schemas.
def _object_schema(properties, required):
    """Build an object-typed parameter schema from its properties and required keys."""
    return {"type": "object", "properties": properties, "required": required}


ExecuteCmd_dec = dict(
    name="ExecuteCmd",
    description="Execute a shell command and save its output to a file.",
    parameters=_object_schema(
        {
            "command_str": {
                "type": "string",
                "description": "The shell command to execute.",
            },
        },
        ["command_str"],
    ),
)

# Takes no arguments.
next_question_dec = dict(
    name="next_question",
    description="Reads and returns the next question and can be used to get the first question.",
    parameters=_object_schema({}, []),
)

# Takes no arguments.
read_output_dec = dict(
    name="read_output",
    description="Reads the file where command outputs are stored and returns its content. Output may be truncated if very long.",
    parameters=_object_schema({}, []),
)


In [None]:
""" You run in a loop of Thought, Action, Observation.
At the end of the loop you output an Answer.
Use Thought to describe your thoughts about the task you have been given.
Use Action to run one of the tools available to you using Function Calling.
Observation will be the result of running those actions.
Provide thought after each prompt in this format: Thought: <Your reasoning and thinking>

The most important rule STRICTLY follow:
- Use function calling only if you have output a Thought in the previous request
- Thought and Function calling should be in different interactions  """

In [21]:
# System prompt for the chat session (it is passed as `system_instruction`
# when the chat is created). The f-string interpolates `scenario`, `tools` and
# `file` loaded from the challenge JSON, then spells out the
# Thought / Action / Observation protocol followed by an example session.
# `tools` and `file` are joined with ', ' -- presumably lists of strings;
# TODO confirm against the JSON schema.
prompt= f"""
You are tasked with solving a cybersecurity investigation challenge composed of multiple sequential questions. Use only Ubuntu terminal commands to analyze the provided file and answer each question in order.

Scenario:
{scenario}

Available Command-line tools you can use:
{', '.join(tools)}

Target File:
{', '.join(file)} 

1. When user says: "Take the next question and solve it by providing a linux command"
   - THOUGHT: Analyze the provided question. Explain your planned approach.
   - ACTION: Use next_question() to retrieve the question

2. After receiving question via Observation:
   - THOUGHT: Design a command to answer the question
   - ACTION: ExecuteCmd(command_str='...')

3. When user says: "Analyze the previous command's output and generate a new command or return the final answer"
   - THOUGHT: Evaluate the output. Determine if answer is present or if further analysis is needed
   - ACTION: EITHER use read_output() OR provide Answer: 

4. After reading output via Observation:
   - THOUGHT: Verify if output contains the answer
   - ACTION: EITHER ExecuteCmd() for refinement OR Answer: 

RULES (MUST OBEY):
1. ALWAYS output Thought before Action in separate interactions
2. NEVER combine Thought and Action in same response
3. For multi-step analysis: chain commands using pipes (|)
4. When answer is found: Output immediately in "Answer: " format




Example session:

Task by the user: Take the next question and solve it by providing a linux command

Thought: I need to retrieve the question
Action: next question retrival

You will be called again with this:

Observation: the next question

Thought: I need to generate a command to solve the question 
Action: Execute command

You will be called again with this:

Task by the user: Analyze the previous command's output and generate a new command or return the final answer

Thought: i need to see the output of the command
Action: read the output

You will be called again with this:

Observation: the output of the command

Thought: I need to check if the answer is in the output, if not i will give another command 
Action: Execute command
OR
Answer: answer to the question

You will be called again with this:
Task by the user: Take the next question and solve it by providing a linux command """.strip()
# Print the rendered prompt so the interpolated values can be checked by eye.
print (prompt)

You are tasked with solving a cybersecurity investigation challenge composed of multiple sequential questions. Use only Ubuntu terminal commands to analyze the provided file and answer each question in order.

Scenario:
A suspicious Pwershell script in a .txt file was found on one of our endpoints, can you work out what it does?

Available Command-line tools you can use:
grep, awk, sed, sha256sum, base64, cut, sort, wc, head, tail, find, strings, file, stat

Target File:
Datasets/file_analysis_2/HDWallpaperEngine.txt 

1. When user says: "Take the next question and solve it by providing a linux command"
   - THOUGHT: Analyze the provided question. Explain your planned approach.
   - ACTION: Use next_question() to retrieve the question

2. After receiving question via Observation:
   - THOUGHT: Design a command to answer the question
   - ACTION: ExecuteCmd(command_str='...')

3. When user says: "Analyze the previous command's output and generate a new command or return the final answer

In [25]:
# Create the chat session. The tool declarations let the model request one of
# the three local functions; `prompt` is supplied as the system instruction.
chat = client.chats.create(
    model="gemini-2.5-flash",
    config=types.GenerateContentConfig(
        tools=[types.Tool(function_declarations=[ExecuteCmd_dec, next_question_dec, read_output_dec])],
        system_instruction=prompt,
    ),
)

NOT_FOUND = "answer not found"
MaxAttempts = 4  # upper bound on model round-trips per user instruction

i = 0  # number of questions handed out so far; questions[i-1] is the current one
answer_list = [NOT_FOUND] * len(questions)

# Keep going while questions remain to be handed out OR the current question is
# still unanswered. Testing only `i < len(questions)` ended the session as soon
# as the last question had been fetched, so it could never be answered.
while i < len(questions) or (i > 0 and answer_list[i - 1] == NOT_FOUND):

    # The user steers the agent: 1 = next question, 2 = keep analysing, 3 = quit.
    # Compare as text so non-numeric input is rejected instead of raising.
    choice = input(
        "Enter 1 to solve the next question, 2 to further analyse the question or 3 to quit: "
    ).strip()
    if choice == "3":
        break
    if choice == "1":
        if i >= len(questions):
            print("All questions have been retrieved. Enter 2 to keep analysing or 3 to quit.")
            continue
        query = "Take the next question and solve it by providing a linux command always generate thought before calling"
    elif choice == "2":
        if i == 0:
            # Without a current question an answer would land in answer_list[-1].
            print("No question has been retrieved yet. Enter 1 first.")
            continue
        query = "Analyze the previous command's output and generate a new command or return the final answer"
    else:
        # Ask again rather than falling through with an undefined or stale query.
        print("Invalid input. Please enter 1, 2 or 3.")
        continue

    next_input = query
    responselist = []
    for j in range(1, MaxAttempts + 1):
        # This is the request that triggers Gemini.
        response = chat.send_message(next_input)
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []
        if not parts:
            # Nothing usable came back; re-send the same input on the next attempt.
            print("Model returned an empty response.")
            continue

        # Inspect every part: the model may emit a Thought text part followed by
        # the function call in the same response.
        tool_call = next((p.function_call for p in parts if p.function_call), None)
        text = "\n".join(p.text for p in parts if p.text)

        if tool_call is not None:
            if text:
                print(text)
            responselist.append(response.candidates)
            call_args = dict(tool_call.args or {})  # args may be absent for no-argument tools
            args_str = ", ".join(f"{k}={repr(v)}" for k, v in call_args.items())
            print(f"Action: {tool_call.name}({args_str})")

            if tool_call.name == "ExecuteCmd":
                result = ExecuteCmd(**call_args)
                print(f"Observation: {result}")
                # Stop here on purpose: the user decides whether to analyse the
                # output (2) or move on (1).
                break
            elif tool_call.name == "next_question":
                # Never index past the end of the list if the model asks again.
                if i < len(questions):
                    result = next_question()
                else:
                    result = "No more questions are available."
            else:
                result = read_output()

            next_input = f"Observation: {result}"
            print(next_input)

        else:
            # Plain text: either a Thought (loop again with the same input so the
            # model follows up with an Action) or the final "Answer: ..." line.
            print(text)
            match = re.search(r"(?i)answer\s*:\s*(.*)", text)
            if match:
                if i > 0:
                    answer_list[i - 1] = match.group(1).strip()  # text after 'Answer:'
                break

Action: next_question()
Observation: What is the SHA256 hash value for the PowerShell script file?
Action: ExecuteCmd(command_str='sha256sum Datasets/file_analysis_2/HDWallpaperEngine.txt')
Observation: Output saved in a file
Action: read_output()
Observation: e0b7a2ad2320ac32c262aeb6fe2c6c0d75449c6e34d0d18a531157c827b9754e  Datasets/file_analysis_2/HDWallpaperEngine.txt
Answer: e0b7a2ad2320ac32c262aeb6fe2c6c0d75449c6e34d0d18a531157c827b9754e
Action: next_question()
Observation: What email address is used to send and receive emails? (Format: Mailbox@domain.tld)
Action: ExecuteCmd(command_str="grep -oE '\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b' Datasets/file_analysis_2/HDWallpaperEngine.txt")
Observation: Output saved in a file
Action: read_output()
Observation: chaudhariparth454@gmail.com
chaudhariparth454@gmail.com
Answer: chaudhariparth454@gmail.com
Action: next_question()
Observation: What is the password for this email account?
Action: ExecuteCmd(command_str="grep -iE

In [27]:
# Show the collected answers; the list holds one entry per question and keeps
# the placeholder "answer not found" wherever no answer was recorded.
print(answer_list)

['e0b7a2ad2320ac32c262aeb6fe2c6c0d75449c6e34d0d18a531157c827b9754e', 'chaudhariparth454@gmail.com', 'yjghfdafsd5464562!', '587', 'user32.dll', 'answer not found']
