In [None]:
%pip install google-generativeai



In [None]:
import warnings

# Install a blanket "ignore" filter so library warnings do not clutter cell output.
# NOTE(review): this also hides deprecation notices from pandas and the Gemini SDK.
warnings.filterwarnings("ignore")

In [None]:
import pandas as pd

# Read the structured HDFS log sample into a DataFrame
log_data = pd.read_csv('/content/HDFS_2k.log_structured.csv')

# Preview the first rows to see which columns are available
print("Sample of the log data:", log_data.head(), sep="\n")

Sample of the log data:
   LineId   Date    Time  Pid Level                     Component  \
0       1  81109  203615  148  INFO  dfs.DataNode$PacketResponder   
1       2  81109  203807  222  INFO  dfs.DataNode$PacketResponder   
2       3  81109  204005   35  INFO              dfs.FSNamesystem   
3       4  81109  204015  308  INFO  dfs.DataNode$PacketResponder   
4       5  81109  204106  329  INFO  dfs.DataNode$PacketResponder   

                                             Content EventId  \
0  PacketResponder 1 for block blk_38865049064139...     E10   
1  PacketResponder 0 for block blk_-6952295868487...     E10   
2  BLOCK* NameSystem.addStoredBlock: blockMap upd...      E6   
3  PacketResponder 2 for block blk_82291938032499...     E10   
4  PacketResponder 2 for block blk_-6670958622368...     E10   

                                       EventTemplate  
0  PacketResponder <*> for block blk_<*> terminating  
1  PacketResponder <*> for block blk_<*> terminating  
2  BLOCK* N

In [None]:
# Flag anomalous log lines: any message mentioning "exception" (case-insensitive)
# or any entry logged at WARN level. na=False makes rows with a missing Content
# count as non-matching, so the combined boolean mask never contains NaN.
mentions_exception = log_data['Content'].str.contains("exception", case=False, na=False)
is_warning = log_data['Level'] == 'WARN'
anomalous_lines = log_data[mentions_exception | is_warning]

# Select a small subset for testing (to manage computation cost)
selected_anomalies = anomalous_lines.head(3)
print("Selected anomalous log lines:")
print(selected_anomalies[['LineId', 'Level', 'Content']])

Selected anomalous log lines:
    LineId Level                                            Content
77      78  WARN  10.251.30.85:50010:Got exception while serving...
78      79  WARN  10.251.126.255:50010:Got exception while servi...
80      81  WARN  10.251.123.132:50010:Got exception while servi...


In [None]:
def get_context(line_id, context_size=2, logs=None):
    """
    Get surrounding log lines as context for a given LineId.

    The window is computed from the row's *position* in the frame, so it is
    correct even when the frame's index is not the default 0..n-1 range
    (for example after filtering or sorting).

    Args:
        line_id (int): The LineId of the anomalous log.
        context_size (int): Number of lines before and after to include.
        logs (DataFrame, optional): Frame to search; must have a 'LineId'
            column. Defaults to the notebook-level ``log_data``.
    Returns:
        DataFrame: Context log lines excluding the anomalous line.
    Raises:
        IndexError: If no row has the requested LineId.
    """
    frame = log_data if logs is None else logs

    # Positional offsets of every row whose LineId matches; the first one is used.
    positions = (frame['LineId'] == line_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"LineId {line_id} not found in log data")
    pos = int(positions[0])

    start_pos = max(0, pos - context_size)
    end_pos = min(len(frame), pos + context_size + 1)
    window = frame.iloc[start_pos:end_pos]

    # Drop the anomalous line itself so only its neighbours remain.
    return window[window['LineId'] != line_id]

# Try get_context on the first selected anomaly and show its neighbours
test_anomaly = selected_anomalies.iloc[0]
context = get_context(test_anomaly['LineId'])
print(
    f"Context for LineId {test_anomaly['LineId']}:",
    context[['LineId', 'Level', 'Content']],
    sep="\n",
)

Context for LineId 78:
    LineId Level                                            Content
75      76  INFO  10.251.39.192:50010 Served block blk_-53419927...
76      77  INFO  10.250.5.237:50010 Served block blk_3166960787...
78      79  WARN  10.251.126.255:50010:Got exception while servi...
79      80  INFO  10.251.71.68:50010 Served block blk_-279453387...


In [None]:
def prepare_prompt(anomalous_line, context_lines):
    """
    Build the LLM prompt asking for an explanation of one anomalous log line.

    Every log entry is rendered as "[LineId] Content". All rows of
    ``context_lines`` are listed under the "Normal log lines:" heading as
    given; no filtering by level is performed here.

    Args:
        anomalous_line (Series): Row for the anomaly; needs 'LineId' and 'Content'.
        context_lines (DataFrame): Surrounding rows; needs 'LineId' and 'Content'.
    Returns:
        str: The prompt text, ending with "Explanation:".
    """
    def _render(entry):
        # One log entry in the "[id] message" form used throughout the prompt
        return f"[{entry['LineId']}] {entry['Content']}"

    rendered_context = []
    for _, entry in context_lines.iterrows():
        rendered_context.append(_render(entry))

    sections = [
        "Below are some software log lines, including one anomalous log line. Please explain why this anomaly might have occurred.",
        "",
        "Normal log lines:",
        "\n".join(rendered_context),
        "",
        "Anomalous log line:",
        _render(anomalous_line),
        "",
        "Explanation:",
    ]
    return "\n".join(sections)

# Build the prompt for the first anomaly and show it in full
sample_prompt = prepare_prompt(anomalous_line=test_anomaly, context_lines=context)
print("Sample Prompt:", sample_prompt, sep="\n")

Sample Prompt:
Below are some software log lines, including one anomalous log line. Please explain why this anomaly might have occurred.

Normal log lines:
[76] 10.251.39.192:50010 Served block blk_-5341992729755584578 to /10.251.39.192
[77] 10.250.5.237:50010 Served block blk_3166960787499091856 to /10.251.43.147
[79] 10.251.126.255:50010:Got exception while serving blk_8376667364205250596 to /10.251.91.159:
[80] 10.251.71.68:50010 Served block blk_-2794533871450434534 to /10.251.199.150

Anomalous log line:
[78] 10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64:

Explanation:


In [None]:
import google.generativeai as genai
import os
from google.colab import userdata

# Model identifier used for the Gemini client created below
LLM = "gemini-2.0-flash"

# The API key is read from the Colab secret named 'gemini', so it is never
# written into the notebook itself.
API_KEY = userdata.get('gemini')
genai.configure(api_key=API_KEY)

model = genai.GenerativeModel(model_name=LLM)

In [None]:
# Ask the configured Gemini model to explain each selected anomaly.
# One request is sent per anomaly, using its surrounding lines as context.
for _, anomaly in selected_anomalies.iterrows():
    context = get_context(anomaly['LineId'])
    prompt = prepare_prompt(anomaly, context)
    messages = [prompt]
    explanation = model.generate_content(messages).text
    print(f"\nAnomaly LineId: {anomaly['LineId']}")
    print(f"Content: {anomaly['Content']}")
    # Report the name from the LLM constant so the label cannot drift from
    # the model that was actually configured.
    print(f"\nModel: {LLM}")
    print(f"Explanation: {explanation}")


Anomaly LineId: 78
Content: 10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64:

Model: gemini-2.0-flash
Explanation: The anomalous log line is line [78]: `10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64:`

The anomaly lies in the fact that it logs an **exception** while serving a block.  The other lines show successful block serves. While a single exception doesn't necessarily indicate a systemic problem, it warrants investigation.

Here are a few possible reasons why this exception might have occurred:

*   **Network Issues:** A temporary network glitch between the server (10.251.30.85) and the client (10.251.90.64) could have interrupted the data transfer, causing the exception.  This could be packet loss, a transient routing issue, or a firewall momentarily blocking the connection.

*   **Disk I/O Error on the Server:** The server (10.251.30.85) might have encountered a problem reading the block `blk_-2

In [None]:
# Sample anomalous log line to test: the Content of LineId 78 shown above,
# written here without the trailing colon that the original log entry carries.
anomalous_log = "10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64"

# Function to test and display results for each prompting technique
def test_prompt_techniques(log_line):
    print("Testing Prompt Engineering Techniques for Log Anomaly Explanation\n")

    # 1. Zero-shot Prompting
    print("### Zero-shot Prompting")
    zero_shot_prompt = f"Explain why this log line is considered an anomaly: {log_line}"
    messages = []
    messages.append(zero_shot_prompt)
    zero_shot_response = model.generate_content(messages).text
    print(f"Prompt:\n{zero_shot_prompt}\nResponse:\n{zero_shot_response}\n")

    # 2. Few-shot Prompting
    print("### Few-shot Prompting")
    few_shot_prompt = f"""
    Normal log: PacketResponder 1 for block blk_38865049064139660 terminating
    Explanation: This is a normal termination of a packet responder.

    Anomalous log: 10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64:
    Explanation: This log indicates an exception occurred while serving a block, which is abnormal.

    Now, explain this anomalous log: {log_line}
    """
    messages = []
    messages.append(few_shot_prompt)
    few_shot_response = model.generate_content(messages).text
    print(f"Prompt:\n{few_shot_prompt}\nResponse:\n{few_shot_response}\n")

    # 3. Chain-of-thought Prompting
    print("### Chain-of-thought Prompting")
    cot_prompt = f"""
    Let's think step by step:
    1. What is the normal behavior in logs?
    2. What is different in this anomalous log?
    3. Why might this difference indicate an anomaly?
    Explain the anomaly in this log: {log_line}
    """
    messages = []
    messages.append(cot_prompt)
    cot_response = model.generate_content(messages).text
    print(f"Prompt:\n{cot_prompt}\nResponse:\n{cot_response}\n")

    # 4. Prompt Chaining
    print("### Prompt Chaining")
    # First prompt: Identify key components
    prompt1 = f"Identify the key components in this log: {log_line}"
    messages = []
    messages.append(prompt1)
    response1 = model.generate_content(messages).text
    print(f"Prompt 1:\n{prompt1}\nResponse 1:\n{response1}\n")

    # Second prompt: Explain why it's anomalous
    prompt2 = f"Given the key components: {response1}, explain why this is an anomaly."
    messages.append(prompt2)
    chaining_response = model.generate_content(messages).text
    print(f"Prompt 2:\n{prompt2}\nResponse:\n{chaining_response}\n")

    # 5. Tree-of-thought Prompting
    print("### Tree-of-thought Prompting")
    tot_prompt = f"""
    Consider multiple hypotheses for why this log is anomalous:
    1. Network issue
    2. Block corruption
    3. Resource exhaustion
    For each hypothesis, provide evidence from the log and conclude which is most likely.
    Anomalous log: {log_line}
    """
    messages = []
    messages.append(tot_prompt)
    tot_response = model.generate_content(messages).text
    print(f"Prompt:\n{tot_prompt}\nResponse:\n{tot_response}\n")

# Run the test: sends the sample line through every prompting technique and
# prints each prompt/response pair (this issues several live model requests).
test_prompt_techniques(anomalous_log)

Testing Prompt Engineering Techniques for Log Anomaly Explanation

### Zero-shot Prompting
Prompt:
Explain why this log line is considered an anomaly: 10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64
Response:
The log line "10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64" is considered an anomaly because it indicates an error condition (an "exception") during a specific data serving operation. Let's break down why this signifies a problem:

* **`10.251.30.85:50010`:** This is the IP address and port number of the server (likely a data node in a distributed system like Hadoop). This is the *source* of the log message.

* **`Got exception while serving`:** This is the key phrase indicating an error.  "Exception" almost always signifies an unexpected error or problem occurred during program execution. It means something went wrong in the process of serving data.

* **`blk_-2918118818249673980`:** This looks l