In [None]:
import warnings

# Silence every Python warning for the rest of the session: the "ignore" action is
# installed with no category, module, or message restriction, so nothing is exempt.
warnings.filterwarnings("ignore")

In [None]:
import pandas as pd

# Read the structured log CSV into a DataFrame
log_data = pd.read_csv('/content/HDFS_2k.log_structured.csv')

# Print a heading followed by the leading rows so the column layout is visible
print("Sample of the log data:", log_data.head(), sep="\n")

Sample of the log data:
   LineId   Date    Time  Pid Level                     Component  \
0       1  81109  203615  148  INFO  dfs.DataNode$PacketResponder   
1       2  81109  203807  222  INFO  dfs.DataNode$PacketResponder   
2       3  81109  204005   35  INFO              dfs.FSNamesystem   
3       4  81109  204015  308  INFO  dfs.DataNode$PacketResponder   
4       5  81109  204106  329  INFO  dfs.DataNode$PacketResponder   

                                             Content EventId  \
0  PacketResponder 1 for block blk_38865049064139...     E10   
1  PacketResponder 0 for block blk_-6952295868487...     E10   
2  BLOCK* NameSystem.addStoredBlock: blockMap upd...      E6   
3  PacketResponder 2 for block blk_82291938032499...     E10   
4  PacketResponder 2 for block blk_-6670958622368...     E10   

                                       EventTemplate  
0  PacketResponder <*> for block blk_<*> terminating  
1  PacketResponder <*> for block blk_<*> terminating  
2  BLOCK* N

In [None]:
# Identify anomalous log lines: rows whose message mentions "exception"
# (case-insensitive) or that were logged at WARN level.
# na=False makes a missing Content count as "no match" up front. Without it,
# str.contains yields NaN for such rows, and a NaN on the left-hand side of `|`
# can be coerced to False afterwards, silently dropping WARN rows with no message.
anomalous_lines = log_data[
    log_data['Content'].str.contains("exception", case=False, na=False)
    | (log_data['Level'] == 'WARN')
]

# Select a small subset for testing (to manage computation cost)
selected_anomalies = anomalous_lines.head(3)
print("Selected anomalous log lines:")
print(selected_anomalies[['LineId', 'Level', 'Content']])

Selected anomalous log lines:
    LineId Level                                            Content
77      78  WARN  10.251.30.85:50010:Got exception while serving...
78      79  WARN  10.251.126.255:50010:Got exception while servi...
80      81  WARN  10.251.123.132:50010:Got exception while servi...


In [None]:
def get_context(line_id, context_size=2, logs=None):
    """
    Get surrounding log lines as context for a given LineId.

    The window is taken by row position, so it is correct even when the frame
    does not carry a default 0..n-1 index (e.g. after filtering or sorting).

    Args:
        line_id (int): The LineId of the anomalous log.
        context_size (int): Number of lines before and after to include.
        logs (DataFrame, optional): Frame to search; must have a 'LineId'
            column. Defaults to the module-level ``log_data``.
    Returns:
        DataFrame: Context log lines excluding the anomalous line.
    Raises:
        IndexError: If no row has the requested LineId.
    """
    if logs is None:
        logs = log_data

    # Positions (not index labels) of the rows carrying this LineId.
    positions = (logs['LineId'] == line_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"LineId {line_id} not found in log data")
    pos = int(positions[0])  # first match, as before

    # Clamp the window to the bounds of the frame.
    start_pos = max(0, pos - context_size)
    end_pos = min(len(logs), pos + context_size + 1)
    window = logs.iloc[start_pos:end_pos]
    return window[window['LineId'] != line_id]

# Exercise get_context on the first selected anomaly and print its neighbours
test_anomaly = selected_anomalies.iloc[0]
context = get_context(test_anomaly['LineId'])
print(
    f"Context for LineId {test_anomaly['LineId']}:",
    context[['LineId', 'Level', 'Content']],
    sep="\n",
)

Context for LineId 78:
    LineId Level                                            Content
75      76  INFO  10.251.39.192:50010 Served block blk_-53419927...
76      77  INFO  10.250.5.237:50010 Served block blk_3166960787...
78      79  WARN  10.251.126.255:50010:Got exception while servi...
79      80  INFO  10.251.71.68:50010 Served block blk_-279453387...


In [None]:
def prepare_prompt(anomalous_line, context_lines):
    """
    Build the LLM prompt from an anomalous log row and its context rows.

    Every row is rendered as "[LineId] Content". Context rows are listed under
    the "Normal log lines:" heading, the anomalous row under
    "Anomalous log line:", and the text ends with "Explanation:" for the
    model to continue from.

    Args:
        anomalous_line (Series): The anomalous log row.
        context_lines (DataFrame): Context log rows.
    Returns:
        str: The formatted prompt.
    """
    rendered_context = []
    for _, entry in context_lines.iterrows():
        rendered_context.append(f"[{entry['LineId']}] {entry['Content']}")

    # One list item per output line; empty strings become the blank separator lines.
    sections = [
        "Below are some software log lines, including one anomalous log line. "
        "Please explain why this anomaly might have occurred.",
        "",
        "Normal log lines:",
        "\n".join(rendered_context),
        "",
        "Anomalous log line:",
        f"[{anomalous_line['LineId']}] {anomalous_line['Content']}",
        "",
        "Explanation:",
    ]
    return "\n".join(sections)

# Build the prompt for the anomaly examined above and print it under a heading
sample_prompt = prepare_prompt(test_anomaly, context)
print("Sample Prompt:", sample_prompt, sep="\n")

Sample Prompt:
Below are some software log lines, including one anomalous log line. Please explain why this anomaly might have occurred.

Normal log lines:
[76] 10.251.39.192:50010 Served block blk_-5341992729755584578 to /10.251.39.192
[77] 10.250.5.237:50010 Served block blk_3166960787499091856 to /10.251.43.147
[79] 10.251.126.255:50010:Got exception while serving blk_8376667364205250596 to /10.251.91.159:
[80] 10.251.71.68:50010 Served block blk_-2794533871450434534 to /10.251.199.150

Anomalous log line:
[78] 10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64:

Explanation:


In [None]:
from google.colab import userdata
from huggingface_hub import login
import torch

# Log in to the Hugging Face Hub. The token is looked up under the 'HF_TOKEN' key via
# google.colab's userdata, so the credential itself is never written in the notebook.
hf_api_key = userdata.get('HF_TOKEN')
login(token=hf_api_key)

In [None]:
# Load model directly
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Hub id of the checkpoint, named once so the tokenizer and the weights cannot drift apart.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# trust_remote_code is deliberately left at its default (False): the flag lets Python
# files shipped in the Hub repository execute locally, and this checkpoint uses the
# Mistral architecture that is implemented inside transformers, so it is not needed.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cuda",   # load the weights onto the GPU
    torch_dtype="auto",  # let transformers pick the dtype from the checkpoint
)



tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
# Most recently built pipeline together with the model/tokenizer objects it was built
# from, so repeated calls reuse it instead of constructing a new pipeline each time.
_pipeline_state = {"model": None, "tokenizer": None, "generator": None}


def _get_generator():
    """Return the text-generation pipeline for the loaded model, building it only when needed."""
    stale = (
        _pipeline_state["generator"] is None
        or _pipeline_state["model"] is not model
        or _pipeline_state["tokenizer"] is not tokenizer
    )
    if stale:
        # Rebuild when nothing is cached yet or a different model/tokenizer was loaded.
        _pipeline_state.update(
            generator=pipeline("text-generation", model=model, tokenizer=tokenizer),
            model=model,
            tokenizer=tokenizer,
        )
    return _pipeline_state["generator"]


def generate_explanation(model_name, prompt):
    """
    Generate an explanation for a prompt with the currently loaded LLM.

    Generation always runs on the module-level ``model`` and ``tokenizer``;
    ``model_name`` is only used to label the error message. Decoding is greedy
    (``do_sample=False``), so no temperature is passed.

    Args:
        model_name (str): Name of the LLM model (used in the error message).
        prompt (str): Input prompt for the model.
    Returns:
        str: Generated explanation (without the prompt), or an error message of
        the form "Error with model <model_name>: <details>" if anything fails.
    """
    try:
        generator = _get_generator()

        # Pass the padding id explicitly (falling back to EOS) so generation does not
        # have to pick one itself and report it on every call.
        pad_token_id = getattr(tokenizer, "pad_token_id", None)
        if pad_token_id is None:
            pad_token_id = getattr(tokenizer, "eos_token_id", None)

        generation_args = {
            "max_new_tokens": 500,
            "return_full_text": False,
            "do_sample": False,
        }
        if pad_token_id is not None:
            generation_args["pad_token_id"] = pad_token_id

        response = generator(prompt, **generation_args)
        return response[0]['generated_text']
    except Exception as e:
        # Best effort by design: report the failure as text so a batch run keeps going.
        return f"Error with model {model_name}: {str(e)}"

In [None]:
# Ask the loaded model to explain each selected anomaly
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"

for _, anomaly in selected_anomalies.iterrows():
    context = get_context(anomaly['LineId'])
    # prepare_prompt lists the context under "Normal log lines", so neighbours that
    # were themselves flagged as anomalous must not be presented to the model as normal.
    context = context[~context['LineId'].isin(anomalous_lines['LineId'])]
    prompt = prepare_prompt(anomaly, context)
    print(f"\nAnomaly LineId: {anomaly['LineId']}")
    print(f"Content: {anomaly['Content']}")
    print(f"\nModel: {MODEL_NAME}")  # Print the model name
    explanation = generate_explanation(MODEL_NAME, prompt)  # Name is only used to label errors
    print(f"Explanation: {explanation}")

Device set to use cuda
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Anomaly LineId: 78
Content: 10.251.30.85:50010:Got exception while serving blk_-2918118818249673980 to /10.251.90.64:

Model: mistralai/Mistral-7B-Instruct-v0.3


Device set to use cuda
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Explanation: 
The anomaly in this log line is the negative block number (blk_-2918118818249673980). In the normal log lines, the block numbers are all positive. The negative block number might have occurred due to a programming error, data corruption, or an unusual network event that caused the block to be requested with a negative number. It's also possible that the block number is being represented in a different format or base than the other log lines. Further investigation would be needed to determine the exact cause of the anomaly.

Anomaly LineId: 79
Content: 10.251.126.255:50010:Got exception while serving blk_8376667364205250596 to /10.251.91.159:

Model: mistralai/Mistral-7B-Instruct-v0.3


Device set to use cuda
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Explanation: 
The anomaly in this log line is the negative block number (blk_-2918118818249673980, blk_-2794533871450434534, blk_8376667364205250596). In the normal log lines, the block numbers are positive integers. The negative block numbers might have occurred due to a programming error or a data corruption issue. It could also be a result of an incorrect configuration or a misinterpretation of the block number format. Further investigation would be needed to determine the exact cause of the anomaly.

Anomaly LineId: 81
Content: 10.251.123.132:50010:Got exception while serving blk_3763728533434719668 to /10.251.38.214:

Model: mistralai/Mistral-7B-Instruct-v0.3
Explanation: 
The anomaly in this log line is the block number (blk_3763728533434719668) which is much larger than the other block numbers in the normal log lines. This could indicate a problem with the block being served, such as a corrupted block or a block that is too large to be served. It could also indicate a problem wi