In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Loading model and tokenizer
model_name = 'mtpti5iD/redhat-docs-llm'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()  # Set to inference mode

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [11]:
def predict_category(log_text):
    # Tokenize input
    inputs = tokenizer(log_text, return_tensors="pt", truncation=True, padding=True)

    # Get model outputs
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    probs = F.softmax(logits, dim=1)
    confidence, predicted_class = torch.max(probs, dim=1)

    # Defining label mapping
    label_map = {
        0: "Security",
        1: "Networking",
        2: "Storage",
        3: "System Administration",
        4: "Performance Tuning",
        5: "Kernel Tuning",
        6: "Package Management",
        7: "SELinux",
        8: "User Management",
        9: "Logging",
        10: "Networking Basics",
        80: "Unknown Category"
    }


    category = label_map.get(predicted_class.item(), "Unknown Category")

    return {
        "Predicted Answer": category,
        "Confidence": float(confidence.item())
    }

In [3]:
log_input = 'type=AVC msg=audit(1226874073.147:96): avc: denied { getattr } for pid=2465 comm="httpd" ...'
result = predict_category(log_input)
print(result)

{'Predicted Answer': 'Security', 'Confidence': 0.9999932050704956}


In [4]:
log_input = "kernel: nfs: server myhost.example.com not responding, still trying"
result = predict_category(log_input)
print(result)

{'Predicted Answer': 'Security', 'Confidence': 0.9999932050704956}


In [9]:
sample_logs = [
    # SELinux / Security
    'type=AVC msg=audit(1626874073.147:96): avc: denied { read } for pid=2465 comm="httpd" path="/etc/shadow" dev=dm-0 ino=284133 scontext=unconfined_u:system_r:httpd_t:s0 tcontext=system_u:object_r:shadow_t:s0 tclass=file',

    # Networking
    'May 27 11:34:22 server1 kernel: eth0: Link is Up - 1Gbps Full Duplex',

    # Storage
    'May 27 10:10:04 server1 kernel: EXT4-fs error (device sda1): ext4_find_entry:1451: inode #131586: comm bash: reading directory lblock 0',

    # System Administration
    'May 27 09:00:00 server1 crond[1244]: (root) CMD (/usr/lib64/sa/sa1 1 1)',

    # Performance Tuning
    'May 27 14:55:01 server1 kernel: CPU0: Temperature above threshold, cpu clock throttled',

    # Kernel Tuning
    'May 27 08:25:42 server1 kernel: sysctl: net.ipv4.tcp_syncookies = 1',

    # Package Management
    'May 27 15:15:01 server1 yum[4215]: Installed: httpd-2.4.6-97.el7.centos.x86_64',

    # SELinux
    'type=AVC msg=audit(1626874073.198:97): avc: denied { write } for pid=2900 comm="sshd" path="/root/.ssh/authorized_keys" dev=dm-0 ino=18351 scontext=system_u:system_r:sshd_t:s0 tcontext=unconfined_u:object_r:admin_home_t:s0 tclass=file',

    # User Management
    'May 27 12:00:33 server1 useradd[2142]: new user: name=alice, UID=1001, GID=1001, home=/home/alice, shell=/bin/bash',

    # Logging
    'May 27 03:10:12 server1 rsyslogd: [origin software="rsyslogd" swVersion="8.24.0"] start',

    # Networking Basics
    'May 27 13:20:55 server1 NetworkManager[833]: <info>  [1590592855.9337] device (eth0): state change: unmanaged -> unavailable',

    # Unknown (edge case)
    'May 27 17:45:00 server1 kernel: thermal throttling message with unrecognized device id'
]

for log_input in sample_logs:
  result = predict_category(log_input)
  print(result)


{'Confidence': 0.9999933242797852}
{'Confidence': 0.9999927282333374}
{'Confidence': 0.9999933242797852}
{'Confidence': 0.999993085861206}
{'Confidence': 0.9999923706054688}
{'Confidence': 0.999992847442627}
{'Confidence': 0.9999929666519165}
{'Confidence': 0.9999934434890747}
{'Confidence': 0.9999932050704956}
{'Confidence': 0.9999929666519165}
{'Confidence': 0.9999932050704956}
{'Confidence': 0.9999923706054688}


In [12]:
predict_category("type=AVC msg=audit(1626874073.198:97): avc: denied { write } for pid=2900 comm="sshd" path="/root/.ssh/authorized_keys" dev=dm-0 ino=18351 scontext=system_u:system_r:sshd_t:s0 tcontext=unconfined_u:object_r:admin_home_t:s0 tclass=file")

{'Predicted Answer': 'Security', 'Confidence': 0.9999308586120605}