# Инференс

Тестируем запуск модели в условиях, приближенных к боевым.

Установка зависимостей:

In [1]:
!pip install transformers huggingface_hub torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Создаем класс модели (идентичный тому, что был на этапе обучения):

In [2]:
import torch
import torch.nn as nn
from transformers import BertModel

class IAST_BERT(nn.Module):
    """Three-branch BERT classifier over request, control-flow and response text.

    A single shared BERT encoder processes each of the three token sequences.
    For every branch the last hidden state is averaged over dim 1, passed
    through dropout and a branch-specific linear layer (256 outputs); the three
    projections are concatenated and mapped to ``num_classes`` logits.

    Args:
        bert_model_name: Identifier passed to ``BertModel.from_pretrained``.
        num_classes: Number of output logits.
        dropout_prob: Probability used by the shared ``nn.Dropout`` layer.
    """

    def __init__(self, bert_model_name="bert-base-uncased", num_classes=10, dropout_prob=0.5):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(dropout_prob)

        # Take the encoder width from its config rather than hard-coding 768,
        # so an encoder with a different hidden size still fits. Attribute
        # names are unchanged, so existing state dicts keep loading.
        hidden_size = self.bert.config.hidden_size
        self.fc_request = nn.Linear(hidden_size, 256)
        self.fc_control_flow = nn.Linear(hidden_size, 256)
        self.fc_response = nn.Linear(hidden_size, 256)
        self.fc_final = nn.Linear(256 * 3, num_classes)

    def _branch_features(self, input_ids, attention_mask, projection):
        """Encode one token sequence and project its mean-pooled hidden state."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # NOTE(review): this is a plain mean over dim 1, so padded positions
        # contribute to the average. Left as-is because the class has to match
        # the one used during training.
        pooled = torch.mean(encoded.last_hidden_state, dim=1)
        return projection(self.dropout(pooled))

    def forward(self, request_input, request_mask, control_flow_input, control_flow_mask, response_input, response_mask):
        """Return classification logits for one batch of three-part contexts.

        Args:
            request_input, request_mask: Token ids and attention mask of the request text.
            control_flow_input, control_flow_mask: Same for the control-flow text.
            response_input, response_mask: Same for the response text.

        Returns:
            Tensor of logits produced by ``fc_final`` (one row per batch item).
        """
        request_features = self._branch_features(request_input, request_mask, self.fc_request)
        control_flow_features = self._branch_features(control_flow_input, control_flow_mask, self.fc_control_flow)
        response_features = self._branch_features(response_input, response_mask, self.fc_response)

        # Concatenate the branch projections along the feature axis
        combined_features = torch.cat([request_features, control_flow_features, response_features], dim=1)
        logits = self.fc_final(self.dropout(combined_features))

        return logits


Определяем устройство:

In [3]:
# Report whether PyTorch can see a CUDA device (and which one).
gpu_found = torch.cuda.is_available()
print(f"GPU доступен: {torch.cuda.get_device_name(0)}" if gpu_found else "GPU недоступен, используется CPU")


GPU недоступен, используется CPU


P.S. Пока работаем на CPU — для проверки инференса этого достаточно.

In [4]:
# Prefer CUDA when PyTorch reports it as available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Загружаем модель с HF и готовим к использованию:

In [8]:
import torch
import json
import requests
from transformers import AutoTokenizer

# Hugging Face repository that hosts the config, the weights and the tokenizer
repo_id = "l1ghth4t/immunity"

# Fetch the configuration. Use a timeout so the cell cannot hang forever, and
# fail on HTTP errors instead of trying to parse an error page as JSON.
config_path = f"https://huggingface.co/{repo_id}/resolve/main/config.json"
config_response = requests.get(config_path, timeout=30)
config_response.raise_for_status()
config = config_response.json()

# Build the model skeleton
model = IAST_BERT(
    bert_model_name=config["bert_model_name"],
    num_classes=10
)

# Download the weights. The checkpoint is a pickle file fetched from a remote
# repository, so restrict unpickling to plain tensors/containers.
# NOTE(review): `weights_only` needs a PyTorch release whose
# torch.hub.load_state_dict_from_url accepts it — confirm for the target env.
state_dict_path = f"https://huggingface.co/{repo_id}/resolve/main/pytorch_model.bin"
state_dict = torch.hub.load_state_dict_from_url(state_dict_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# Inputs are moved to `device` during preprocessing; the model has to live on
# the same device or the forward pass fails on a GPU host.
model.to(device)

# Load the tokenizer published alongside the model
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Switch to inference mode (disables dropout); kept as the last expression so
# the notebook still displays the module structure.
model.eval()

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading: "https://huggingface.co/l1ghth4t/immunity/resolve/main/pytorch_model.bin" to /root/.cache/torch/hub/checkpoints/pytorch_model.bin
100%|██████████| 420M/420M [00:15<00:00, 28.5MB/s]


tokenizer_config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

IAST_BERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

Устанавливаем метки:

In [24]:
# Class names listed in index order; a name's position is its class id.
label2id = {
    name: index
    for index, name in enumerate((
        'CWE-16',
        'CWE-352',
        'CWE-400',
        'CWE-502',
        'CWE-639',
        'CWE-77',
        'CWE-79',
        'CWE-89',
        'CWE-918',
        'Clean',
    ))
}
# Reverse lookup: class id -> class name.
id2label = dict(zip(label2id.values(), label2id.keys()))

Задаем контекст выполнения запроса - входные данные для модели:

In [40]:
# Sample request-execution context used as model input: the HTTP request,
# an (empty) control-flow section and the HTTP response.
context = {
    'context_id': 481,
    'vulnerable': True,
    'processing': False,
    'request': {
        'url': 'http://www.morrow-wilson.com/tagspost.asp',
        'method': 'POST',
        'headers': {
            'HOST': '127.0.0.1:5000',
            'USER_AGENT': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
            'ACCEPT': 'text/css,*/*;q=0.1',
            'ACCEPT_LANGUAGE': 'en-US,en;q=0.5',
            'ACCEPT_ENCODING': 'gzip, deflate, br, zstd',
            'CONNECTION': 'keep-alive',
            'REFERER': 'http://127.0.0.1:5000/',
            'COOKIE': 'csrftoken=7GgBqVlBL6WuGdfu3e6EBXep8N5vFiVm; grafana_session=54c841e9cb2f4956a7c791efd284d120; grafana_session_expiry=1735995560; user_id=2; session_token=b3c634c91e1711c794704a031918a34b',
            'SEC_FETCH_DEST': 'style',
            'SEC_FETCH_MODE': 'no-cors',
            'SEC_FETCH_SITE': 'same-origin',
            'PRIORITY': 'u=2',
            'Content-Type': 'application/json',
            'User-Agent': 'Opera/9.26.(X11; Linux x86_64; quz-PE) Presto/2.9.164 Version/12.00',
        },
        'body': b'csrfmiddlewaretoken=6c6bee7e-5e2d-450c-b4dc-e356e8d898c8&newpass1=Rv6iz2qt&newpass2=7cFtCSRv',
    },
    'control_flow': {},
    'response': {
        'status_code': '400',
        'headers': {
            'Content-Disposition': 'inline; filename=index.css',
            'Content-Type': 'application/json',
            'Content-Length': '1146',
            'Last-Modified': 'Wed, 22 Jan 2025 23:29:59 GMT',
            'Cache-Control': 'no-cache',
            'ETag': '"1737588599.362596-1146-1135482071"',
            'Date': 'Wed, 22 Jan 2025 23:36:36 GMT',
        },
    },
}

Задаем функцию для предобработки входных данных:

In [41]:
def preprocess_context(json_example):
    """Turn one execution context into tokenized model inputs.

    The request, control-flow and response sections of ``json_example`` are
    each rendered as text and tokenized with the module-level ``tokenizer``
    (truncated and padded to 512 tokens). The leading dimension returned by
    the tokenizer is squeezed away and every tensor is moved to the
    module-level ``device``.

    Args:
        json_example: Mapping with ``request`` (``url``, ``method``,
            ``headers``, ``body``), ``control_flow`` (a mapping) and
            ``response`` (``status_code``, ``headers``) entries.

    Returns:
        Dict with ``<section>_input_ids`` and ``<section>_attention_mask``
        tensors for the sections ``request``, ``control_flow`` and ``response``.
    """
    request = json_example['request']
    response = json_example['response']

    # Text rendering of each section, in the order the model consumes them.
    section_texts = {
        "request": (
            f"URL: {request['url']}\n"
            f"Method: {request['method']}\n"
            f"Headers: {request['headers']}\n"
            f"Body: {request['body']}"
        ),
        "control_flow": "\n".join(
            f"{k}: {v}" for k, v in json_example['control_flow'].items()
        ),
        "response": (
            f"Status Code: {response['status_code']}\n"
            f"Headers: {response['headers']}"
        ),
    }

    features = {}
    for section, text in section_texts.items():
        encoded = tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors="pt",
        )
        for field in ("input_ids", "attention_mask"):
            features[f"{section}_{field}"] = encoded[field].squeeze(0).to(device)
    return features

Предобработка контекста:

In [42]:
# Tokenize the sample context; the result is the dict of input tensors returned by preprocess_context.
context_processed = preprocess_context(context)

Вызываем модель:

In [43]:
# Run a single forward pass. Gradients are not needed for inference, so
# autograd is disabled to avoid building the graph and holding activations.
# Each tensor gets a leading batch dimension of size 1; the tensors were
# already moved to `device` by preprocess_context.
with torch.no_grad():
    logits = model(
        request_input=context_processed["request_input_ids"].unsqueeze(0),
        request_mask=context_processed["request_attention_mask"].unsqueeze(0),
        control_flow_input=context_processed["control_flow_input_ids"].unsqueeze(0),
        control_flow_mask=context_processed["control_flow_attention_mask"].unsqueeze(0),
        response_input=context_processed["response_input_ids"].unsqueeze(0),
        response_mask=context_processed["response_attention_mask"].unsqueeze(0),
    )
logits

tensor([[  1.3674,  -3.1265,  -7.4349,  -6.2527,   1.7794, -10.1412,  -2.3452,
          -4.5692,  -0.6608,  14.8613]], grad_fn=<AddmmBackward0>)

Интерпретация результата работы модели:

In [44]:
# Index of the highest logit for the single batch item, mapped back to its label.
predicted_class = logits.argmax(dim=1).item()
id2label[predicted_class]

'Clean'