In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

In [4]:
model_id = "/data/common/models/meta-llama/CodeLlama-70b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
   model_id,
   torch_dtype=torch.float16,
   device_map="auto",
)


Loading checkpoint shards:   0%|          | 0/29 [00:00<?, ?it/s]

In [5]:
rule_generation_prompt = f"""

    You are a cybersecurity expert tasked with performing Snort rule generation for a given Cyber Threat Intelligence (CTI).
    There is a sample task input and output provided below.
    
    Sample CTI Input and corresponding Snort Output:

    CTI Input:
        
      Title: Detection of QAZ Worm Client Login Activity over TCP Port 7597

      Threat Category: Malware – Backdoor

      Threat Name: QAZ Worm

      Detection Summary:

      This signature is designed to detect network traffic associated with the QAZ Worm, specifically its client login activity. The worm exhibits characteristic behavior by initiating a connection and transmitting a unique identifier string (qazwsx.hsq) to a remote server over TCP port 7597. This communication typically indicates the presence of a backdoor that allows unauthorized access to infected systems.

      Rule Metadata
      Classification: Misc Activity

      Ruleset: Community

      Rule Logic Breakdown
      Alert Type: alert

      Protocol: tcp

      Source IP: $EXTERNAL_NET (any IP address outside the local trusted network)

      Source Port: any

      Destination IP: $HOME_NET (any IP address inside the local trusted network)

      Destination Port: 7597 (known port used by the QAZ worm)

      Flow: to_server, established
      (Traffic must be flowing to a server and part of an established connection)

      Content Match: "qazwsx.hsq"
      (String in the payload that identifies the worm’s presence)

      Message: "MALWARE-BACKDOOR QAZ Worm Client Login access"

      Technical Details
      Port 7597 is not a standard well-known port and is leveraged by the QAZ Worm for backdoor communications.

      The content string "qazwsx.hsq" is a unique identifier used by the worm’s client when connecting to a command-and-control server or to another infected host.

      Detection relies on the presence of this string within an established TCP session directed to a host on the internal network.

      Indicators of Compromise (IOCs)
      String Pattern: qazwsx.hsq

      Destination Port: 7597/tcp

      Recommended Actions
      Block or restrict traffic on port 7597 at the perimeter firewall.

      Investigate any internal systems that initiate or receive such connections.

      Perform malware scanning and forensic analysis on potentially compromised hosts.

      Update endpoint and network defense signatures to ensure coverage against this and similar threats.
      
    Snort Output:
        
      alert tcp $EXTERNAL_NET any -> $HOME_NET 7597 ( msg:"MALWARE-BACKDOOR QAZ Worm Client Login access"; flow:to_server,established; content:"qazwsx.hsq"; metadata:ruleset community; classtype:misc-activity; sid:108; rev:12; )


    Generate Snort from the provided CTI. Do not include anything that is not provided.
    Do not print anything like sure here is the CTI or anything else. Only print the CTI. 

    """

In [6]:
user_input = """
    CTI Input: 
    
        Threat Category: Reconnaissance, File Transfer Protocol Abuse

        Threat  Description:  The  SNORT  rule  sid:1000018  is  designed  to  detect  attempts  to  initiate  file

        transfers using the Trivial File Transfer Protocol (TFTP) over UDP port 69.

        Indicators of Compromise (IoCs):

        Protocol: UDP

        Destination Port: 69

        Traffic Direction: Any source to destination on port 69

        Payload containing TFTP read/write request (e.g., RRQ or WRQ)

        Detection Mechanism:

        This  rule  detects  any  UDP  packet  sent  to  port  69  regardless  of  source  port  or  IP.  Since  TFTP

        operates over UDP and does not require a connection setup like TCP, the rule listens for potential

        read or write requests typically found at the beginning of a TFTP session.

        Possible Attribution & Use Cases:

        May indicate lateral movement or payload delivery during exploitation or post-exploitation phases.

        Could be used by attackers to exfiltrate data from compromised systems.

        Recommended Actions:

        Determine  if  the  TFTP  usage  is  expected  in  the  environment.  Investigate  unusual  sources  or

        destinations.

        Block or limit access to UDP port 69 on firewall and network perimeter devices.

        Author & Attribution:

        Rule Author: community

        Rule Source: Custom Rule

        SID: 1000018

        Revision: 1


    Snort Output:"""

In [7]:
chat = [
    {"role": "system", "content": rule_generation_prompt},
    {"role": "user", "content": user_input}
]

In [10]:
inputs = tokenizer.apply_chat_template(chat, return_tensors="pt").to("cuda")

output = model.generate(input_ids=inputs, max_new_tokens=200)
output = output[0].to("cpu")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [11]:
print(tokenizer.decode(output))

<s> Source: system

 You are a cybersecurity expert tasked with performing Snort rule generation for a given Cyber Threat Intelligence (CTI).
    There is a sample task input and output provided below.
    
    Sample CTI Input and corresponding Snort Output:

    CTI Input:
        
      Title: Detection of QAZ Worm Client Login Activity over TCP Port 7597

      Threat Category: Malware – Backdoor

      Threat Name: QAZ Worm

      Detection Summary:

      This signature is designed to detect network traffic associated with the QAZ Worm, specifically its client login activity. The worm exhibits characteristic behavior by initiating a connection and transmitting a unique identifier string (qazwsx.hsq) to a remote server over TCP port 7597. This communication typically indicates the presence of a backdoor that allows unauthorized access to infected systems.

      Rule Metadata
      Classification: Misc Activity

      Ruleset: Community

      Rule Logic Breakdown
      Alert Type