Reads a PDF file and returns its extracted text.

In [None]:
import os
import glob
import openai
from PyPDF2 import PdfReader  

def read_pdf(pdf_path):
    """
    Extract the text of every page in a PDF and return it as one string.

    Args:
        pdf_path (str): Location of the PDF file on disk.

    Returns:
        str: The text of each page that yielded any, each followed by a
            newline. If anything goes wrong while reading, the error is
            printed and whatever was collected before the failure
            (possibly an empty string) is returned.
    """
    collected = []
    try:
        with open(pdf_path, "rb") as handle:
            for page in PdfReader(handle).pages:
                content = page.extract_text()
                if not content:
                    # Nothing extractable on this page (e.g. image-only); skip it.
                    continue
                collected.append(content + "\n")
    except Exception as e:
        # Best effort: report the problem and fall through with partial text.
        print(f"Error reading {pdf_path}: {e}")
    return "".join(collected)


Rule Generation

In [None]:
def generate_rule_from_description(description, rule_type, api_key, model="gpt-3.5-turbo", max_prompt_chars=2000):
    """
    Ask an OpenAI chat model to write a YARA or Snort rule from a description.

    Works with both generations of the ``openai`` package: the v1+ client
    interface (``openai.OpenAI(...).chat.completions.create``) is used when it
    is available, otherwise the legacy ``openai.ChatCompletion.create`` call
    is used. The legacy call was removed in ``openai>=1.0``, so relying on it
    alone makes every request fail on current installations.

    Args:
        description (str): Text extracted from the PDF.
        rule_type (str): The type of rule to generate ("YARA" or "Snort");
            surrounding whitespace and letter case are ignored.
        api_key (str): Your OpenAI API key.
        model (str): OpenAI model to use (default: gpt-3.5-turbo).
        max_prompt_chars (int): Maximum number of characters from the
            description to include; longer descriptions are cut and a
            truncation notice is appended.

    Returns:
        str or None: The generated rule text, or None if the API request
            failed (the error is printed rather than raised).

    Raises:
        ValueError: If ``rule_type`` is neither "YARA" nor "Snort".
    """
    # Kept for the legacy client and for any code that reads the module-level key.
    openai.api_key = api_key

    # Limit the description length if too long
    if len(description) > max_prompt_chars:
        description = description[:max_prompt_chars] + "\n\n[Description truncated due to length]"

    # Normalised rule type -> (label used in the prompt, closing instructions).
    prompt_parts = {
        "yara": (
            "YARA",
            "Please generate a complete and valid YARA rule based on this description. "
            "The rule should include a proper rule header, string definitions, conditions, "
            "and comments explaining each part of the rule.",
        ),
        "snort": (
            "Snort",
            "Please generate a complete and valid Snort rule based on this description. "
            "The rule should include all necessary fields (such as the rule header, options, "
            "protocol details) along with comments explaining the detection logic.",
        ),
    }
    rule_type_lower = rule_type.strip().lower()
    if rule_type_lower not in prompt_parts:
        raise ValueError("Unsupported rule type. Please choose either 'YARA' or 'Snort'.")
    label, instructions = prompt_parts[rule_type_lower]

    prompt = f"""
I have a description for a {label} rule below:
--------------------------------------------------
{description}
--------------------------------------------------
{instructions}
        """

    messages = [
        {"role": "system", "content": "You are a cybersecurity expert with expertise in creating both YARA and Snort rules."},
        {"role": "user", "content": prompt}
    ]
    # Low temperature keeps the output focused; 800 tokens caps the rule length.
    request = {
        "model": model,
        "messages": messages,
        "temperature": 0.2,
        "max_tokens": 800,
    }

    print("Sending API request to OpenAI...")
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: explicit client object, attribute-style response.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(**request)
            content = response.choices[0].message.content
        else:
            # openai<1.0: module-level call, dict-style message access.
            response = openai.ChatCompletion.create(**request)
            content = response.choices[0].message["content"]
        print("API response received.")
        return content
    except Exception as e:
        # Best effort: the caller treats None as "generation failed".
        print(f"An error occurred during the API call: {e}")
        return None


Set up directories for PDF description files and output files.

In [None]:
# Folder scanned for PDF descriptions, and folder that receives generated rules.
input_dir = r"Your input directory path"
output_dir = r"Your output directory path"

# Create both folders up front; folders that already exist are left untouched.
for _folder in (input_dir, output_dir):
    os.makedirs(_folder, exist_ok=True)

print(f"Place your PDF description files in the '{input_dir}' directory.")
print(f"Generated rules will be saved in the '{output_dir}' directory.")


Place your PDF description files in the 'C:/Users/azimb/Downloads/snortpdf' directory.
Generated rules will be saved in the 'C:/Users/azimb/Downloads/snort3' directory.


In [None]:
# Read the OpenAI API key from the OPENAI_API_KEY environment variable instead
# of hardcoding it, so the secret is never saved or shared with the notebook.
# Set the variable before launching Jupyter.
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
    print("Warning: OPENAI_API_KEY is not set; API requests will fail until it is provided.")

# Set the desired rule type (either "YARA" or "Snort")
default_rule_type = "Snort"
rule_type = default_rule_type  # Change to "YARA" to generate YARA rules instead

print(f"Using rule type: {rule_type}")


Using rule type: Snort


In [None]:
# Process each PDF file and generate the corresponding rule.
# glob returns files in arbitrary order; sort so runs are reproducible.
pdf_files = sorted(glob.glob(os.path.join(input_dir, "*.pdf")))

if not pdf_files:
    print(f"No PDF files found in {input_dir}. Please add PDF description files to proceed.")
else:
    for pdf_file in pdf_files:
        print(f"\nProcessing file: {pdf_file}")
        description = read_pdf(pdf_file)
        if not description.strip():
            # Nothing to send to the model (empty or unreadable PDF).
            print(f"Warning: No text extracted from {pdf_file}. Skipping file.")
            continue
        
        rule_output = generate_rule_from_description(description, rule_type, api_key)
        if rule_output is None:
            # The generator reports API failures by returning None.
            print(f"Failed to generate rule for {pdf_file}.")
            continue
        
        # Output name: <pdf name without extension>_<rule type>_rule.txt
        base_name = os.path.splitext(os.path.basename(pdf_file))[0]
        output_file = os.path.join(output_dir, f"{base_name}_{rule_type}_rule.txt")
        try:
            # Write UTF-8 explicitly: the platform default encoding (e.g. a
            # Windows code page) can fail on non-ASCII characters in the
            # model's output, which would lose the generated rule.
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(rule_output)
            print(f"{rule_type} rule generated and saved to: {output_file}")
        except Exception as e:
            print(f"Error writing output to {output_file}: {e}")
