In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell execution, so edits to the project's source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [47]:
from snyk_ai import Models

# LLM used for summarizing code blocks; it is passed to
# `section.get_chunks(MODEL)` when the per-section chunks are computed
# further down in this notebook.
MODEL = Models.Llama_3_2

### Security Advisory Collection

In [48]:
from snyk_ai.advisories import Advisories


# Load all advisories
ADVISORIES = Advisories("../data/advisories")

print(f"Loaded {len(ADVISORIES)} advisories:\n")
# The loop variable is deliberately NOT named `adv`: later cells bind `adv`
# to the single advisory being inspected, and a loop variable of the same
# name would leak out of this cell and silently repoint those cells at the
# last advisory whenever this cell is re-run.
for advisory in ADVISORIES:
    print(f" * {advisory.filename}")
    print(f"   {advisory.title}")
    print(f"   {len(advisory.blocks)} blocks, {len(advisory.sections)} sections")
    print()

Loaded 8 advisories:

 * advisory-001.md
   Cross-Site Scripting (XSS) in express-validator
   43 blocks, 13 sections

 * advisory-002.md
   SQL Injection in webapp-auth
   56 blocks, 15 sections

 * advisory-003.md
   Dependency Confusion in secure-config
   73 blocks, 15 sections

 * advisory-004.md
   Path Traversal in data-processor
   61 blocks, 15 sections

 * advisory-005.md
   Remote Code Execution in file-handler
   65 blocks, 15 sections

 * advisory-006.md
   Cross-Site Request Forgery (CSRF) in api-client
   57 blocks, 15 sections

 * advisory-007.md
   Server-Side Request Forgery (SSRF) in http-server
   67 blocks, 15 sections

 * advisory-008.md
   Insecure Deserialization in json-parser
   61 blocks, 15 sections



### Anatomy of a Security Advisory document

In [10]:
# Select the advisory that the following cells take apart.
adv = ADVISORIES["advisory-003.md"]

# Title, one blank separator line, then the executive summary.
print(adv.title, "", adv.executive_summary, sep="\n")

Dependency Confusion in secure-config

A dependency confusion vulnerability has been discovered in the `secure-config` package affecting versions 3.0.0 through 3.1.9. This vulnerability allows attackers to potentially inject malicious packages into the dependency resolution process by exploiting missing package integrity checks and scoped package naming conflicts.


#### Markdown Blocks

In [27]:
# Dump every parsed block of the advisory as "index: type | preview", one
# output row per line of the block, followed by a horizontal rule.
for i, block in enumerate(adv.blocks):
    # Blocks without per-line data are shown via their whole content instead.
    lines = block.lines or [block.content]
    for row, line in enumerate(lines):
        # Only the first row carries the "index: type" label; continuation
        # rows get an 18-space gutter (the width of the label) instead.
        prefix = f"  {i:2}: {block.type.value:12}" if row == 0 else " " * 18
        # Preview is capped at 50 characters, with an ellipsis when cut.
        content_preview = line[:50].replace("\n", " ") + ("..." if len(line) > 50 else "")
        print(f"{prefix} | {content_preview}")
    # Rule under the block: 18 dashes for the gutter, then the column divider.
    print(f"{'-' * 18}-|-{'-'*53}")

   0: header       | Security Advisory: Dependency Confusion in secure-...
-------------------|------------------------------------------------------
   1: paragraph    | **CVE ID:** CVE-2024-1237  
                   | **Package:** secure-config  
                   | **Ecosystem:** npm  
                   | **Severity:** Medium  
                   | **CVSS Score:** 6.5  
                   | **Published:** February 10, 2024
-------------------|------------------------------------------------------
   2: header       | Executive Summary
-------------------|------------------------------------------------------
   3: paragraph    | A dependency confusion vulnerability has been disc...
-------------------|------------------------------------------------------
   4: header       | Vulnerability Details
-------------------|------------------------------------------------------
   5: header       | Description
-------------------|------------------------------------------------------
   

#### Sections

A section consists of all the blocks between two consecutive headers.

In [46]:
def size_line(line: str, max_len: int = 80) -> str:
    """Return ``line`` forced to exactly ``max_len`` characters.

    Shorter lines are right-padded with spaces. Longer lines are cut and
    end in ``"..."`` so the truncation is visible.

    Args:
        line: Text to fit.
        max_len: Target width. Negative values are treated as 0.

    Returns:
        A string of exactly ``max_len`` characters.
    """
    ellipsis = "..."
    max_len = max(max_len, 0)

    if len(line) <= max_len:
        return line.ljust(max_len)

    # With fewer than three columns there is no room for the ellipsis; a
    # negative slice bound here would return a string LONGER than max_len,
    # so fall back to a plain hard cut.
    if max_len < len(ellipsis):
        return line[:max_len]

    return line[: max_len - len(ellipsis)] + ellipsis

# Pre-compute chunks for each section (requires MODEL for code summarization)
print("Computing chunks for all sections...")
section_chunks: dict[int, list] = {
    section_idx: section.get_chunks(MODEL)
    for section_idx, section in enumerate(adv.sections)
}
print(f"Done. Total chunks: {sum(len(c) for c in section_chunks.values())}")
print()

# Build header text → section index mapping.
# Sections without a header are left out. If two sections share the same
# header text, the later section index wins.
header_to_section = {
    section.header.content: section_idx
    for section_idx, section in enumerate(adv.sections)
    if section.header
}

# Read raw markdown. The encoding is given explicitly so the result does not
# depend on the platform's default locale encoding.
# NOTE(review): assumes the advisory files are UTF-8 — confirm.
with open(adv.path, encoding="utf-8") as f:
    lines = f.readlines()

def print_chunks(section_idx: int) -> None:
    """Print the pre-computed chunks of one section inside a box.

    Looks the chunks up in the notebook-level ``section_chunks`` mapping and
    prints one numbered row per chunk, with the chunk text flattened to a
    single line and fitted to the box via ``size_line``. Prints nothing when
    the section has no chunks or is not in the mapping.

    Args:
        section_idx: Index of the section whose chunks should be shown.
    """
    chunks = section_chunks.get(section_idx, [])
    if not chunks:
        return

    inner_width = 64  # characters between the left and right border
    title = f"── CHUNKS ({len(chunks)}) "

    print()
    # Pad the title with rule characters up to the box width, so the corner
    # stays aligned regardless of how many digits the chunk count has.
    print(f"  ╭{title}{'─' * (inner_width - len(title))}╮")
    for i, chunk in enumerate(chunks, start=1):
        label = f" {i:2}. "
        # Text gets whatever is left after the label and one trailing space
        # (58 columns while the row number has at most two digits).
        text = size_line(
            chunk.text.replace('\n', ' ').strip(),
            inner_width - len(label) - 1,
        )
        # Right border uses the same box-drawing character as the left one.
        print(f"  │{label}{text} │")
    print(f"  ╰{'─' * inner_width}╯")
    print()

# Replay the raw markdown line by line. Whenever a header opens a new section
# (or the skipped References part), first flush the chunk box of the section
# that just ended, then print a banner box for what starts here.
current_section = None

for line in lines:
    raw = line.rstrip('\n')

    # NOTE(review): any line starting with '#' is tested as a header, which
    # presumably includes comment lines inside fenced code — confirm this
    # cannot collide with a real header text.
    header_text = raw.lstrip('#').strip() if raw.startswith('#') else None

    banner = None  # stays None unless this line opens something new
    if header_text == "References":
        banner = f"│ SKIPPED: References (not in sections) {' '*28} │"
        upcoming = None
    elif header_text is not None and header_text in header_to_section:
        section_idx = header_to_section[header_text]
        # A header mapping to the section we are already in opens nothing.
        if section_idx != current_section:
            banner = f"│ SECTION {section_idx:2}: {size_line(header_text, 54):54} │"
            upcoming = section_idx

    if banner is not None:
        # Close out the previous section before announcing the next one.
        if current_section is not None:
            print_chunks(current_section)
        print(f"┌{'─'*68}┐")
        print(banner)
        print(f"└{'─'*68}┘")
        current_section = upcoming

    # Echo the raw line itself, fitted to the default width.
    print(size_line(raw))

# The last section has no following header to trigger its flush.
if current_section is not None:
    print_chunks(current_section)

Computing chunks for all sections...
Done. Total chunks: 71

┌────────────────────────────────────────────────────────────────────┐
│ SECTION  0: Security Advisory: Dependency Confusion in secure-c... │
└────────────────────────────────────────────────────────────────────┘
# Security Advisory: Dependency Confusion in secure-config                      
                                                                                
**CVE ID:** CVE-2024-1237                                                       
**Package:** secure-config                                                      
**Ecosystem:** npm                                                              
**Severity:** Medium                                                            
**CVSS Score:** 6.5                                                             
**Published:** February 10, 2024                                                
                                                                              