In [9]:
%load_ext autoreload
%autoreload 2

from snyk_ai import Models, create_model
# Notebook-wide model selection; passed to summarize_document in the
# advisory summarization cell further down.
MODEL = Models.Llama_3_2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Advisories

> **Unstructured Data**: 8 security advisory markdown documents
   - Located in the `data/advisories/` directory (loaded below via the relative path `../data/advisories`)
   - Each contains detailed vulnerability descriptions, code examples, attack vectors, remediation steps, and CVSS breakdown tables
   - Covers vulnerabilities like XSS, SQL Injection, RCE, CSRF, SSRF, Path Traversal, etc.


In [13]:
from snyk_ai.utils.markdown import parse_markdown_document, BlockType
from pathlib import Path

# Parse every advisory markdown file and sanity-check the layout this notebook
# relies on: block 0 is the title header, block 2 is the "Executive Summary"
# header, and block 3 is the paragraph that follows it.
advisories_dir = Path("../data/advisories").resolve()
advisory_files = sorted(advisories_dir.glob("*.md"))


print(f"Found {len(advisory_files)} advisory files:")

# advisory file name -> list of parsed blocks for that file
parsed_mds = {}

for advisory_path in advisory_files:
    blocks = parse_markdown_document(advisory_path)

    print(f"{advisory_path.name}: {len(blocks)} blocks")

    # Guard the positional checks below so a short or empty document fails with
    # a readable message instead of a bare IndexError.
    assert len(blocks) >= 4, (
        f"{advisory_path.name}: expected at least 4 blocks, got {len(blocks)}"
    )

    # Title: the text after the "Security Advisory: " prefix is the advisory name.
    assert blocks[0].type is BlockType.HEADER, (
        f"{advisory_path.name}: block 0 is {blocks[0].type}, expected a header"
    )
    assert blocks[0].content.startswith("Security Advisory: "), (
        f"{advisory_path.name}: unexpected title {blocks[0].content!r}"
    )

    # Executive summary: a header followed directly by its paragraph.
    assert blocks[2].type is BlockType.HEADER, (
        f"{advisory_path.name}: block 2 is {blocks[2].type}, expected a header"
    )
    assert blocks[2].content == "Executive Summary", (
        f"{advisory_path.name}: block 2 header is {blocks[2].content!r}, "
        "expected 'Executive Summary'"
    )
    assert blocks[3].type is BlockType.PARAGRAPH, (
        f"{advisory_path.name}: block 3 is {blocks[3].type}, expected a paragraph"
    )

    parsed_mds[advisory_path.name] = blocks

# clean up: the later cells visible here only use advisory_files and parsed_mds
del advisories_dir


Found 8 advisory files:
advisory-001.md: 44 blocks
advisory-002.md: 59 blocks
advisory-003.md: 79 blocks
advisory-004.md: 62 blocks
advisory-005.md: 66 blocks
advisory-006.md: 58 blocks
advisory-007.md: 68 blocks
advisory-008.md: 62 blocks


In [None]:
from snyk_ai.utils import summarize_document

# Summarize each advisory with the notebook-wide MODEL and keep the resulting
# text in memory, keyed by the advisory's file name.
summaries = {}
for advisory_path in advisory_files:
    advisory_name = advisory_path.name
    print(f"Summarizing {advisory_name}...")

    # The returned object is used like a pathlib.Path (read_text() and .name).
    summary_path = summarize_document(advisory_path, MODEL)
    summary_text = summary_path.read_text()
    summaries[advisory_name] = summary_text

    print(f"  -> Saved to {summary_path.name}")

print(f"\nCompleted {len(summaries)} summaries.")

Summarizing advisory-001.md...
  -> Saved to advisory-001-summary-ollama_llama3.2-20260123052642.txt
Summarizing advisory-002.md...
  -> Saved to advisory-002-summary-ollama_llama3.2-20260123052644.txt
Summarizing advisory-003.md...
  -> Saved to advisory-003-summary-ollama_llama3.2-20260123052646.txt
Summarizing advisory-004.md...
  -> Saved to advisory-004-summary-ollama_llama3.2-20260123052649.txt
Summarizing advisory-005.md...
  -> Saved to advisory-005-summary-ollama_llama3.2-20260123052651.txt
Summarizing advisory-006.md...
  -> Saved to advisory-006-summary-ollama_llama3.2-20260123052653.txt
Summarizing advisory-007.md...
  -> Saved to advisory-007-summary-ollama_llama3.2-20260123052656.txt
Summarizing advisory-008.md...
  -> Saved to advisory-008-summary-ollama_llama3.2-20260123052658.txt

Completed 8 summaries.


In [None]:
from snyk_ai.utils import summarize_code_snippet

# Compare how several models summarize every code block found in the parsed
# advisories. Everything is printed as markdown: a header per advisory, a
# sub-header and fenced code listing per snippet, then one bullet per model.
models = [Models.Llama_3_2, Models.Claude_Opus, Models.GPT_5_2]
for filename, md_blocks in parsed_mds.items():
    code_blocks = [
        (b.content, b.language) for b in md_blocks if b.type is BlockType.CODE_BLOCK
    ]
    print(f"#### {filename} - {len(code_blocks)} code blocks\n")

    for i, (code, lang) in enumerate(code_blocks):
        # header
        print(f"##### {i + 1} - {lang or 'language not specified'}\n")
        # code block
        print(f"```{lang or ''}\n{code}\n```\n")
        # summaries
        # The loop variable is deliberately lowercase: naming it MODEL would
        # rebind the notebook-wide MODEL constant, so re-running an earlier
        # cell would silently use whichever model was iterated last.
        for model in models:
            summary = summarize_code_snippet(code, model)
            print(f"- {model.name}\n  > {summary}\n")


#### advisory-001.md - 5 code blocks

##### 1 - javascript

```javascript
const { body, validationResult } = require('express-validator');
const express = require('express');
const app = express();

app.post('/register', 
  body('email').isEmail(),
  body('username').isLength({ min: 3 }),
  (req, res) => {
    const errors = validationResult(req);
    if (!errors.isEmpty()) {
      // VULNERABLE: User input directly inserted into HTML
      return res.status(400).send(`
        <h1>Validation Error</h1>
        <p>${errors.array()[0].msg}</p>
        <p>Input: ${req.body.username}</p>
      `);
    }
    // ... registration logic
  }
);
```

- ollama:llama3.2
  > This Express.js code validates user input for a post registration request, returning a 400 error response to the user with validation errors if they exist.

- ollama:llama3.2
  > This Express.js code validates user input for a `/register` POST endpoint using middleware functions, but has a vulnerability by directly inserting t