diff --git a/docs/design-guide/device-types.md b/docs/design-guide/device-types.md new file mode 100644 index 000000000..742536ba4 --- /dev/null +++ b/docs/design-guide/device-types.md @@ -0,0 +1,280 @@ +# Device Types + +Device types provide a structured way to define and categorize hardware models +supported by UnderStack. They serve as a declarative specification of hardware +characteristics, enabling consistent hardware identification, resource class +mapping, and infrastructure automation across the platform. + +## Purpose and Architecture + +Device type definitions solve several critical challenges in bare metal +infrastructure management: + +* **Hardware Identification**: Precise specification of manufacturer, model, + and physical attributes enables automated detection and categorization +* **Resource Classification**: Multiple resource class configurations per + device type allow flexible mapping of the same hardware model to different + Nova flavors and workload profiles +* **Infrastructure as Code**: Hardware specifications live in Git alongside + deployment configurations, providing versioning, review, and audit capabilities +* **Cross-Platform Integration**: Device types integrate with Nautobot, + Ironic, and Nova to provide consistent hardware metadata throughout the stack + +## Schema Structure + +Device type definitions follow the [device-type.schema.json](https://github.com/rackerlabs/understack/blob/main/schema/device-type.schema.json) +JSON Schema, which enforces validation and consistency across all definitions. 
+ +### Common Properties + +All device types must specify: + +* **class**: Device category - `server`, `switch`, or `firewall` +* **manufacturer**: Hardware vendor (e.g., "Dell", "HPE") +* **model**: Specific model identifier (e.g., "PowerEdge R7615") +* **u_height**: Rack unit height (must be greater than 0) +* **is_full_depth**: Boolean indicating full-depth rack mounting + +### Optional Properties + +Device types may include: + +* **interfaces**: Named physical network interfaces on the hardware. Used to + define specific ports such as management interfaces (BMC/iDRAC/iLO) or + named switch ports. Each interface has: + * `name`: Interface identifier (e.g., "iDRAC", "eth0", "mgmt") + * `type`: Physical interface type (e.g., "1000base-t", "10gbase-x-sfp+") + * `mgmt_only`: Boolean flag indicating management-only interfaces +* **power-ports**: Power inlet specifications for the device. Each power port has: + * `name`: Power port identifier (e.g., "psu1", "psu2") + * `type`: Power port connector type (e.g., "iec-60320-c14", "iec-60320-c20") - see [Nautobot PowerPortTypeChoices](https://github.com/nautobot/nautobot/blob/develop/nautobot/dcim/choices.py#L507) for valid values + * `maximum_draw`: Maximum power draw in watts (optional) +* **resource_class**: Array of resource class configurations (required for + `class: server`) + +### Resource Classes + +For server-class devices, resource classes define the specific hardware +configurations that map to OpenStack Nova flavors. Multiple resource classes +can be defined for the same hardware model to represent common build +configurations in the data center (e.g., different CPU, RAM, or drive +populations of the same chassis). + +During server enrollment, the hardware inspection data is matched against +these resource class definitions. The matching resource class name is set on +the Ironic node's `resource_class` property, which is then used to create +corresponding Nova flavors for workload scheduling. 
+ +Each resource class requires: + +* **name**: Resource class identifier (e.g., "m1.small", "compute-optimized"). + This value will be set on the Ironic node and used for Nova flavor creation. +* **cpu**: Object with `cores` (number) and `model` (string) +* **memory**: Object with `size` in GB +* **drives**: Array of drive objects, each with `size` in GB +* **nic_count**: Minimum number of user-usable network interfaces (integer). + This represents general-purpose network ports available for workload traffic, + not tied to specific named interfaces. Used to verify the server has + sufficient network connectivity for the workload profile. + +## Example Definition + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +class: server +manufacturer: Dell +model: PowerEdge R7615 +u_height: 2 +is_full_depth: true + +# Named physical interfaces (management, specific ports) +interfaces: + - name: iDRAC + type: 1000base-t + mgmt_only: true + +# Power inlet specifications +power-ports: + - name: psu1 + type: iec-60320-c14 + maximum_draw: 750 + - name: psu2 + type: iec-60320-c14 + maximum_draw: 750 + +resource_class: + - name: m1.small + cpu: + cores: 16 + model: AMD EPYC 9124 + memory: + size: 128 + drives: + - size: 480 + - size: 480 + # User-usable network interfaces (not tied to specific named ports) + nic_count: 2 +``` + +## Integration Points + +### GitOps Deployment + +Device type definitions live in the deployment repository under +`hardware/device-types/`. They are packaged as Kubernetes ConfigMaps via +Kustomize, making them available to platform components. + +### Resource Class Matching and Nova Flavors + +During bare metal enrollment: + +1. Hardware is inspected via Ironic to collect CPU, memory, drive, and network + interface data +2. The `understack-flavor-matcher` service compares inspection data against + device type resource class definitions +3. 
When a match is found, the resource class name is set on the Ironic node's + `resource_class` property +4. Nova flavors are created or updated based on the resource class, making the + hardware available for workload scheduling + +**Multiple Resource Classes**: Define multiple resource classes for the same +device type when you have common build variations of the same chassis. For +example, a Dell PowerEdge R7615 might be populated with different CPU models, +RAM capacities, or drive configurations depending on the intended workload +(compute, storage, memory-intensive, etc.). + +### Nautobot Synchronization + +Device types provide the source of truth for hardware specifications that are +synchronized to Nautobot's device type models, ensuring consistency between +the deployment repository and the infrastructure CMDB. + +### Ironic Integration + +During bare metal enrollment and inspection, Ironic driver metadata is +validated against device type definitions to confirm hardware matches +expected specifications. + +## File Organization + +Device type definitions are organized in the deployment repository: + +```text +hardware/ +├── base/ +│ └── kustomization.yaml # ConfigMap generation +└── device-types/ + ├── dell-poweredge-r7615.yaml + ├── hpe-proliant-dl360.yaml + └── ... 
+``` + +The `base/kustomization.yaml` generates a ConfigMap containing all device +type definitions: + +```yaml +configMapGenerator: + - name: device-types + options: + disableNameSuffixHash: true + files: + - dell-poweredge-r7615.yaml=../device-types/dell-poweredge-r7615.yaml +``` + +## Schema Validation + +Device type files include a YAML language server directive for editor-based +validation: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +``` + +The schema enforces: + +* Required field presence +* Type correctness (strings, numbers, booleans, arrays, objects) +* Enum constraints (e.g., `class` must be server/switch/firewall) +* Conditional requirements (servers must have resource classes) +* Numeric constraints (e.g., `u_height > 0`) + +## Resource Class Assignment + +When a device-type defines multiple resource classes for the same hardware model, the Ironic inspection process determines which resource class to assign to each discovered node through exact hardware matching. + +### Inspection Hook Matching Logic + +The `resource-class` inspection hook in `python/ironic-understack` performs the following steps: + +1. **Hardware Discovery**: Ironic inspection discovers hardware specifications: + * CPU cores and model + * Memory size (in MB) + * Drive sizes and count + * System manufacturer and model + +2. **Device-Type Matching**: Hook reads device-types ConfigMap and matches: + * Manufacturer name (e.g., "Dell", "HPE") + * Model name (e.g., "PowerEdge R7615") + +3. **Resource Class Matching**: Within matched device-type, hook compares discovered specs against each resource class: + * CPU details must match `cpu.cores` and `cpu.model` exactly + * Memory size must match `memory.size` (converted to MB) + * Drive count and sizes must match `drives` array + * Network interface count must be at least `nic_count` (the minimum number of user-usable interfaces) + +4. 
**Assignment**: Hook sets `resource_class` property on Ironic node to the matching resource class name + +### Example Matching Scenario + +Device-type definition with multiple resource classes: + +```yaml +manufacturer: Dell +model: PowerEdge R7615 +resource_class: + - name: m1.small + cpu: {cores: 16, model: AMD EPYC 9124} + memory: {size: 128} + drives: [{size: 480}, {size: 480}] + - name: m1.medium + cpu: {cores: 32, model: AMD EPYC 9334} + memory: {size: 256} + drives: [{size: 960}, {size: 960}] +``` + +Inspection discovers Dell PowerEdge R7615 with 32 cores, 256 GB RAM, two 960 GB drives: + +* Matches device-type: Dell PowerEdge R7615 +* Matches resource class: m1.medium (exact CPU/memory/drives match) +* Sets `node.resource_class = "m1.medium"` + +### Matching Requirements + +* **Exact matching**: All specs (CPU cores and model, memory size, drive sizes) must match exactly +* **No partial matches**: If any spec differs, resource class is not matched +* **No match fallback**: If no resource class matches discovered specs, inspection fails with error +* **Drive order matters**: Drive sizes are matched in array order + +This ensures predictable resource class assignment and prevents misconfiguration. + +## Management Workflow + +Device types are managed through the `understackctl` CLI tool: + +**Adding new device types:** + +1. Create new device type definitions as YAML files +2. Validate and add with `understackctl device-type add <file>` (automatically updates Kustomization) +3. Commit to Git and submit pull request +4. ArgoCD detects changes and updates ConfigMap +5. Ironic inspection hook reads updated ConfigMap and uses new definitions for matching + +**Updating existing device types:** + +1. Edit the device type file in `$UC_DEPLOY/hardware/device-types/` +2. Validate with `understackctl device-type validate <file>` +3. Commit to Git and submit pull request +4. 
ArgoCD detects changes and updates ConfigMap + +See the [operator guide](../operator-guide/device-types.md) for detailed +command usage and examples. diff --git a/docs/design-guide/flavors.md b/docs/design-guide/flavors.md new file mode 100644 index 000000000..408f242f6 --- /dev/null +++ b/docs/design-guide/flavors.md @@ -0,0 +1,407 @@ +# Hardware Flavor Definitions + +## Purpose + +Hardware flavor definitions are YAML specifications that define how to create Nova flavors for bare metal nodes. Each hardware flavor definition results in a corresponding Nova flavor that users see when running `openstack flavor list`. These definitions specify matching criteria based on resource classes and traits, determining which Ironic bare metal nodes are eligible for each Nova flavor. + +The Nova flavor properties (vCPUs, RAM, disk) are automatically derived from the matched device-type resource class, and scheduling is performed using resource class and trait matching. + +Flavors enable operators to: + +* Create generic hardware categories (e.g., `m1.small`) that match any server in a resource class +* Define specialized flavors with trait requirements (e.g., `m1.small.nicX`) for specific hardware features +* Exclude specific hardware from flavor matching using trait absence requirements +* Provide users with both flexible and specialized compute options from the same hardware pool + +## Architecture + +### Workflow + +1. **Inspection Phase**: Custom inspection code adds traits to Ironic nodes based on discovered hardware capabilities (NICs, GPUs, storage controllers, etc.) +2. **Device-Type Matching**: Hardware inspection data is matched against device-type definitions, setting the node's `resource_class` property +3. **Flavor Matching**: Flavor definitions match nodes by `resource_class` first, then filter by trait requirements +4. 
**Nova Flavor Creation**: Matched flavors use the CPU, memory, and drive specifications from the device-type resource class to create Nova flavors + +### Data Flow + +```mermaid +graph TD + A[Hardware Inspection] -->|Discovers hardware| B[Ironic Node] + A -->|Sets traits| B + A -->|Sets resource_class| B + + C[Device-Type Definition] -->|Defines specs for resource_class| D[CPU, RAM, Drive specs] + + E[Flavor Definition] -->|Matches nodes by| F[resource_class + traits] + D -->|Provides properties| G[Nova Flavor Created] + F --> G +``` + +## Schema Structure + +Flavor definitions are YAML files validated against `schema/flavor.schema.json`. + +### Required Fields + +* **name**: Unique identifier for the flavor (e.g., `m1.small`, `m1.small.nicX`) +* **resource_class**: Ironic resource class to match (must correspond to a device-type resource class) + +### Optional Fields + +* **traits**: Array of trait requirements for hardware matching + * **trait**: Trait name without `CUSTOM_` prefix (e.g., `NICX`, `GPU`, `NVME`) + * Pattern: `^[A-Z][A-Z0-9_]*$` (uppercase alphanumeric and underscores) + * `CUSTOM_` prefix is automatically added when interacting with Ironic + * **state**: Either `required` (node must have trait) or `absent` (node must not have trait) + +### Example: Generic Flavor + +```yaml +--- +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small +resource_class: m1.small +``` + +This matches all Ironic nodes with `resource_class=m1.small`, regardless of traits. Nova flavor properties (vCPUs, RAM, disk) come from the device-type's `m1.small` resource class definition. + +### Example: Trait-Specific Flavor + +```yaml +--- +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small.nicX +resource_class: m1.small +traits: + - trait: NICX + state: required +``` + +This matches only nodes with `resource_class=m1.small` AND the `CUSTOM_NICX` trait. 
Nova flavor properties still come from the device-type's `m1.small` resource class. + +### Example: Exclusion Flavor + +```yaml +--- +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small.no-gpu +resource_class: m1.small +traits: + - trait: GPU + state: absent +``` + +This matches nodes with `resource_class=m1.small` that do NOT have the `CUSTOM_GPU` trait. + +## Integration Points + +### GitOps Deployment + +Flavor definitions are stored in the deployment repository: + +```text +$UC_DEPLOY/ +└── hardware/ + ├── flavors/ + │ ├── m1.small.yaml + │ ├── m1.small.nicX.yaml + │ └── m1.medium.yaml + └── base/ + └── kustomization.yaml +``` + +The `kustomization.yaml` generates a ConfigMap containing all flavor definitions: + +```yaml +configMapGenerator: + - name: flavors + options: + disableNameSuffixHash: true + files: + - m1.small.yaml=../flavors/m1.small.yaml + - m1.small.nicX.yaml=../flavors/m1.small.nicX.yaml +``` + +ArgoCD detects changes and updates the ConfigMap in the cluster. + +### Trait System + +**Trait Discovery**: Custom inspection code examines hardware and adds traits to Ironic nodes: + +* NIC models/capabilities (e.g., `CUSTOM_NICX`, `CUSTOM_MELLANOX`) +* GPU presence/models (e.g., `CUSTOM_GPU`, `CUSTOM_NVIDIA_A100`) +* Storage controllers (e.g., `CUSTOM_NVME`, `CUSTOM_RAID`) +* CPU features (e.g., `CUSTOM_AVX512`, `CUSTOM_SGX`) + +**Trait Naming Convention**: + +* Users write trait names without the `CUSTOM_` prefix in flavor definitions +* The system automatically adds `CUSTOM_` when interacting with Ironic APIs +* Trait names must be uppercase with alphanumeric characters and underscores + +### Flavor Matcher + +The flavor-matcher service (or workflow component) consumes both flavor and device-type definitions: + +1. Queries Ironic for nodes with matching `resource_class` +2. 
Filters nodes based on trait requirements: + * `required`: Node must have the trait + * `absent`: Node must NOT have the trait +3. Looks up the device-type resource class to get CPU, memory, and drive specifications +4. Creates or updates Nova flavors with properties from the device-type resource class + +### Nova Flavor Property Derivation + +Nova flavor properties (vcpus, ram, disk) are derived from the device-type resource class for convenience. For bare metal flavors, these properties are informational only - the actual scheduling is done through the `extra_specs` properties where resource consumption is set to 0. See [OpenStack Ironic flavor configuration documentation](https://docs.openstack.org/ironic/latest/install/configure-nova-flavors.html) for details on how Nova flavors work with Ironic. + +Derived properties: + +* **vcpus**: CPU cores from resource class `cpu.cores` +* **ram**: Memory size from resource class `memory.size` (converted to MB) +* **disk**: Primary drive size from resource class `drives[0].size` (or 0 for diskless) + +The extra_specs properties are set for scheduling: + +* **resources:VCPU='0'**: Bare metal doesn't consume virtual CPU resources +* **resources:MEMORY_MB='0'**: Bare metal doesn't consume virtual memory resources +* **resources:DISK_GB='0'**: Bare metal doesn't consume virtual disk resources +* **resources:CUSTOM_BAREMETAL_{RESOURCE_CLASS}='1'**: Requires one bare metal node of the specified resource class + +Example device-type resource class: + +```yaml +resource_class: + - name: m1.small + cpu: + cores: 16 + model: AMD EPYC 9124 + memory: + size: 128 + drives: + - size: 480 + - size: 480 + nic_count: 2 +``` + +This produces a Nova flavor with properties: + +* vcpus: 16 +* ram: 131072 MB (128 GB * 1024) +* disk: 480 GB + +And extra_specs for scheduling: + +* resources:CUSTOM_BAREMETAL_M1_SMALL='1' +* resources:VCPU='0' +* resources:MEMORY_MB='0' +* resources:DISK_GB='0' + +## Use Cases + +### Generic Hardware Pools + 
+Create flavors that match any hardware in a resource class: + +```yaml +--- +name: compute.standard +resource_class: m1.medium +``` + +Users get any available `m1.medium` server, providing flexibility and maximizing hardware utilization. + +### Specialized Workloads + +Create flavors for specific hardware capabilities: + +```yaml +--- +name: compute.gpu +resource_class: m1.large +traits: + - trait: GPU + state: required +``` + +Guarantees instances get hardware with GPU capabilities while using the `m1.large` resource class specifications for vCPUs/RAM/disk. + +### Hardware Requirement with Exclusion + +Require specific hardware while excluding others: + +```yaml +--- +name: m1.small.mellanox-cx5.no-gpu +resource_class: m1.small +traits: + - trait: NIC_MELLANOX_CX5 + state: required + - trait: GPU + state: absent +``` + +Guarantees instances get nodes with Mellanox ConnectX-5 network cards that do not have a GPU. + +### Multiple Flavors per Resource Class + +Define multiple flavors for the same resource class with different trait requirements: + +```yaml +--- +# Generic flavor +name: m1.small +resource_class: m1.small +--- +# Specialized variant +name: m1.small.nvme +resource_class: m1.small +traits: + - trait: NVME + state: required +``` + +Users can choose between generic availability or guaranteed NVMe storage. Both flavors have identical Nova properties (derived from device-type `m1.small`), but different hardware selection criteria. 
+ +## Best Practices + +### Naming Conventions + +* **Base flavor**: Use simple names matching resource class (e.g., `m1.small`) +* **Specialized flavors**: Append trait indicators (e.g., `m1.small.mellanox-cx5`, `m1.large.gpu`) +* **Exclusion flavors**: Use descriptive suffixes (e.g., `m1.small.no-gpu`) + +### Trait Design + +* Define traits at appropriate granularity - both specific (model-level) and general (category-level) traits are useful +* Specific traits enable precise hardware selection and maximize flexibility for users with specialized needs +* General traits provide broader hardware pools for users with less specific requirements +* Document trait meanings and discovery logic in a central registry (see [hardware-traits.md](hardware-traits.md) for standard traits) +* Use consistent trait naming across the organization: + * Reference the standard traits documented in hardware-traits.md + * For custom traits, establish naming conventions in your deployment repository's trait registry + * Follow the pattern: `CATEGORY_VENDOR_MODEL` (e.g., `NIC_MELLANOX_CX5`, `GPU_NVIDIA_A100`) + * Coordinate with other teams when defining new traits to avoid duplicates or conflicts + +### Resource Class Alignment + +* Ensure `resource_class` references exist in device-type definitions +* Nova properties are automatically derived from device-type resource class specifications +* Multiple flavors can reference the same resource class with different trait filters +* Resource class defines "what resources the hardware has"; flavor defines "which hardware qualifies" + +### Version Control + +* Treat flavor definitions as infrastructure-as-code +* Use descriptive commit messages when adding/modifying flavors +* Test flavor matching in non-production before promoting changes +* Document rationale for trait requirements in commit messages + +## Management Workflow + +Flavor definitions are managed using the `understackctl flavor` CLI: + +```bash +# Validate a flavor definition 
+understackctl flavor validate /tmp/m1.small.yaml + +# Add to deployment repository +understackctl flavor add /tmp/m1.small.yaml + +# List all flavors +understackctl flavor list + +# Show flavor details +understackctl flavor show m1.small + +# Delete a flavor +understackctl flavor delete m1.small +``` + +See the [operator guide](../operator-guide/flavors.md) for detailed usage instructions. + +## Validation + +Flavor definitions undergo JSON schema validation checking: + +* Required field presence (name, resource_class) +* Type correctness (strings for `name` and `resource_class`; an array of trait objects for `traits`) +* Trait name patterns (uppercase alphanumeric with underscores) +* Trait state enum values (`required` or `absent`) + +Validation happens at: + +* **Editor time**: YAML language server validates against schema URL +* **CLI time**: `understackctl flavor add` and `validate` commands perform full schema validation +* **Runtime**: Flavor-matcher validates ConfigMap contents before processing + +## Relationship to Device-Types + +Flavors and device-types have a tightly coupled relationship: + +* **Device-types** define physical hardware models and resource classes with CPU/memory/drive specifications +* **Flavors** reference these resource classes and add trait filtering for node selection +* **Device-types** describe "what the hardware is" and "what resources it has" +* **Flavors** describe "which hardware qualifies for this Nova flavor" + +### Resource Class Linkage + +Each flavor's `resource_class` must match a resource class defined in a device-type: + +**Device-Type Definition** (`dell-poweredge-r7615.yaml`): + +```yaml +--- +class: server +manufacturer: Dell +model: PowerEdge R7615 +resource_class: + - name: m1.small + cpu: + cores: 16 + model: AMD EPYC 9124 + memory: + size: 128 + drives: + - size: 480 + - size: 480 + nic_count: 2 +``` + +**Flavor Definition** (`m1.small.yaml`): + +```yaml +--- +name: m1.small +resource_class: m1.small # Links to device-type resource class +``` + +The 
flavor-matcher looks up `m1.small` in device-type definitions to find the CPU (16 cores), memory (128 GB), and drives (480 GB) when creating the Nova flavor. + +**Important**: Resource class names must be unique across all device types. Each resource class name should only be defined in one device type to avoid conflicts and ensure predictable Nova flavor creation. Validation checks enforce this constraint. + +### Multiple Flavors, Single Resource Class + +A single device-type resource class supports multiple flavors with different trait filters: + +```yaml +--- +# Generic: matches all m1.small nodes +name: m1.small +resource_class: m1.small +--- +# Specialized: only nodes with NICX trait +name: m1.small.nicX +resource_class: m1.small +traits: + - trait: NICX + state: required +--- +# Exclusion: nodes without GPU +name: m1.small.no-gpu +resource_class: m1.small +traits: + - trait: GPU + state: absent +``` + +All three Nova flavors have identical vCPUs/RAM/disk (from device-type `m1.small`), but select different subsets of hardware based on traits. + +This separation allows flexible hardware matching strategies without duplicating resource specifications across multiple files. diff --git a/docs/design-guide/hardware-traits.md b/docs/design-guide/hardware-traits.md new file mode 100644 index 000000000..5c0cbb7ab --- /dev/null +++ b/docs/design-guide/hardware-traits.md @@ -0,0 +1,284 @@ +# Hardware Traits + +## Purpose + +Hardware traits are characteristics discovered during inspection that describe capabilities or features of bare metal nodes. Traits enable flavor definitions to filter nodes based on specific hardware attributes beyond basic resource class matching. 
+ +Traits follow a hybrid model: + +* **Standard traits**: Common, well-defined traits with consistent discovery logic +* **Custom traits**: Site-specific or vendor-specific traits following naming conventions + +## Trait Naming Convention + +All trait names in UnderStack follow these rules: + +* Uppercase alphanumeric characters with underscores only (pattern: `^[A-Z][A-Z0-9_]*$`) +* Written without `CUSTOM_` prefix in flavor definitions (prefix added automatically when interacting with Ironic) +* Descriptive and concise (e.g., `NVME`, not `HAS_NVME_STORAGE_CAPABILITY`) +* Category-based prefixes for organization (e.g., `NIC_`, `GPU_`, `CPU_`) + +**Good trait names:** + +* `NVME` - Node has NVMe storage +* `GPU_NVIDIA` - Node has NVIDIA GPU +* `NIC_MELLANOX` - Node has Mellanox network card +* `CPU_AVX512` - CPU supports AVX-512 instructions + +**Poor trait names:** + +* `nvme` - Not uppercase +* `has-nvme` - Contains hyphens +* `CUSTOM_NVME` - Includes CUSTOM_ prefix (added automatically) + +## Standard Traits + +Standard traits are commonly used across deployments and have well-defined discovery logic. 
+ +### Storage Traits + +* **NVME**: Node has at least one NVMe storage device + * Discovery: Check for NVMe devices in `/sys/class/nvme/` or block device type +* **RAID**: Node has hardware RAID controller + * Discovery: Detect RAID controller via lspci or vendor-specific tools +* **SSD**: Node has at least one SSD (non-NVMe) + * Discovery: Check block device rotational flag + +### Network Traits + +* **NIC_MELLANOX_CX5**: Node has Mellanox ConnectX-5 network interface + * Discovery: Check NIC vendor ID (0x15b3) and device ID (0x1017 for ConnectX-5) +* **NIC_INTEL_X710**: Node has Intel X710 network interface + * Discovery: Check NIC vendor ID (0x8086) and device ID (0x1572 for X710-DA4) +* **NIC_BROADCOM_57414**: Node has Broadcom BCM57414 network interface + * Discovery: Check NIC vendor ID (0x14e4) and device ID (0x16d7 for BCM57414) +* **NIC_25G**: Node has 25 Gbps capable network interface + * Discovery: Check link speed capability +* **NIC_100G**: Node has 100 Gbps capable network interface + * Discovery: Check link speed capability + +### GPU Traits + +* **GPU**: Node has GPU device + * Discovery: Check for GPU PCI device class (0x0300) +* **GPU_NVIDIA**: Node has NVIDIA GPU + * Discovery: Check GPU vendor ID (0x10de) +* **GPU_AMD**: Node has AMD GPU + * Discovery: Check GPU vendor ID (0x1002) + +### CPU Traits + +* **CPU_AVX512**: CPU supports AVX-512 instruction set + * Discovery: Check CPU flags for avx512 +* **CPU_SGX**: CPU supports Intel SGX + * Discovery: Check CPU flags for sgx +* **CPU_AMD**: CPU is AMD processor + * Discovery: Check CPU vendor string +* **CPU_INTEL**: CPU is Intel processor + * Discovery: Check CPU vendor string + +### Firmware/BIOS Traits + +* **SECURE_BOOT**: Node has Secure Boot enabled + * Discovery: Check UEFI Secure Boot status +* **TPM**: Node has TPM (Trusted Platform Module) + * Discovery: Check for TPM device in `/sys/class/tpm/` + +## Custom Traits + +Custom traits allow site-specific or vendor-specific hardware 
categorization not covered by standard traits. + +### Use Cases for Custom Traits + +* **Hardware generations**: `GEN10`, `GEN11` for HP server generations +* **Vendor-specific features**: `IDRAC9`, `ILO5` for management interfaces + +### Custom Trait Guidelines + +* Use clear, descriptive names +* Document trait meaning in deployment repository +* Include discovery logic in inspection hook +* Avoid overlapping with standard traits +* Consider whether trait should become standard if widely used + +## Trait Discovery + +Traits are added to Ironic nodes during the inspection process via inspection hooks in `python/ironic-understack`. + +### Inspection Hook Pattern + +```python +from ironic.drivers.modules.inspector.hooks import base +from oslo_log import log as logging + +LOG = logging.getLogger(__name__) + +class TraitDiscoveryHook(base.InspectionHook): + """Hook to discover and add hardware traits to nodes.""" + + def __call__(self, task, inventory, plugin_data): + """Discover traits from hardware inventory.""" + traits = set() + + # Example: Detect NVMe storage + if self._has_nvme_storage(inventory): + traits.add('CUSTOM_NVME') + + # Example: Detect GPU + if self._has_gpu(inventory): + traits.add('CUSTOM_GPU') + gpu_vendor = self._get_gpu_vendor(inventory) + if gpu_vendor == 'nvidia': + traits.add('CUSTOM_GPU_NVIDIA') + + # Add discovered traits to node + if traits: + task.node.set_trait(list(traits)) + LOG.info("Added traits to node %s: %s", task.node.uuid, traits) + + def _has_nvme_storage(self, inventory): + """Check if node has NVMe storage.""" + for disk in inventory.get('disks', []): + if 'nvme' in disk.get('name', '').lower(): + return True + return False + + def _has_gpu(self, inventory): + """Check if node has GPU.""" + for pci in inventory.get('pci_devices', []): + # PCI class 0x0300 is VGA compatible controller + if pci.get('class_id') == '0x0300': + return True + return False + + def _get_gpu_vendor(self, inventory): + """Determine GPU vendor.""" + for 
pci in inventory.get('pci_devices', []): + if pci.get('class_id') == '0x0300': + vendor_id = pci.get('vendor_id') + if vendor_id == '0x10de': + return 'nvidia' + elif vendor_id == '0x1002': + return 'amd' + return None +``` + +### Trait Discovery Best Practices + +* **Idempotent**: Running inspection multiple times should produce same traits +* **Accurate**: Only add traits that are definitively present +* **Documented**: Document discovery logic for each trait +* **Efficient**: Minimize expensive hardware probing +* **Testable**: Include unit tests for trait discovery logic + +## Trait Usage in Flavors + +Flavors reference traits to filter nodes within a resource class. + +### Requiring Specific Hardware + +```yaml +name: compute.nvme +resource_class: m1.medium +traits: + - trait: NVME + state: required +``` + +Matches only nodes with NVMe storage in m1.medium resource class. + +### Excluding Hardware + +```yaml +name: compute.no-gpu +resource_class: m1.large +traits: + - trait: GPU + state: absent +``` + +Matches nodes without GPU in m1.large resource class. + +### Multiple Trait Requirements + +```yaml +name: compute.nvidia-nvme +resource_class: m1.large +traits: + - trait: GPU_NVIDIA + state: required + - trait: NVME + state: required +``` + +Matches nodes with both NVIDIA GPU and NVMe storage. + +## Trait Registry + +Maintain a trait registry in your deployment repository documenting all traits in use. + +**Example: `$UC_DEPLOY/docs/traits.md`** + +```markdown +# Hardware Traits Registry + +## Standard Traits +- NVME: NVMe storage present +- GPU_NVIDIA: NVIDIA GPU present +- NIC_MELLANOX: Mellanox NIC present + +## Custom Traits +- GEN10: HP Gen10 server hardware +``` + +Benefits: + +* Central documentation of all traits +* Discovery logic references +* Cross-team communication +* Audit trail for trait additions + +## Integration with Flavors + +The complete hardware matching flow: + +1. 
**Inspection**: Ironic inspector discovers hardware, inspection hooks add traits +2. **Resource Class**: Inspection hook matches device-type and sets `node.resource_class` +3. **Trait Set**: Inspection hook sets `node.traits` based on discovered capabilities +4. **Flavor Matching**: Flavor definitions filter nodes by `resource_class` + trait requirements +5. **Nova Flavor**: Nova flavor created with properties from device-type resource class + +Example end-to-end: + +**Hardware**: Dell R7615 with 32 cores, 256GB RAM, NVMe, NVIDIA GPU + +**Inspection discovers**: + +* Device-type: Dell PowerEdge R7615 +* Resource class: m1.medium (matched by CPU/RAM) +* Traits: NVME, GPU, GPU_NVIDIA + +**Flavor matches**: + +```yaml +name: compute.gpu-nvme +resource_class: m1.medium +traits: + - trait: GPU_NVIDIA + state: required + - trait: NVME + state: required +``` + +**Result**: Node eligible for `compute.gpu-nvme` flavor, Nova flavor created with m1.medium resource class properties (32 vCPUs, 256GB RAM from device-type). + +## Trait Evolution + +As deployments mature, custom traits may become standard traits: + +1. **Custom trait usage**: Site adds `CUSTOM_NETWORK_ACCEL` for SmartNIC hardware +2. **Cross-site adoption**: Other sites implement same trait +3. **Standardization**: Trait documented as standard `NIC_SMARTNIC` +4. **Migration**: Update inspection hooks and flavor definitions to use standard name + +Maintain backward compatibility during transitions by supporting both trait names temporarily. diff --git a/docs/operator-guide/device-types.md b/docs/operator-guide/device-types.md new file mode 100644 index 000000000..146fef21f --- /dev/null +++ b/docs/operator-guide/device-types.md @@ -0,0 +1,517 @@ +# Device Type Management + +Device types define the hardware models supported by your UnderStack +deployment. This guide covers how to create, manage, and validate device +type definitions using the `understackctl` CLI tool. 
+ +For architectural details and schema information, see the +[design guide](../design-guide/device-types.md). + +## Prerequisites + +* `understackctl` CLI tool installed +* Access to your deployment repository +* `UC_DEPLOY` environment variable set to your deployment repository path + +## Command Overview + +The `understackctl device-type` command provides five subcommands: + +```bash +understackctl device-type add # Add a device type to the deployment +understackctl device-type validate # Validate a device type definition +understackctl device-type delete # Delete a device type +understackctl device-type list # List all device types +understackctl device-type show # Show device type details +``` + +All commands require the `UC_DEPLOY` environment variable to be set: + +```bash +export UC_DEPLOY=/path/to/your/deployment-repo +``` + +## Creating Device Type Definitions + +### 1. Create the YAML Definition File + +Create a new YAML file with your hardware specifications. You can create it +anywhere (e.g., `/tmp/my-device.yaml`). The filename doesn't matter at this +stage as it will be automatically named based on the manufacturer and model +when added to the deployment. + +### 2. Define the Hardware Specification + +Start with the YAML language server directive for editor validation: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +class: server +manufacturer: Dell +model: PowerEdge R7615 +u_height: 2 +is_full_depth: true +``` + +### 3. Add Network Interfaces + +Define named physical interfaces on the hardware. This is typically used for +management interfaces (BMC/iDRAC/iLO) or specific named ports on network +devices: + +```yaml +interfaces: + - name: iDRAC + type: 1000base-t + mgmt_only: true +``` + +**Note**: General-purpose network ports for workload traffic are specified +using `nic_count` in the resource class definition, not here. + +### 3a. 
Add Power Ports (Optional) + +Define power inlet specifications for accurate power capacity planning and +monitoring: + +```yaml +power-ports: + - name: psu1 + type: iec-60320-c14 + maximum_draw: 750 + - name: psu2 + type: iec-60320-c14 + maximum_draw: 750 +``` + +Each power port specification includes: + +* `name`: Power supply identifier (e.g., "psu1", "psu2", "PSU-A") +* `type`: Power connector type - see [Nautobot PowerPortTypeChoices](https://github.com/nautobot/nautobot/blob/develop/nautobot/dcim/choices.py#L507) for valid values (e.g., "iec-60320-c14", "iec-60320-c20") +* `maximum_draw`: Maximum power draw in watts (optional) + +Common power port types: + +* `iec-60320-c14`: Standard 15A power inlet (most common for servers) +* `iec-60320-c20`: High-current 20A power inlet (used for high-power servers) +* `iec-60309-p-n-e-6h`: Industrial single-phase (P+N+E) pin-and-sleeve power connector + +**Note**: Power port information helps with capacity planning and can be +synchronized to Nautobot for power feed calculations. + +### 4. Define Resource Classes + +For server-class devices, specify one or more resource class configurations. +Each resource class represents a hardware profile that can be matched to +servers during enrollment: + +```yaml +resource_class: + - name: m1.small + cpu: + cores: 16 + model: AMD EPYC 9124 + memory: + size: 128 + drives: + - size: 480 + - size: 480 + # Minimum user-usable network interfaces (not tied to specific named ports) + nic_count: 2 + + - name: m1.medium + cpu: + cores: 32 + model: AMD EPYC 9334 + memory: + size: 256 + drives: + - size: 960 + - size: 960 + # This configuration requires at least 2 network ports for workload traffic + nic_count: 2 +``` + +**Understanding nic_count**: This field specifies the minimum number of +general-purpose network interfaces available for workload traffic. These are +not tied to specific named interfaces defined in the top-level `interfaces` +section. 
The system verifies that the server has at least this many usable +network ports beyond any management-only interfaces. + +**Understanding multiple resource classes**: You can define multiple resource +classes for the same device type to represent different hardware configurations +of the same chassis model. For example, you might populate the same server +model with different CPU, RAM, or storage configurations depending on the +intended workload. Each resource class represents one of these common build +variations in your data center. During hardware enrollment, the inspection +data is matched against these definitions, and the matching resource class +name is set on the Ironic node for Nova flavor creation. + +### 5. Add to Deployment + +Use the `add` command to validate and add your device type definition to the +deployment repository: + +```bash +understackctl device-type add /tmp/dell-poweredge-r7615.yaml +``` + +The command will: + +* Validate the YAML structure against the full JSON schema +* Check all required fields, types, and constraints +* Automatically generate a filename from manufacturer and model (lowercase with hyphens, e.g., `dell-poweredge-r7615.yaml`) +* Copy the file to `$UC_DEPLOY/hardware/device-types/` +* Update `$UC_DEPLOY/hardware/base/kustomization.yaml` to include the new device type + +Successful output: + +```text +INFO Device type added successfully path=$UC_DEPLOY/hardware/device-types/dell-poweredge-r7615.yaml +INFO Added to kustomization.yaml file=dell-poweredge-r7615.yaml +``` + +Validation errors will show which fields are missing or invalid: + +```text +Error: missing required field: u_height +Error: device-type already exists at $UC_DEPLOY/hardware/device-types/dell-poweredge-r7615.yaml +``` + +### 6. Commit and Deploy + +The kustomization file has been automatically updated by the `add` command. 
+ +```bash +cd $UC_DEPLOY +git add hardware/ +git commit -m "Add Dell PowerEdge R7615 device type definition" +git push +``` + +ArgoCD will detect the changes and update the device-types ConfigMap. + +## Validating Device Types + +You can validate a device type definition without adding it to the deployment: + +```bash +understackctl device-type validate /tmp/dell-poweredge-r7615.yaml +``` + +This performs full JSON schema validation including: + +* Required field presence +* Type correctness (strings, numbers, booleans, arrays, objects) +* Enum constraints (e.g., `class` must be server/switch/firewall) +* Conditional requirements (servers must have resource classes) +* Numeric constraints (e.g., `u_height > 0`) + +Successful output: + +```text +INFO Device type definition is valid class=server manufacturer=Dell model=PowerEdge R7615 +``` + +Validation errors will show the specific issue: + +```text +Error: validation failed: u_height: must be > 0 +Error: validation failed: missing properties: 'cpu', 'memory', 'drives', 'nic_count' +``` + +## Listing Device Types + +View all device type definitions in your deployment: + +```bash +understackctl device-type list +``` + +Example output: + +```text +Device Types: + - dell-poweredge-r7615 + - hpe-proliant-dl360 + - cisco-nexus-9336c +``` + +## Viewing Device Type Details + +Display the full specification for a specific device type: + +```bash +understackctl device-type show dell-poweredge-r7615 +``` + +Example output: + +```text +Device Type: dell-poweredge-r7615 +═══════════════════════════════════════════ + +Class: server +Manufacturer: Dell +Model: PowerEdge R7615 +Height (in u): 2 +Full Depth: true + +Interfaces: + 1. iDRAC (1000base-t) [Management Only] + +Resource Classes: + + 1. m1.small + ─────────────────────────────────── + CPU: 16 cores (AMD EPYC 9124) + Memory: 128 GB + NICs: 2 + Drives: 480 GB, 480 GB + + 2. 
m1.medium + ─────────────────────────────────── + CPU: 32 cores (AMD EPYC 9334) + Memory: 256 GB + NICs: 2 + Drives: 960 GB, 960 GB + + 3. m1.large + ─────────────────────────────────── + CPU: 64 cores (AMD EPYC 9554) + Memory: 512 GB + NICs: 4 + Drives: 1920 GB, 1920 GB +``` + +## Updating Device Types + +To update an existing device type definition: + +1. Edit the file directly in `$UC_DEPLOY/hardware/device-types/` +1. Validate your changes: + +```bash +understackctl device-type validate $UC_DEPLOY/hardware/device-types/dell-poweredge-r7615.yaml +``` + +1. Commit the changes: + +```bash +cd $UC_DEPLOY +git add hardware/device-types/dell-poweredge-r7615.yaml +git commit -m "Update Dell PowerEdge R7615 device type configuration" +git push +``` + +ArgoCD will detect the changes and update the device-types ConfigMap. + +## Deleting Device Types + +Remove a device type definition from your deployment: + +```bash +understackctl device-type delete dell-poweredge-r7615 +``` + +The command will: + +* Remove the file from `$UC_DEPLOY/hardware/device-types/` +* Update `$UC_DEPLOY/hardware/base/kustomization.yaml` to remove the entry + +After deletion, commit the changes: + +```bash +cd $UC_DEPLOY +git add hardware/ +git commit -m "Remove Dell PowerEdge R7615 device type" +git push +``` + +## Best Practices + +### Naming Conventions + +* Filenames are automatically generated from the manufacturer and model fields +* Ensure manufacturer and model fields are accurate as they determine the filename + +### Resource Class Design + +* Define resource classes that map to your Nova flavor strategy +* Use descriptive names that indicate the workload type or size tier +* Define multiple resource classes for the same device type when you have + common build variations in your data center +* Resource class names are set on Ironic nodes and are used as one of the inputs to create Nova flavors + +**Example**: A Dell PowerEdge R7615 chassis might have three common builds: + +* `m1.small`: 
16-core CPU, 128GB RAM, basic drives (general compute) +* `m1.medium`: 32-core CPU, 256GB RAM, faster drives (balanced workloads) +* `m1.large`: 64-core CPU, 512GB RAM, high-capacity drives (memory-intensive) + +Each build variation becomes a separate resource class, allowing precise +matching during hardware enrollment and accurate Nova flavor creation. + +### Version Control + +* Always validate device types before committing +* Include descriptive commit messages explaining what hardware is being added +* Submit changes via pull requests for team review +* Tag releases when updating device type definitions for production deployments + +### Interface Definitions + +* Use the top-level `interfaces` section for named physical ports (management + interfaces, specific switch ports) +* Mark management interfaces with `mgmt_only: true` +* Follow standard interface type naming (e.g., `1000base-t`, `10gbase-x-sfp+`, + `25gbase-x-sfp28`) - see [Nautobot interface types](https://docs.nautobot.com/projects/core/en/stable/user-guide/core-data-model/dcim/interface/#interface-type) for available values +* Use `nic_count` in resource classes to specify minimum user-usable network + ports (not tied to specific named interfaces) + +## Troubleshooting + +### Validation Failures + +**Missing required fields**: + +Ensure all required fields are present: `class`, `manufacturer`, `model`, +`u_height`, `is_full_depth`. For server-class devices, also include +`resource_class`. + +**Invalid class value**: + +The `class` field must be exactly one of: `server`, `switch`, `firewall`. + +**Invalid u_height**: + +The `u_height` must be a number greater than 0. + +**Invalid resource class**: + +Each resource class entry must include all required fields: `name`, `cpu`, +`memory`, `drives`, `nic_count`. + +### ConfigMap Not Updating + +If ArgoCD doesn't pick up your changes: + +1. Verify the file is listed in `hardware/base/kustomization.yaml` +2. 
Check that you've committed and pushed to the correct branch +3. Review ArgoCD application status: `kubectl get applications -n argocd` +4. Force a sync if needed: `argocd app sync <app-name>` + +### Schema Validation in Editor + +If your editor doesn't validate the YAML: + +1. Ensure the schema directive is on the first line +2. Verify your editor supports YAML language server protocol +3. Check that the schema URL is accessible +4. Try installing a YAML extension for your editor (e.g., YAML extension for + VS Code) + +## Examples + +### Server Device Type + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +class: server +manufacturer: HPE +model: ProLiant DL360 Gen10 +u_height: 1 +is_full_depth: true + +interfaces: + - name: iLO + type: 1000base-t + mgmt_only: true + - name: NIC1 + type: 10gbase-x-sfp+ + - name: NIC2 + type: 10gbase-x-sfp+ + +power-ports: + - name: psu1 + type: iec-60320-c14 + maximum_draw: 800 + - name: psu2 + type: iec-60320-c14 + maximum_draw: 800 + +resource_class: + - name: compute-standard + cpu: + cores: 24 + model: Intel Xeon Gold 6252 + memory: + size: 192 + drives: + - size: 480 + - size: 960 + - size: 960 + nic_count: 2 +``` + +### Switch Device Type + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +class: switch +manufacturer: Cisco +model: Nexus 9336C-FX2 +u_height: 1 +is_full_depth: true + +interfaces: + - name: mgmt0 + type: 1000base-t + mgmt_only: true + - name: Ethernet1/1 + type: 100gbase-x-qsfp28 + - name: Ethernet1/2 + type: 100gbase-x-qsfp28 + # ... 
additional 34 100G QSFP28 ports + +power-ports: + - name: ps1 + type: iec-60320-c14 + maximum_draw: 450 + - name: ps2 + type: iec-60320-c14 + maximum_draw: 450 +``` + +### Firewall Device Type + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +class: firewall +manufacturer: Palo Alto +model: PA-5220 +u_height: 3 +is_full_depth: true + +interfaces: + - name: mgmt + type: 1000base-t + mgmt_only: true + - name: ha1-a + type: 1000base-t + - name: ha1-b + type: 1000base-t + - name: ethernet1/1 + type: 10gbase-t + - name: ethernet1/5 + type: 10gbase-x-sfp+ + # ... additional data plane interfaces + +power-ports: + - name: psu1 + type: iec-60320-c20 + maximum_draw: 1440 + - name: psu2 + type: iec-60320-c20 + maximum_draw: 1440 +``` diff --git a/docs/operator-guide/flavors.md b/docs/operator-guide/flavors.md new file mode 100644 index 000000000..2b3d98a3a --- /dev/null +++ b/docs/operator-guide/flavors.md @@ -0,0 +1,484 @@ +# Flavor Management + +Flavors define how Ironic bare metal nodes are matched to Nova flavors based on resource classes and hardware traits. This guide covers how to create, manage, and validate flavor definitions using the `understackctl` CLI tool. + +For architectural details and integration information, see the [design guide](../design-guide/flavors.md). 
+ +## Prerequisites + +* `understackctl` CLI tool installed +* Access to your deployment repository +* `UC_DEPLOY` environment variable set to your deployment repository path +* Existing device-type definitions with resource classes + +## Command Overview + +The `understackctl flavor` command provides five subcommands: + +```bash +understackctl flavor add # Add a flavor to the deployment +understackctl flavor validate # Validate a flavor definition +understackctl flavor delete # Delete a flavor +understackctl flavor list # List all flavors +understackctl flavor show # Show flavor details +``` + +All commands require the `UC_DEPLOY` environment variable to be set: + +```bash +export UC_DEPLOY=/path/to/your/deployment-repo +``` + +## Creating Flavor Definitions + +### 1. Create the YAML Definition File + +Create a new YAML file with your flavor specification. You can create it anywhere (e.g., `/tmp/my-flavor.yaml`). The filename doesn't matter at this stage as it will be automatically named based on the flavor name when added to the deployment. + +### 2. Define the Flavor Specification + +Start with the YAML language server directive for editor validation: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small +resource_class: m1.small +``` + +**Required Fields**: + +* `name`: Unique flavor name (e.g., `m1.small`, `compute.standard`) +* `resource_class`: Must match a resource class defined in a device-type + +**Note**: Nova flavor properties (vCPUs, RAM, disk) are automatically derived from the device-type resource class specification for convenience. Nova performs scheduling by matching the resource class and traits on Ironic nodes. See the [OpenStack Ironic flavor configuration documentation](https://docs.openstack.org/ironic/latest/install/configure-nova-flavors.html) for details. + +### 3. 
Add Trait Requirements (Optional) + +Define hardware trait requirements to filter which nodes match this flavor: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small.nicX +resource_class: m1.small +traits: + - trait: NICX + state: required +``` + +**Trait Requirements**: + +* `trait`: Hardware trait name WITHOUT the `CUSTOM_` prefix (e.g., `NICX`, `GPU`, `NVME`) + * Must be uppercase alphanumeric with underscores + * The system automatically adds `CUSTOM_` prefix when interacting with Ironic +* `state`: Either `required` (node must have trait) or `absent` (node must not have trait) + +### 4. Add to Deployment + +Use the `add` command to validate and add your flavor definition to the deployment repository: + +```bash +understackctl flavor add /tmp/m1.small.yaml +``` + +The command will: + +* Validate the YAML structure against the full JSON schema +* Check all required fields and types +* Automatically generate a filename from the flavor name (e.g., `m1.small.yaml`) +* Copy the file to `$UC_DEPLOY/hardware/flavors/` +* Update `$UC_DEPLOY/hardware/base/kustomization.yaml` to include the new flavor + +Successful output: + +```text +INFO Flavor added successfully path=$UC_DEPLOY/hardware/flavors/m1.small.yaml +INFO Added to kustomization.yaml file=m1.small.yaml +``` + +Validation errors will show which fields are missing or invalid: + +```text +Error: validation failed: missing properties: 'resource_class' +Error: flavor already exists at $UC_DEPLOY/hardware/flavors/m1.small.yaml +``` + +### 5. Commit and Deploy + +The kustomization file has been automatically updated by the `add` command. + +```bash +cd $UC_DEPLOY +git add hardware/ +git commit -m "Add m1.small flavor definition" +git push +``` + +ArgoCD will detect the changes and update the flavors ConfigMap. 
+ +## Validating Flavors + +You can validate a flavor definition without adding it to the deployment: + +```bash +understackctl flavor validate /tmp/m1.small.yaml +``` + +This performs full JSON schema validation including: + +* Required field presence (name, resource_class) +* Type correctness (strings) +* Trait name patterns (uppercase alphanumeric with underscores) +* Trait state enum values (`required` or `absent`) + +Successful output: + +```text +INFO Flavor definition is valid name=m1.small resource_class=m1.small +``` + +Validation errors will show the specific issue: + +```text +Error: validation failed: traits[0].state: must be one of [required, absent] +Error: validation failed: traits[0].trait: does not match pattern ^[A-Z][A-Z0-9_]*$ +``` + +## Listing Flavors + +View all flavor definitions in your deployment: + +```bash +understackctl flavor list +``` + +Example output: + +```text +Flavors: + - m1.small + - m1.small.nicX + - m1.medium + - compute.gpu +``` + +## Viewing Flavor Details + +Display the full specification for a specific flavor: + +```bash +understackctl flavor show m1.small +``` + +Example output: + +```text +Flavor: m1.small +═══════════════════════════════════════════ + +Resource Class: m1.small + (Nova properties derived from device-type for convenience; scheduling uses resource class and traits) + +Trait Requirements: None (matches all nodes in resource class) +``` + +For a flavor with trait requirements: + +```text +Flavor: m1.small.nicX +═══════════════════════════════════════════ + +Resource Class: m1.small + (Nova properties derived from device-type for convenience; scheduling uses resource class and traits) + +Trait Requirements: + 1. NICX [required] +``` + +## Updating Flavors + +To update an existing flavor definition: + +1. Edit the file directly in `$UC_DEPLOY/hardware/flavors/` +1. Validate your changes: + +```bash +understackctl flavor validate $UC_DEPLOY/hardware/flavors/m1.small.yaml +``` + +1. 
Commit the changes: + +```bash +cd $UC_DEPLOY +git add hardware/flavors/m1.small.yaml +git commit -m "Update m1.small flavor trait requirements" +git push +``` + +ArgoCD will detect the changes and update the flavors ConfigMap. + +## Deleting Flavors + +Remove a flavor definition from your deployment: + +```bash +understackctl flavor delete m1.small +``` + +The command will: + +* Remove the file from `$UC_DEPLOY/hardware/flavors/` +* Update `$UC_DEPLOY/hardware/base/kustomization.yaml` to remove the entry + +After deletion, commit the changes: + +```bash +cd $UC_DEPLOY +git add hardware/ +git commit -m "Remove m1.small flavor" +git push +``` + +## Common Use Cases + +### Generic Hardware Flavor + +Match all nodes in a resource class without trait filtering: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: compute.standard +resource_class: m1.medium +``` + +This provides maximum flexibility by allowing any hardware in the `m1.medium` resource class. + +### Specialized Hardware Flavor + +Require specific hardware capabilities: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: compute.gpu +resource_class: m1.large +traits: + - trait: GPU + state: required +``` + +Guarantees instances get nodes with GPU hardware. + +### NIC-Specific Hardware + +Require specific network hardware: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small.mellanox-cx5 +resource_class: m1.small +traits: + - trait: NIC_MELLANOX_CX5 + state: required +``` + +Only matches nodes with Mellanox ConnectX-5 network cards. 
+ +### Multiple Traits + +Combine multiple trait requirements: + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: compute.nvme-no-gpu +resource_class: m1.medium +traits: + - trait: NVME + state: required + - trait: GPU + state: absent +``` + +Requires NVMe storage but excludes GPU nodes. + +## Best Practices + +### Naming Conventions + +* **Generic flavors**: Use simple, descriptive names (e.g., `compute.standard`, `m1.small`) +* **Specialized flavors**: Append trait indicators (e.g., `m1.small.mellanox-cx5`, `compute.gpu`) +* **Exclusion flavors**: Use descriptive suffixes (e.g., `m1.small.no-gpu`) + +### Resource Class References + +* Always ensure the `resource_class` exists in device-type definitions before creating flavors +* Use `understackctl device-type list` to find device types, then `understackctl device-type show <device-type-name>` to see their available resource classes +* Nova properties (vCPUs, RAM, disk) are automatically derived from the device-type resource class for convenience (scheduling uses resource class and traits) + +### Trait Management + +* Write trait names without the `CUSTOM_` prefix (it's added automatically) +* Use uppercase with underscores (e.g., `NICX`, `NVIDIA_A100`, `NVME_STORAGE`) +* Document trait meanings and discovery logic in your team's documentation +* Balance between specificity and flexibility (too many specific traits fragment hardware pools) + +### Version Control + +* Always validate flavors before committing +* Include descriptive commit messages explaining the purpose of the flavor +* Submit changes via pull requests for team review +* Test flavor matching in non-production before promoting to production + +### Trait Strategy + +* Create a base generic flavor for each resource class (e.g., `m1.small`) +* Add specialized variants only when users need guaranteed hardware features +* Use trait absence requirements sparingly (mainly for excluding known-problematic hardware) +* Document trait requirements in flavor names or commit messages + 
+## Troubleshooting + +### Validation Failures + +**Missing required fields**: + +Ensure both `name` and `resource_class` are present in the definition. + +**Invalid trait name**: + +Trait names must be uppercase alphanumeric with underscores. Don't include the `CUSTOM_` prefix. + +**Invalid state**: + +The `state` field must be exactly `required` or `absent`. + +### ConfigMap Not Updating + +If ArgoCD doesn't pick up your changes: + +1. Verify the file is listed in `hardware/base/kustomization.yaml` +2. Check that you've committed and pushed to the correct branch +3. Review ArgoCD application status: `kubectl get applications -n argocd` +4. Force a sync if needed: `argocd app sync <app-name>` + +### Resource Class Not Found + +If the flavor references a non-existent resource class: + +1. List available device-types: `understackctl device-type list` +2. Show device-type details to see resource classes: `understackctl device-type show <device-type-name>` +3. Create the necessary device-type or use an existing resource class + +### Trait Matching Issues + +If nodes aren't matching the expected flavor: + +1. Check Ironic node traits: `openstack baremetal node show <node> -f json -c traits` +2. Verify trait names have the `CUSTOM_` prefix in Ironic +3. Confirm inspection code is properly adding traits to nodes +4. Review flavor-matcher logs for matching errors + +### Schema Validation in Editor + +If your editor doesn't validate the YAML: + +1. Ensure the schema directive is on the first line +2. Verify your editor supports YAML language server protocol +3. Try installing a YAML extension for your editor (e.g., YAML extension for VS Code) + +## Examples + +### Generic Compute Flavor + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small +resource_class: m1.small +``` + +Matches all nodes in the `m1.small` resource class. 
+ +### NIC-Specific Flavor + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.small.nicX +resource_class: m1.small +traits: + - trait: NICX + state: required +``` + +Only matches nodes with the `CUSTOM_NICX` trait. + +### GPU Compute Flavor + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: compute.gpu +resource_class: m1.large +traits: + - trait: GPU + state: required +``` + +Guarantees GPU hardware for compute workloads. + +### Combined Hardware Requirements + +```yaml +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/flavor.schema.json +name: m1.medium.mellanox-cx5-nvme +resource_class: m1.medium +traits: + - trait: NIC_MELLANOX_CX5 + state: required + - trait: NVME + state: required +``` + +Requires both Mellanox ConnectX-5 and NVMe storage. + +## Integration with Device-Types + +Flavors and device-types work together to define Nova flavors: + +**Device-Type** (`dell-poweredge-r7615.yaml`): + +```yaml +class: server +manufacturer: Dell +model: PowerEdge R7615 +resource_class: + - name: m1.small + cpu: + cores: 16 + model: AMD EPYC 9124 + memory: + size: 128 # GB + drives: + - size: 480 # GB + - size: 480 + nic_count: 2 +``` + +**Flavor** (`m1.small.yaml`): + +```yaml +name: m1.small +resource_class: m1.small +``` + +The flavor-matcher service: + +1. Reads the flavor definition +2. Queries Ironic for nodes with `resource_class=m1.small` +3. Looks up the device-type `m1.small` resource class +4. 
Creates a Nova flavor with: + * vcpus: 16 (from `cpu.cores`) + * ram: 131072 MB (from `memory.size` * 1024) + * disk: 480 GB (from `drives[0].size`) + +This separation allows you to: + +* Define hardware specifications once in device-types +* Create multiple flavors (generic, specialized, exclusion) for the same resource class +* Filter hardware matching without duplicating Nova property specifications diff --git a/docs/schema/device-type.schema.json b/docs/schema/device-type.schema.json new file mode 120000 index 000000000..67a1a7989 --- /dev/null +++ b/docs/schema/device-type.schema.json @@ -0,0 +1 @@ +../../schema/device-type.schema.json \ No newline at end of file diff --git a/examples/deploy-repo/hardware/base/kustomization.yaml b/examples/deploy-repo/hardware/base/kustomization.yaml new file mode 100644 index 000000000..d4e8b68d1 --- /dev/null +++ b/examples/deploy-repo/hardware/base/kustomization.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +configMapGenerator: + - name: device-types + options: + disableNameSuffixHash: true + files: + - cisco-nexus-9336c-fx2.yaml=../device-types/cisco-nexus-9336c-fx2.yaml + - dell-poweredge-r7615.yaml=../device-types/dell-poweredge-r7615.yaml + - palo-alto-pa-5220.yaml=../device-types/palo-alto-pa-5220.yaml + - name: flavors + options: + disableNameSuffixHash: true + files: + - m1.small.yaml=../flavors/m1.small.yaml + - m1.small.nicX.yaml=../flavors/m1.small.nicX.yaml diff --git a/examples/deploy-repo/hardware/device-types/cisco-nexus-9336c-fx2.yaml b/examples/deploy-repo/hardware/device-types/cisco-nexus-9336c-fx2.yaml new file mode 100644 index 000000000..f2b42c116 --- /dev/null +++ b/examples/deploy-repo/hardware/device-types/cisco-nexus-9336c-fx2.yaml @@ -0,0 +1,83 @@ +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/device-type.schema.json +class: switch +manufacturer: Cisco +model: Nexus 9336C-FX2 +u_height: 1 +is_full_depth: true + +interfaces: + 
- name: mgmt0 + type: 1000base-t + mgmt_only: true + - name: Ethernet1/1 + type: 100gbase-x-qsfp28 + - name: Ethernet1/2 + type: 100gbase-x-qsfp28 + - name: Ethernet1/3 + type: 100gbase-x-qsfp28 + - name: Ethernet1/4 + type: 100gbase-x-qsfp28 + - name: Ethernet1/5 + type: 100gbase-x-qsfp28 + - name: Ethernet1/6 + type: 100gbase-x-qsfp28 + - name: Ethernet1/7 + type: 100gbase-x-qsfp28 + - name: Ethernet1/8 + type: 100gbase-x-qsfp28 + - name: Ethernet1/9 + type: 100gbase-x-qsfp28 + - name: Ethernet1/10 + type: 100gbase-x-qsfp28 + - name: Ethernet1/11 + type: 100gbase-x-qsfp28 + - name: Ethernet1/12 + type: 100gbase-x-qsfp28 + - name: Ethernet1/13 + type: 100gbase-x-qsfp28 + - name: Ethernet1/14 + type: 100gbase-x-qsfp28 + - name: Ethernet1/15 + type: 100gbase-x-qsfp28 + - name: Ethernet1/16 + type: 100gbase-x-qsfp28 + - name: Ethernet1/17 + type: 100gbase-x-qsfp28 + - name: Ethernet1/18 + type: 100gbase-x-qsfp28 + - name: Ethernet1/19 + type: 100gbase-x-qsfp28 + - name: Ethernet1/20 + type: 100gbase-x-qsfp28 + - name: Ethernet1/21 + type: 100gbase-x-qsfp28 + - name: Ethernet1/22 + type: 100gbase-x-qsfp28 + - name: Ethernet1/23 + type: 100gbase-x-qsfp28 + - name: Ethernet1/24 + type: 100gbase-x-qsfp28 + - name: Ethernet1/25 + type: 100gbase-x-qsfp28 + - name: Ethernet1/26 + type: 100gbase-x-qsfp28 + - name: Ethernet1/27 + type: 100gbase-x-qsfp28 + - name: Ethernet1/28 + type: 100gbase-x-qsfp28 + - name: Ethernet1/29 + type: 100gbase-x-qsfp28 + - name: Ethernet1/30 + type: 100gbase-x-qsfp28 + - name: Ethernet1/31 + type: 100gbase-x-qsfp28 + - name: Ethernet1/32 + type: 100gbase-x-qsfp28 + - name: Ethernet1/33 + type: 100gbase-x-qsfp28 + - name: Ethernet1/34 + type: 100gbase-x-qsfp28 + - name: Ethernet1/35 + type: 100gbase-x-qsfp28 + - name: Ethernet1/36 + type: 100gbase-x-qsfp28 diff --git a/examples/deploy-repo/hardware/device-types/dell-poweredge-r7615.yaml b/examples/deploy-repo/hardware/device-types/dell-poweredge-r7615.yaml new file mode 100644 index 
000000000..2101e3c31 --- /dev/null +++ b/examples/deploy-repo/hardware/device-types/dell-poweredge-r7615.yaml @@ -0,0 +1,45 @@ +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/schema/device-type.schema.json +class: server +manufacturer: Dell +model: PowerEdge R7615 +u_height: 2 +is_full_depth: true + +interfaces: + - name: iDRAC + type: 1000base-t + mgmt_only: true + +resource_class: + - name: m1.small + cpu: + cores: 16 + model: AMD EPYC 9124 + memory: + size: 128 + drives: + - size: 480 + - size: 480 + nic_count: 2 + + - name: m1.medium + cpu: + cores: 32 + model: AMD EPYC 9334 + memory: + size: 256 + drives: + - size: 960 + - size: 960 + nic_count: 2 + + - name: m1.large + cpu: + cores: 64 + model: AMD EPYC 9554 + memory: + size: 512 + drives: + - size: 1920 + - size: 1920 + nic_count: 4 diff --git a/examples/deploy-repo/hardware/device-types/palo-alto-pa-5220.yaml b/examples/deploy-repo/hardware/device-types/palo-alto-pa-5220.yaml new file mode 100644 index 000000000..4d1ee14ca --- /dev/null +++ b/examples/deploy-repo/hardware/device-types/palo-alto-pa-5220.yaml @@ -0,0 +1,69 @@ +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/schema/device-type.schema.json +class: firewall +manufacturer: Palo Alto +model: PA-5220 +u_height: 3 +is_full_depth: true + +interfaces: + - name: mgmt + type: 1000base-t + mgmt_only: true + - name: ha1-a + type: 1000base-t + - name: ha1-b + type: 1000base-t + - name: hsci + type: 40gbase-x-qsfp+ + - name: aux-1 + type: 10gbase-x-sfp+ + - name: aux-2 + type: 10gbase-x-sfp+ + - name: ethernet1/1 + type: 10gbase-t + - name: ethernet1/2 + type: 10gbase-t + - name: ethernet1/3 + type: 10gbase-t + - name: ethernet1/4 + type: 10gbase-t + - name: ethernet1/5 + type: 10gbase-x-sfp+ + - name: ethernet1/6 + type: 10gbase-x-sfp+ + - name: ethernet1/7 + type: 10gbase-x-sfp+ + - name: ethernet1/8 + type: 10gbase-x-sfp+ + - name: ethernet1/9 + type: 10gbase-x-sfp+ + - name: ethernet1/10 + type: 10gbase-x-sfp+ + -
name: ethernet1/11 + type: 10gbase-x-sfp+ + - name: ethernet1/12 + type: 10gbase-x-sfp+ + - name: ethernet1/13 + type: 10gbase-x-sfp+ + - name: ethernet1/14 + type: 10gbase-x-sfp+ + - name: ethernet1/15 + type: 10gbase-x-sfp+ + - name: ethernet1/16 + type: 10gbase-x-sfp+ + - name: ethernet1/17 + type: 10gbase-x-sfp+ + - name: ethernet1/18 + type: 10gbase-x-sfp+ + - name: ethernet1/19 + type: 10gbase-x-sfp+ + - name: ethernet1/20 + type: 10gbase-x-sfp+ + - name: ethernet1/21 + type: 40gbase-x-qsfp+ + - name: ethernet1/22 + type: 40gbase-x-qsfp+ + - name: ethernet1/23 + type: 40gbase-x-qsfp+ + - name: ethernet1/24 + type: 40gbase-x-qsfp+ diff --git a/examples/deploy-repo/hardware/flavors/m1.small.nicX.yaml b/examples/deploy-repo/hardware/flavors/m1.small.nicX.yaml new file mode 100644 index 000000000..536bc63e3 --- /dev/null +++ b/examples/deploy-repo/hardware/flavors/m1.small.nicX.yaml @@ -0,0 +1,6 @@ +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/schema/flavor.schema.json +name: m1.small.nicX +resource_class: m1.small +traits: + - trait: NICX + requirement: required diff --git a/examples/deploy-repo/hardware/flavors/m1.small.yaml b/examples/deploy-repo/hardware/flavors/m1.small.yaml new file mode 100644 index 000000000..7c2caf753 --- /dev/null +++ b/examples/deploy-repo/hardware/flavors/m1.small.yaml @@ -0,0 +1,3 @@ +# yaml-language-server: $schema=https://rackerlabs.github.io/understack/schema/flavor.schema.json +name: m1.small +resource_class: m1.small diff --git a/go/understackctl/cmd/deviceType/deviceType.go b/go/understackctl/cmd/deviceType/deviceType.go new file mode 100644 index 000000000..d9d00e49c --- /dev/null +++ b/go/understackctl/cmd/deviceType/deviceType.go @@ -0,0 +1,459 @@ +package deviceType + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/charmbracelet/log" + "github.com/gookit/goutil/envutil" + "github.com/santhosh-tekuri/jsonschema/v5" + "github.com/spf13/cobra" + "gopkg.in/yaml.v3" +) +
+func getDeviceTypesDir() string { + deployPath := envutil.Getenv("UC_DEPLOY") + if deployPath == "" { + log.Fatal("UC_DEPLOY environment variable is not set") + } + return filepath.Join(deployPath, "hardware", "device-types") +} + +func getKustomizationPath() string { + deployPath := envutil.Getenv("UC_DEPLOY") + if deployPath == "" { + log.Fatal("UC_DEPLOY environment variable is not set") + } + return filepath.Join(deployPath, "hardware", "base", "kustomization.yaml") +} + +// updateKustomization adds (add == true) or removes (add == false) the +// "fileName=../device-types/fileName" entry in the files list of the +// "device-types" configMapGenerator in kustomization.yaml, then rewrites the file. +// Generator or file entries of an unexpected type are skipped rather than causing a panic. +func updateKustomization(fileName string, add bool) error { + kustomizationPath := getKustomizationPath() + + // Read existing kustomization + data, err := os.ReadFile(kustomizationPath) + if err != nil { + return fmt.Errorf("failed to read kustomization.yaml: %w", err) + } + + var kustomization map[string]interface{} + if err := yaml.Unmarshal(data, &kustomization); err != nil { + return fmt.Errorf("failed to parse kustomization.yaml: %w", err) + } + + // Navigate to configMapGenerator + configMapGenerators, ok := kustomization["configMapGenerator"].([]interface{}) + if !ok { + return fmt.Errorf("configMapGenerator not found or invalid format") + } + + // Find the device-types configMap + var deviceTypesMap map[string]interface{} + for _, gen := range configMapGenerators { + genMap, isMap := gen.(map[string]interface{}) + if !isMap { + // Skip malformed generator entries instead of panicking + continue + } + if genMap["name"] == "device-types" { + deviceTypesMap = genMap + break + } + } + + if deviceTypesMap == nil { + return fmt.Errorf("device-types configMapGenerator not found") + } + + // Get files array + files, ok := deviceTypesMap["files"].([]interface{}) + if !ok { + files = []interface{}{} + } + + // Build the entry: filename=../device-types/filename + entry := fmt.Sprintf("%s=../device-types/%s", fileName, fileName) + + if add { + // Check if entry already exists + for _, f := range files { + if s, isString := f.(string); isString && s == entry { + log.Info("Entry already exists in kustomization.yaml", "file", fileName) + return nil + } + } + // Add the entry + files = append(files, entry) +
log.Info("Added to kustomization.yaml", "file", fileName) + } else { + // Remove the entry + var newFiles []interface{} + found := false + for _, f := range files { + if s, isString := f.(string); isString && s == entry { + found = true + continue + } + // Keep every other entry (including non-string ones) untouched + newFiles = append(newFiles, f) + } + if !found { + log.Warn("Entry not found in kustomization.yaml", "file", fileName) + } else { + log.Info("Removed from kustomization.yaml", "file", fileName) + } + files = newFiles + } + + // Update the files array + deviceTypesMap["files"] = files + + // Marshal back to YAML + output, err := yaml.Marshal(kustomization) + if err != nil { + return fmt.Errorf("failed to marshal kustomization.yaml: %w", err) + } + + // Write back + if err := os.WriteFile(kustomizationPath, output, 0644); err != nil { + return fmt.Errorf("failed to write kustomization.yaml: %w", err) + } + + return nil +} + +func NewCmdDeviceType() *cobra.Command { + cmd := &cobra.Command{ + Use: "device-type", + Short: "Manage device type definitions", + Long: "Add, validate, delete, list, and show hardware device type definitions", + } + + cmd.AddCommand(newCmdAdd()) + cmd.AddCommand(newCmdValidate()) + cmd.AddCommand(newCmdDelete()) + cmd.AddCommand(newCmdList()) + cmd.AddCommand(newCmdShow()) + + return cmd +} + +func newCmdAdd() *cobra.Command { + return &cobra.Command{ + Use: "add ", + Short: "Add a device-type definition to the deployment", + Long: "Validate and add a device-type definition from a YAML file to the deployment repository", + Args: cobra.ExactArgs(1), + RunE: runAdd, + } +} + +func newCmdValidate() *cobra.Command { + return &cobra.Command{ + Use: "validate ", + Short: "Validate a device-type definition", + Long: "Validate a device-type definition against the JSON schema without adding it to the deployment", + Args: cobra.ExactArgs(1), + RunE: runValidate, + } +} + +func newCmdDelete() *cobra.Command { + return &cobra.Command{ + Use: "delete ", + Short: "Delete a device-type definition", + Long: "Delete a device-type definition by
name", + Args: cobra.ExactArgs(1), + RunE: runDelete, + } +} + +func newCmdList() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all device-type definitions", + Long: "List all device-type definitions in the hardware/device-types directory", + Args: cobra.NoArgs, + RunE: runList, + } +} + +func newCmdShow() *cobra.Command { + return &cobra.Command{ + Use: "show ", + Short: "Show details of a device-type", + Long: "Show detailed information about a specific device-type definition", + Args: cobra.ExactArgs(1), + RunE: runShow, + } +} + +func parseDeviceType(data []byte) (*DeviceType, error) { + var deviceType DeviceType + if err := yaml.Unmarshal(data, &deviceType); err != nil { + return nil, fmt.Errorf("failed to parse YAML: %w", err) + } + return &deviceType, nil +} + +func validateDeviceType(data []byte) (*DeviceType, error) { + // Parse YAML into struct + deviceType, err := parseDeviceType(data) + if err != nil { + return nil, err + } + + // Get the schema file path + deployPath := envutil.Getenv("UC_DEPLOY") + var schemaPath string + + // Try to find schema - check both UC_DEPLOY and current working directory context + possiblePaths := []string{ + filepath.Join(deployPath, "..", "..", "schema", "device-type.schema.json"), + "../../schema/device-type.schema.json", + "../../../schema/device-type.schema.json", + } + + for _, path := range possiblePaths { + if _, err := os.Stat(path); err == nil { + schemaPath = path + break + } + } + + if schemaPath == "" { + return nil, fmt.Errorf("could not find device-type.schema.json in expected locations") + } + + // Load and compile schema + compiler := jsonschema.NewCompiler() + if err := compiler.AddResource("schema.json", strings.NewReader(readSchemaFile(schemaPath))); err != nil { + return nil, fmt.Errorf("failed to add schema resource: %w", err) + } + + schema, err := compiler.Compile("schema.json") + if err != nil { + return nil, fmt.Errorf("failed to compile schema: %w", err) + } + + // Convert 
to JSON for validation (jsonschema library works with JSON) + // Validate the raw document rather than the re-marshalled struct: the struct + // fills omitted required fields with zero values (e.g. is_full_depth=false) + // and drops keys it has no field for (e.g. power-ports), hiding schema violations. + var rawDoc interface{} + if err := yaml.Unmarshal(data, &rawDoc); err != nil { + return nil, fmt.Errorf("failed to parse YAML: %w", err) + } + + jsonData, err := json.Marshal(rawDoc) + if err != nil { + return nil, fmt.Errorf("failed to convert to JSON: %w", err) + } + + var jsonDoc interface{} + if err := json.Unmarshal(jsonData, &jsonDoc); err != nil { + return nil, fmt.Errorf("failed to parse JSON: %w", err) + } + + // Validate against schema + if err := schema.Validate(jsonDoc); err != nil { + return nil, fmt.Errorf("validation failed: %w", err) + } + + return deviceType, nil +} + +func readSchemaFile(path string) string { + data, err := os.ReadFile(path) + if err != nil { + log.Fatalf("Failed to read schema file: %v", err) + } + return string(data) +} + +func generateFileName(manufacturer, model string) string { + manufacturerClean := strings.ToLower(strings.ReplaceAll(manufacturer, " ", "-")) + modelClean := strings.ToLower(strings.ReplaceAll(model, " ", "-")) + return fmt.Sprintf("%s-%s.yaml", manufacturerClean, modelClean) +} + +func runAdd(cmd *cobra.Command, args []string) error { + sourceFile := args[0] + + // Read the file + data, err := os.ReadFile(sourceFile) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } + + // Validate against JSON schema + deviceType, err := validateDeviceType(data) + if err != nil { + return err + } + + // Validate manufacturer and model are non-empty + if deviceType.Manufacturer == "" || deviceType.Model == "" { + return fmt.Errorf("manufacturer and model must be non-empty strings") + } + + // Generate filename + fileName := generateFileName(deviceType.Manufacturer, deviceType.Model) + destPath := filepath.Join(getDeviceTypesDir(), fileName) + + // Check if file already exists + if _, err := os.Stat(destPath); err == nil { + return fmt.Errorf("device-type already exists at %s", destPath) + } + + // Ensure the directory exists + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return fmt.Errorf("failed to create directory: %w", err) + } + + // Copy the file to
the destination + if err := os.WriteFile(destPath, data, 0644); err != nil { + return fmt.Errorf("failed to write device-type file: %w", err) + } + + log.Info("Device type added successfully", "path", destPath) + + // Update kustomization.yaml + if err := updateKustomization(fileName, true); err != nil { + return fmt.Errorf("failed to update kustomization.yaml: %w", err) + } + + return nil +} + +func runValidate(cmd *cobra.Command, args []string) error { + sourceFile := args[0] + + // Read the file + data, err := os.ReadFile(sourceFile) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } + + // Validate against JSON schema + deviceType, err := validateDeviceType(data) + if err != nil { + return err + } + + log.Info("Device type definition is valid", + "class", deviceType.Class, + "manufacturer", deviceType.Manufacturer, + "model", deviceType.Model) + + return nil +} + +func runDelete(cmd *cobra.Command, args []string) error { + name := args[0] + fileName := fmt.Sprintf("%s.yaml", name) + filePath := filepath.Join(getDeviceTypesDir(), fileName) + + if _, err := os.Stat(filePath); os.IsNotExist(err) { + return fmt.Errorf("device-type %s not found", name) + } + + if err := os.Remove(filePath); err != nil { + return fmt.Errorf("failed to delete device-type: %w", err) + } + + log.Info("Device type deleted", "name", name) + + // Update kustomization.yaml + if err := updateKustomization(fileName, false); err != nil { + return fmt.Errorf("failed to update kustomization.yaml: %w", err) + } + + return nil +} + +func runList(cmd *cobra.Command, args []string) error { + deviceTypesDir := getDeviceTypesDir() + entries, err := os.ReadDir(deviceTypesDir) + if err != nil { + if os.IsNotExist(err) { + log.Info("No device types found - directory does not exist") + return nil + } + return fmt.Errorf("failed to read device-types directory: %w", err) + } + + if len(entries) == 0 { + log.Info("No device types found") + return nil + } + + fmt.Println("Device 
Types:") + for _, entry := range entries { + if !entry.IsDir() && filepath.Ext(entry.Name()) == ".yaml" { + name := entry.Name()[:len(entry.Name())-5] // Remove .yaml extension + fmt.Printf(" - %s\n", name) + } + } + + return nil +} + +func runShow(cmd *cobra.Command, args []string) error { + name := args[0] + fileName := fmt.Sprintf("%s.yaml", name) + filePath := filepath.Join(getDeviceTypesDir(), fileName) + + data, err := os.ReadFile(filePath) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("device-type %s not found", name) + } + return fmt.Errorf("failed to read device-type file: %w", err) + } + + deviceType, err := parseDeviceType(data) + if err != nil { + return fmt.Errorf("failed to parse device-type: %w", err) + } + + // Display device type information in a formatted way + fmt.Printf("Device Type: %s\n", name) + fmt.Printf("═══════════════════════════════════════════\n\n") + + // Basic information + fmt.Printf("Class: %s\n", deviceType.Class) + fmt.Printf("Manufacturer: %s\n", deviceType.Manufacturer) + fmt.Printf("Model: %s\n", deviceType.Model) + fmt.Printf("Height (in u): %.0f\n", deviceType.UHeight) + fmt.Printf("Full Depth: %t\n\n", deviceType.IsFullDepth) + + // Interfaces + if len(deviceType.Interfaces) > 0 { + fmt.Printf("Interfaces:\n") + for i, iface := range deviceType.Interfaces { + fmt.Printf(" %d. %s (%s)", i+1, iface.Name, iface.Type) + if iface.MgmtOnly { + fmt.Printf(" [Management Only]") + } + fmt.Println() + } + fmt.Println() + } + + // Resource Classes + if len(deviceType.ResourceClass) > 0 { + fmt.Printf("Resource Classes:\n") + for i, rc := range deviceType.ResourceClass { + fmt.Printf("\n %d. 
%s\n", i+1, rc.Name) + fmt.Printf(" ───────────────────────────────────\n") + fmt.Printf(" CPU: %d cores (%s)\n", rc.CPU.Cores, rc.CPU.Model) + fmt.Printf(" Memory: %d GB\n", rc.Memory.Size) + fmt.Printf(" NICs: %d\n", rc.NICCount) + + if len(rc.Drives) > 0 { + fmt.Printf(" Drives: ") + for j, drive := range rc.Drives { + if j > 0 { + fmt.Printf(", ") + } + fmt.Printf("%d GB", drive.Size) + } + fmt.Println() + } + } + fmt.Println() + } + + return nil +} diff --git a/go/understackctl/cmd/deviceType/types.go b/go/understackctl/cmd/deviceType/types.go new file mode 100644 index 000000000..d968af908 --- /dev/null +++ b/go/understackctl/cmd/deviceType/types.go @@ -0,0 +1,45 @@ +package deviceType + +// DeviceType represents a hardware device type definition +type DeviceType struct { + Class string `yaml:"class" json:"class"` + Manufacturer string `yaml:"manufacturer" json:"manufacturer"` + Model string `yaml:"model" json:"model"` + UHeight float64 `yaml:"u_height" json:"u_height"` + IsFullDepth bool `yaml:"is_full_depth" json:"is_full_depth"` + Interfaces []Interface `yaml:"interfaces,omitempty" json:"interfaces,omitempty"` + ResourceClass []ResourceClass `yaml:"resource_class,omitempty" json:"resource_class,omitempty"` +} + +// Interface represents a network interface +type Interface struct { + Name string `yaml:"name" json:"name"` + Type string `yaml:"type" json:"type"` + MgmtOnly bool `yaml:"mgmt_only,omitempty" json:"mgmt_only,omitempty"` + DetectOnly bool `yaml:"detect_only,omitempty" json:"detect_only,omitempty"` +} + +// ResourceClass represents a hardware configuration profile +type ResourceClass struct { + Name string `yaml:"name" json:"name"` + CPU CPU `yaml:"cpu" json:"cpu"` + Memory Memory `yaml:"memory" json:"memory"` + Drives []Drive `yaml:"drives" json:"drives"` + NICCount int `yaml:"nic_count" json:"nic_count"` +} + +// CPU represents CPU specifications +type CPU struct { + Cores int `yaml:"cores" json:"cores"` + Model string `yaml:"model" json:"model"` 
+} + +// Memory represents memory specifications +type Memory struct { + Size int `yaml:"size" json:"size"` +} + +// Drive represents a storage drive +type Drive struct { + Size int `yaml:"size" json:"size"` +} diff --git a/go/understackctl/cmd/flavor/flavor.go b/go/understackctl/cmd/flavor/flavor.go new file mode 100644 index 000000000..d3f7af9d5 --- /dev/null +++ b/go/understackctl/cmd/flavor/flavor.go @@ -0,0 +1,427 @@ +package flavor + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/charmbracelet/log" + "github.com/gookit/goutil/envutil" + "github.com/santhosh-tekuri/jsonschema/v5" + "github.com/spf13/cobra" + "gopkg.in/yaml.v3" +) + +func getFlavorsDir() string { + deployPath := envutil.Getenv("UC_DEPLOY") + if deployPath == "" { + log.Fatal("UC_DEPLOY environment variable is not set") + } + return filepath.Join(deployPath, "hardware", "flavors") +} + +func getKustomizationPath() string { + deployPath := envutil.Getenv("UC_DEPLOY") + if deployPath == "" { + log.Fatal("UC_DEPLOY environment variable is not set") + } + return filepath.Join(deployPath, "hardware", "base", "kustomization.yaml") +} + +func updateKustomization(fileName string, add bool) error { + kustomizationPath := getKustomizationPath() + + // Read existing kustomization + data, err := os.ReadFile(kustomizationPath) + if err != nil { + return fmt.Errorf("failed to read kustomization.yaml: %w", err) + } + + var kustomization map[string]interface{} + if err := yaml.Unmarshal(data, &kustomization); err != nil { + return fmt.Errorf("failed to parse kustomization.yaml: %w", err) + } + + // Navigate to configMapGenerator + configMapGenerators, ok := kustomization["configMapGenerator"].([]interface{}) + if !ok { + return fmt.Errorf("configMapGenerator not found or invalid format") + } + + // Find the flavors configMap + var flavorsMap map[string]interface{} + for _, gen := range configMapGenerators { + genMap := gen.(map[string]interface{}) + if genMap["name"] == 
"flavors" { + flavorsMap = genMap + break + } + } + + if flavorsMap == nil { + return fmt.Errorf("flavors configMapGenerator not found") + } + + // Get files array + files, ok := flavorsMap["files"].([]interface{}) + if !ok { + files = []interface{}{} + } + + // Build the entry: filename=../flavors/filename + entry := fmt.Sprintf("%s=../flavors/%s", fileName, fileName) + + if add { + // Check if entry already exists + for _, f := range files { + if f.(string) == entry { + log.Info("Entry already exists in kustomization.yaml", "file", fileName) + return nil + } + } + // Add the entry + files = append(files, entry) + log.Info("Added to kustomization.yaml", "file", fileName) + } else { + // Remove the entry + var newFiles []interface{} + found := false + for _, f := range files { + if f.(string) != entry { + newFiles = append(newFiles, f) + } else { + found = true + } + } + if !found { + log.Warn("Entry not found in kustomization.yaml", "file", fileName) + } else { + log.Info("Removed from kustomization.yaml", "file", fileName) + } + files = newFiles + } + + // Update the files array + flavorsMap["files"] = files + + // Marshal back to YAML + output, err := yaml.Marshal(kustomization) + if err != nil { + return fmt.Errorf("failed to marshal kustomization.yaml: %w", err) + } + + // Write back + if err := os.WriteFile(kustomizationPath, output, 0644); err != nil { + return fmt.Errorf("failed to write kustomization.yaml: %w", err) + } + + return nil +} + +func NewCmdFlavor() *cobra.Command { + cmd := &cobra.Command{ + Use: "flavor", + Short: "Manage hardware flavor definitions", + Long: "Create, delete, list, and show hardware flavor definitions for node matching", + } + + cmd.AddCommand(newCmdAdd()) + cmd.AddCommand(newCmdValidate()) + cmd.AddCommand(newCmdDelete()) + cmd.AddCommand(newCmdList()) + cmd.AddCommand(newCmdShow()) + + return cmd +} + +func newCmdAdd() *cobra.Command { + return &cobra.Command{ + Use: "add ", + Short: "Add a flavor definition to the 
deployment", + Long: "Validate and add a flavor definition from a YAML file to the deployment repository", + Args: cobra.ExactArgs(1), + RunE: runAdd, + } +} + +func newCmdValidate() *cobra.Command { + return &cobra.Command{ + Use: "validate ", + Short: "Validate a flavor definition", + Long: "Validate a flavor definition against the JSON schema without adding it to the deployment", + Args: cobra.ExactArgs(1), + RunE: runValidate, + } +} + +func newCmdDelete() *cobra.Command { + return &cobra.Command{ + Use: "delete ", + Short: "Delete a flavor definition", + Long: "Delete a flavor definition by name", + Args: cobra.ExactArgs(1), + RunE: runDelete, + } +} + +func newCmdList() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all flavor definitions", + Long: "List all flavor definitions in the hardware/flavors directory", + Args: cobra.NoArgs, + RunE: runList, + } +} + +func newCmdShow() *cobra.Command { + return &cobra.Command{ + Use: "show ", + Short: "Show details of a flavor", + Long: "Show detailed information about a specific flavor definition", + Args: cobra.ExactArgs(1), + RunE: runShow, + } +} + +func parseFlavor(data []byte) (*Flavor, error) { + var flavor Flavor + if err := yaml.Unmarshal(data, &flavor); err != nil { + return nil, fmt.Errorf("failed to parse YAML: %w", err) + } + return &flavor, nil +} + +func validateFlavor(data []byte) (*Flavor, error) { + // Parse YAML into struct + flavor, err := parseFlavor(data) + if err != nil { + return nil, err + } + + // Get the schema file path + deployPath := envutil.Getenv("UC_DEPLOY") + var schemaPath string + + // Try to find schema - check both UC_DEPLOY and current working directory context + possiblePaths := []string{ + filepath.Join(deployPath, "..", "..", "schema", "flavor.schema.json"), + "../../schema/flavor.schema.json", + "../../../schema/flavor.schema.json", + } + + for _, path := range possiblePaths { + if _, err := os.Stat(path); err == nil { + schemaPath = path + break + } 
+ } + + if schemaPath == "" { + return nil, fmt.Errorf("could not find flavor.schema.json in expected locations") + } + + // Load and compile schema + compiler := jsonschema.NewCompiler() + if err := compiler.AddResource("schema.json", strings.NewReader(readSchemaFile(schemaPath))); err != nil { + return nil, fmt.Errorf("failed to add schema resource: %w", err) + } + + schema, err := compiler.Compile("schema.json") + if err != nil { + return nil, fmt.Errorf("failed to compile schema: %w", err) + } + + // Convert to JSON for validation (jsonschema library works with JSON). + // Validate the raw document rather than the re-marshalled struct: the struct + // fills omitted required fields with zero values and drops keys it has no + // field for, which would hide schema violations. + var rawDoc interface{} + if err := yaml.Unmarshal(data, &rawDoc); err != nil { + return nil, fmt.Errorf("failed to parse YAML: %w", err) + } + + jsonData, err := json.Marshal(rawDoc) + if err != nil { + return nil, fmt.Errorf("failed to convert to JSON: %w", err) + } + + var jsonDoc interface{} + if err := json.Unmarshal(jsonData, &jsonDoc); err != nil { + return nil, fmt.Errorf("failed to parse JSON: %w", err) + } + + // Validate against schema + if err := schema.Validate(jsonDoc); err != nil { + return nil, fmt.Errorf("validation failed: %w", err) + } + + return flavor, nil +} + +func readSchemaFile(path string) string { + data, err := os.ReadFile(path) + if err != nil { + log.Fatalf("Failed to read schema file: %v", err) + } + return string(data) +} + +func generateFileName(name string) string { + return fmt.Sprintf("%s.yaml", name) +} + +func runAdd(cmd *cobra.Command, args []string) error { + sourceFile := args[0] + + // Read the file + data, err := os.ReadFile(sourceFile) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } + + // Validate against JSON schema + flavor, err := validateFlavor(data) + if err != nil { + return err + } + + // Validate name is non-empty + if flavor.Name == "" { + return fmt.Errorf("name must be non-empty string") + } + + // Generate filename + fileName := generateFileName(flavor.Name) + destPath := filepath.Join(getFlavorsDir(), fileName) + + // Check if file already exists + if _, err := os.Stat(destPath); err == nil { + return fmt.Errorf("flavor already exists at %s", destPath) + }
+ + // Ensure the directory exists + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return fmt.Errorf("failed to create directory: %w", err) + } + + // Copy the file to the destination + if err := os.WriteFile(destPath, data, 0644); err != nil { + return fmt.Errorf("failed to write flavor file: %w", err) + } + + log.Info("Flavor added successfully", "path", destPath) + + // Update kustomization.yaml + if err := updateKustomization(fileName, true); err != nil { + return fmt.Errorf("failed to update kustomization.yaml: %w", err) + } + + return nil +} + +func runValidate(cmd *cobra.Command, args []string) error { + sourceFile := args[0] + + // Read the file + data, err := os.ReadFile(sourceFile) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } + + // Validate against JSON schema + flavor, err := validateFlavor(data) + if err != nil { + return err + } + + log.Info("Flavor definition is valid", + "name", flavor.Name, + "resource_class", flavor.ResourceClass) + + return nil +} + +func runDelete(cmd *cobra.Command, args []string) error { + name := args[0] + fileName := generateFileName(name) + filePath := filepath.Join(getFlavorsDir(), fileName) + + if _, err := os.Stat(filePath); os.IsNotExist(err) { + return fmt.Errorf("flavor %s not found", name) + } + + if err := os.Remove(filePath); err != nil { + return fmt.Errorf("failed to delete flavor: %w", err) + } + + log.Info("Flavor deleted", "name", name) + + // Update kustomization.yaml + if err := updateKustomization(fileName, false); err != nil { + return fmt.Errorf("failed to update kustomization.yaml: %w", err) + } + + return nil +} + +func runList(cmd *cobra.Command, args []string) error { + flavorsDir := getFlavorsDir() + entries, err := os.ReadDir(flavorsDir) + if err != nil { + if os.IsNotExist(err) { + log.Info("No flavors found - directory does not exist") + return nil + } + return fmt.Errorf("failed to read flavors directory: %w", err) + } + + if len(entries) == 0 { 
+ log.Info("No flavors found") + return nil + } + + fmt.Println("Flavors:") + for _, entry := range entries { + if !entry.IsDir() && filepath.Ext(entry.Name()) == ".yaml" { + name := entry.Name()[:len(entry.Name())-5] // Remove .yaml extension + fmt.Printf(" - %s\n", name) + } + } + + return nil +} + +func runShow(cmd *cobra.Command, args []string) error { + name := args[0] + fileName := generateFileName(name) + filePath := filepath.Join(getFlavorsDir(), fileName) + + data, err := os.ReadFile(filePath) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("flavor %s not found", name) + } + return fmt.Errorf("failed to read flavor file: %w", err) + } + + flavor, err := parseFlavor(data) + if err != nil { + return fmt.Errorf("failed to parse flavor: %w", err) + } + + // Display flavor information in a formatted way + fmt.Printf("Flavor: %s\n", name) + fmt.Printf("═══════════════════════════════════════════\n\n") + + // Basic information + fmt.Printf("Resource Class: %s\n", flavor.ResourceClass) + fmt.Printf(" (Nova properties derived from device-type resource class)\n\n") + + // Traits + if len(flavor.Traits) > 0 { + fmt.Printf("Trait Requirements:\n") + for i, trait := range flavor.Traits { + fmt.Printf(" %d. 
%s [%s]\n", i+1, trait.Trait, trait.Requirement) + } + fmt.Println() + } else { + fmt.Printf("Trait Requirements: None (matches all nodes in resource class)\n\n") + } + + return nil +} diff --git a/go/understackctl/cmd/flavor/types.go b/go/understackctl/cmd/flavor/types.go new file mode 100644 index 000000000..98ca363f3 --- /dev/null +++ b/go/understackctl/cmd/flavor/types.go @@ -0,0 +1,14 @@ +package flavor + +// Flavor represents a hardware flavor definition +type Flavor struct { + Name string `yaml:"name" json:"name"` + ResourceClass string `yaml:"resource_class" json:"resource_class"` + Traits []Trait `yaml:"traits,omitempty" json:"traits,omitempty"` +} + +// Trait represents a hardware trait requirement +type Trait struct { + Trait string `yaml:"trait" json:"trait"` + Requirement string `yaml:"requirement" json:"requirement"` +} diff --git a/go/understackctl/cmd/root/root.go b/go/understackctl/cmd/root/root.go index 797878554..6163ce821 100644 --- a/go/understackctl/cmd/root/root.go +++ b/go/understackctl/cmd/root/root.go @@ -6,7 +6,9 @@ import ( "github.com/rackerlabs/understack/go/understackctl/cmd/argocd" "github.com/rackerlabs/understack/go/understackctl/cmd/certManager" "github.com/rackerlabs/understack/go/understackctl/cmd/deploy" + "github.com/rackerlabs/understack/go/understackctl/cmd/deviceType" "github.com/rackerlabs/understack/go/understackctl/cmd/dex" + "github.com/rackerlabs/understack/go/understackctl/cmd/flavor" "github.com/rackerlabs/understack/go/understackctl/cmd/helmConfig" "github.com/rackerlabs/understack/go/understackctl/cmd/node" "github.com/rackerlabs/understack/go/understackctl/cmd/openstack" @@ -29,7 +31,9 @@ func init() { rootCmd.AddCommand(deploy.NewCmdDeploy()) rootCmd.AddCommand(argocd.NewCmdArgocdSecret()) rootCmd.AddCommand(certManager.NewCmdCertManagerSecret()) + rootCmd.AddCommand(deviceType.NewCmdDeviceType()) rootCmd.AddCommand(dex.NewCmdDexSecrets()) + rootCmd.AddCommand(flavor.NewCmdFlavor()) 
rootCmd.AddCommand(helmConfig.NewCmdHelmConfig()) rootCmd.AddCommand(node.NewCmdNode()) rootCmd.AddCommand(openstack.NewCmdOpenstackSecrets()) diff --git a/go/understackctl/go.mod b/go/understackctl/go.mod index 0a434ea01..6772e686a 100644 --- a/go/understackctl/go.mod +++ b/go/understackctl/go.mod @@ -58,6 +58,7 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sagikazarmark/locafero v0.7.0 // indirect + github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect github.com/shopspring/decimal v1.4.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.12.0 // indirect diff --git a/go/understackctl/go.sum b/go/understackctl/go.sum index dbe9edecb..964274933 100644 --- a/go/understackctl/go.sum +++ b/go/understackctl/go.sum @@ -125,6 +125,8 @@ github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsFaodPcyo= github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k= +github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4= +github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= diff --git a/mkdocs.yml b/mkdocs.yml index 89934170a..240c778a2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -121,6 +121,10 @@ nav: - 'Design Guide': - design-guide/intro.md - design-guide/add-remove-app.md + - 'Hardware Definitions': + - design-guide/device-types.md + - design-guide/hardware-traits.md + - design-guide/flavors.md - 
design-guide/neutron-networking.md - design-guide/argo-workflows.md - design-guide/argo-events.md @@ -169,6 +173,9 @@ nav: - operator-guide/nautobot.md - operator-guide/troubleshooting-osh.md - operator-guide/logging.md + - 'Hardware': + - operator-guide/device-types.md + - operator-guide/flavors.md - 'Scripts and Tools': - operator-guide/scripts.md - 'User Guide': diff --git a/schema/device-type.schema.json b/schema/device-type.schema.json new file mode 100644 index 000000000..080640d11 --- /dev/null +++ b/schema/device-type.schema.json @@ -0,0 +1,138 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://rackerlabs.github.io/understack/schema/device-type.schema.json", + "title": "UnderStack Device Type", + "description": "Device type configuration schema for hardware models", + "type": "object", + "properties": { + "class": { + "description": "Device class category", + "type": "string", + "enum": ["server", "switch", "firewall"] + }, + "manufacturer": { + "description": "Manufacturer of the hardware chassis", + "type": "string" + }, + "model": { + "description": "Model of the hardware chassis", + "type": "string" + }, + "resource_class": { + "description": "Resource class configurations", + "type": "array", + "items": { + "type": "object", + "description": "Resource class configuration", + "properties": { + "name": { + "description": "Resource class name", + "type": "string" + }, + "cpu": { + "description": "CPU specifications", + "type": "object", + "properties": { + "cores": { + "description": "Total CPU cores", + "type": "number" + }, + "model": { + "description": "Processor model", + "type": "string" + } + }, + "required": ["cores", "model"] + }, + "memory": { + "description": "Memory specifications", + "type": "object", + "properties": { + "size": { + "description": "Total memory in GB", + "type": "number" + } + }, + "required": ["size"] + }, + "drives": { + "description": "Drives", + "type": "array", + "items": { + "type": 
"object", + "description": "Drive", + "properties": { + "size": { + "type": "number", + "description": "Capacity in GB" + } + }, + "required": ["size"] + } + }, + "nic_count": { + "description": "Number of network interface cards", + "type": "integer" + } + }, + "required": ["name", "cpu", "memory", "drives", "nic_count"] + } + }, + "u_height": { + "description": "Rack unit height", + "type": "number", + "exclusiveMinimum": 0 + }, + "is_full_depth": { + "description": "Whether the device is full depth in the rack", + "type": "boolean" + }, + "interfaces": { + "description": "Network interfaces", + "type": "array", + "items": { + "type": "object", + "description": "Network interface", + "properties": { + "name": { + "type": "string", + "description": "Interface name" + }, + "type": { + "type": "string", + "description": "Interface type" + }, + "mgmt_only": { + "type": "boolean", + "description": "Whether this interface is management-only" + } + }, + "required": ["name", "type"] + } + }, + "power-ports": { + "description": "Power ports", + "type": "array", + "items": { + "type": "object", + "description": "Power port", + "properties": { + "name": { + "type": "string", + "description": "Power port name" + }, + "type": { + "type": "string", + "description": "Power port type (valid Nautobot power port type, see https://github.com/nautobot/nautobot/blob/develop/nautobot/dcim/choices.py#L507)" + }, + "maximum_draw": { + "type": "integer", + "description": "Maximum power draw in watts" + } + }, + "required": ["name", "type"] + } + } + }, + "required": [ "class", "manufacturer", "model", "u_height", "is_full_depth" ] +} diff --git a/schema/flavor.schema.json b/schema/flavor.schema.json index 5db821d51..141a12e9d 100644 --- a/schema/flavor.schema.json +++ b/schema/flavor.schema.json @@ -1,74 +1,42 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://rackerlabs.github.io/understack/schema/flavor.schema.json", - "title": "UnderStack Hardware 
Flavor", - "description": "Server flavor configuration schema", - "type": "object", - "properties": { - "name": { - "description": "Flavor name for specified configuration (ie gp01.s)", - "type": "string" - }, - "manufacturer": { - "description": "Manufacturer of the hardware chassis", - "type": "string" - }, - "model": { - "description": "Model of the hardware chassis", - "type": "string" - }, - "cpu_cores": { - "description": "Total CPU cores.", - "type": "number" - }, - "cpu_model": { - "description": "Processor model", - "type": "string" - }, - "memory_gb": { - "description": "Total memory in GB", - "type": "number" - }, - "memory_modules": { - "description": "Memory modules", - "type": "array", - "items": { - "type": "number", - "description": "Capacity in GB" - } - }, - "drives": { - "description": "Drives", - "type": "array", - "items": { - "type": "number", - "description": "Capacity in GB" - } - }, - "pci": { - "description": "PCI devices", - "type": "array", - "items": { - "type": "object", - "description": "PCI device", - "properties": { - "vendor_id": { - "type": "string" - }, - "device_id": { - "type": "string" - }, - "sub_vendor_id": { - "type": "string" - }, - "sub_device_id": { - "type": "string" - } - }, - "required": ["vendor_id", "device_id", "sub_vendor_id", "sub_device_id"] - - } - } + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://rackerlabs.github.io/understack/schema/flavor.schema.json", + "title": "UnderStack Hardware Flavor", + "description": "Hardware flavor configuration schema for matching Ironic nodes to resource classes with trait filtering", + "type": "object", + "properties": { + "name": { + "description": "Unique flavor name", + "type": "string", + "minLength": 1 + }, + "resource_class": { + "description": "Ironic resource class to match (must correspond to a device-type resource class)", + "type": "string", + "minLength": 1 }, - "required": [ "name", "manufacturer", "model", "cpu_cores", "cpu_model", 
"memory_gb", "drives" ] + "traits": { + "description": "Hardware traits for node matching (CUSTOM_ prefix added automatically)", + "type": "array", + "items": { + "type": "object", + "properties": { + "trait": { + "description": "Trait name without CUSTOM_ prefix (e.g., NICX, GPU, NVME)", + "type": "string", + "pattern": "^[A-Z][A-Z0-9_]*$" + }, + "state": { + "description": "Whether the trait must be present (required) or absent", + "type": "string", + "enum": ["required", "absent"] + } + }, + "required": ["trait", "state"], + "additionalProperties": false + } + } + }, + "required": ["name", "resource_class"], + "additionalProperties": false }