diff --git a/checkpoint/checkpoint.go b/checkpoint/checkpoint.go
new file mode 100644
index 000000000..2180aebdd
--- /dev/null
+++ b/checkpoint/checkpoint.go
@@ -0,0 +1,83 @@
+package checkpoint
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+
+	"github.com/intel/multus-cni/types"
+)
+
+const (
+	// checkPointfile is the path of the kubelet device-plugin checkpoint file read by this package.
+	checkPointfile = "/var/lib/kubelet/device-plugins/kubelet_internal_checkpoint"
+)
+
+// PodDevicesEntry is a single Pod device allocation entry decoded from the checkpoint file.
+type PodDevicesEntry struct {
+	PodUID        string
+	ContainerName string
+	ResourceName  string
+	DeviceIDs     []string
+	AllocResp     []byte
+}
+
+// checkpointData is the payload held in the "Data" field of the checkpoint document.
+type checkpointData struct {
+	PodDeviceEntries  []PodDevicesEntry
+	RegisteredDevices map[string][]string
+}
+
+// Data is the top-level JSON document stored in the checkpoint file.
+type Data struct {
+	Data     checkpointData
+	Checksum uint64
+}
+
+// getPodEntries gets all Pod device allocation entries from checkpoint file.
+// It returns a nil slice together with an error when the file cannot be read
+// or does not contain valid JSON.
+func getPodEntries() ([]PodDevicesEntry, error) {
+	rawBytes, err := ioutil.ReadFile(checkPointfile)
+	if err != nil {
+		// Keep the message on a single line so it can be embedded in a caller's log line.
+		return nil, fmt.Errorf("getPodEntries(): error reading file %s: %v", checkPointfile, err)
+	}
+
+	cpd := &Data{}
+	if err = json.Unmarshal(rawBytes, cpd); err != nil {
+		return nil, fmt.Errorf("getPodEntries(): error unmarshalling raw bytes %v", err)
+	}
+
+	return cpd.Data.PodDeviceEntries, nil
+}
+
+// GetComputeDeviceMap returns a map of resourceName to the device IDs that the
+// checkpoint file records for the Pod with UID podID. Entries that share a
+// resourceName are merged into one list in file order. The map is empty when no
+// entry matches; a nil map and an error are returned when the checkpoint file
+// cannot be loaded.
+func GetComputeDeviceMap(podID string) (map[string]*types.ResourceInfo, error) {
+	podEntries, err := getPodEntries()
+	if err != nil {
+		return nil, err
+	}
+
+	resourceMap := make(map[string]*types.ResourceInfo)
+	for _, pod := range podEntries {
+		if pod.PodUID != podID {
+			continue
+		}
+		entry, ok := resourceMap[pod.ResourceName]
+		if !ok {
+			// first entry for this resource
+			entry = &types.ResourceInfo{}
+			resourceMap[pod.ResourceName] = entry
+		}
+		// append copies the IDs, so the result never shares a backing array
+		// with the decoded checkpoint entries
+		entry.DeviceIDs = append(entry.DeviceIDs, pod.DeviceIDs...)
+	}
+
+	return resourceMap, nil
+}
diff --git a/examples/README.md b/examples/README.md
index ba5906e6b..17e0fb178 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -60,3 +60,35 @@ A sample `cni-configuration.conf` is provided, typically this file is placed in
 ## Other considerations
 
 Primarily in this setup one thing that one should consider are the aspects of the `macvlan-conf.yml`, which is likely specific to the configuration of the node on which this resides.
+
+## Passing down device information
+Some CNI plugins require specific device information which may be pre-allocated by a K8s device plugin. This can be indicated by providing the `k8s.v1.cni.cncf.io/resourceName` annotation in its network attachment definition CRD. The file [`examples/sriov-net.yaml`](./sriov-net.yaml) shows an example of how to define a network attachment definition with specific device allocation information. Multus will get the allocated device information and make it available for the CNI plugin to work on.
+
+In this example (shown below), it is expected that an [SRIOV Device Plugin](https://github.com/intel/sriov-network-device-plugin/) makes a pool of SRIOV VFs available to K8s with `intel.com/sriov` as their resourceName. Any device allocated from this resource pool will be passed down by Multus to the [sriov-cni](https://github.com/intel/sriov-cni/tree/dev/k8s-deviceid-model) plugin in the `deviceID` field. It is up to the sriov-cni plugin to capture this information and work with this specific device.
+ +```yaml +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: sriov-net-a + annotations: + k8s.v1.cni.cncf.io/resourceName: intel.com/sriov +spec: + config: '{ + "type": "sriov", + "vlan": 1000, + "ipam": { + "type": "host-local", + "subnet": "10.56.217.0/24", + "rangeStart": "10.56.217.171", + "rangeEnd": "10.56.217.181", + "routes": [{ + "dst": "0.0.0.0/0" + }], + "gateway": "10.56.217.1" + } +}' +``` +The [net-resource-sample-pod.yaml](./net-resource-sample-pod.yaml) is an example Pod manifest file that requests an SRIOV device from a host, which is then configured using the above network attachment definition. + +>For further information on how to configure SRIOV Device Plugin and SRIOV-CNI please refer to the links given above. \ No newline at end of file diff --git a/examples/net-resource-sample-pod.yaml b/examples/net-resource-sample-pod.yaml new file mode 100644 index 000000000..3f6eb0686 --- /dev/null +++ b/examples/net-resource-sample-pod.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Pod +metadata: + name: testpod1 + labels: + env: test + annotations: + k8s.v1.cni.cncf.io/networks: sriov-net-a +spec: + containers: + - name: appcntr1 + image: centos/tools + imagePullPolicy: IfNotPresent + command: [ "/bin/bash", "-c", "--" ] + args: [ "while true; do sleep 300000; done;" ] + resources: + requests: + intel.com/sriov: '1' + limits: + intel.com/sriov: '1' + restartPolicy: "Never" diff --git a/examples/sriov-net.yaml b/examples/sriov-net.yaml new file mode 100644 index 000000000..9b265d3e3 --- /dev/null +++ b/examples/sriov-net.yaml @@ -0,0 +1,21 @@ +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: sriov-net-a + annotations: + k8s.v1.cni.cncf.io/resourceName: intel.com/sriov +spec: + config: '{ + "type": "sriov", + "vlan": 1000, + "ipam": { + "type": "host-local", + "subnet": "10.56.217.0/24", + "rangeStart": "10.56.217.171", + "rangeEnd": "10.56.217.181", + "routes": [{ + "dst": 
"0.0.0.0/0" + }], + "gateway": "10.56.217.1" + } +}' diff --git a/k8sclient/k8sclient.go b/k8sclient/k8sclient.go index 8e1ee6433..ba9435bf7 100644 --- a/k8sclient/k8sclient.go +++ b/k8sclient/k8sclient.go @@ -31,10 +31,15 @@ import ( "github.com/containernetworking/cni/libcni" "github.com/containernetworking/cni/pkg/skel" cnitypes "github.com/containernetworking/cni/pkg/types" + "github.com/intel/multus-cni/checkpoint" "github.com/intel/multus-cni/logging" "github.com/intel/multus-cni/types" ) +const ( + resourceNameAnnot = "k8s.v1.cni.cncf.io/resourceName" +) + // NoK8sNetworkError indicates error, no network in kubernetes type NoK8sNetworkError struct { message string @@ -131,16 +136,16 @@ func setPodNetworkAnnotation(client KubeClient, namespace string, pod *v1.Pod, n return pod, nil } -func getPodNetworkAnnotation(client KubeClient, k8sArgs *types.K8sArgs) (string, string, error) { +func getPodNetworkAnnotation(client KubeClient, k8sArgs *types.K8sArgs) (string, string, string, error) { var err error logging.Debugf("getPodNetworkAnnotation: %v, %v", client, k8sArgs) pod, err := client.GetPod(string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME)) if err != nil { - return "", "", logging.Errorf("getPodNetworkAnnotation: failed to query the pod %v in out of cluster comm: %v", string(k8sArgs.K8S_POD_NAME), err) + return "", "", "", logging.Errorf("getPodNetworkAnnotation: failed to query the pod %v in out of cluster comm: %v", string(k8sArgs.K8S_POD_NAME), err) } - return pod.Annotations["k8s.v1.cni.cncf.io/networks"], pod.ObjectMeta.Namespace, nil + return pod.Annotations["k8s.v1.cni.cncf.io/networks"], pod.ObjectMeta.Namespace, string(pod.UID), nil } func parsePodNetworkObjectName(podnetwork string) (string, string, string, error) { @@ -326,7 +331,8 @@ func cniConfigFromNetworkResource(customResource *types.NetworkAttachmentDefinit return config, nil } -func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement, confdir string) 
(*types.DelegateNetConf, error) { +func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement, confdir string, resourceMap map[string]*types.ResourceInfo) (*types.DelegateNetConf, error) { + logging.Debugf("getKubernetesDelegate: %v, %v, %s", client, net, confdir) rawPath := fmt.Sprintf("/apis/k8s.cni.cncf.io/v1/namespaces/%s/network-attachment-definitions/%s", net.Namespace, net.Name) netData, err := client.GetRawWithPath(rawPath) @@ -339,12 +345,26 @@ func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement return nil, logging.Errorf("getKubernetesDelegate: failed to get the netplugin data: %v", err) } + // Get resourceName annotation from NetDefinition + deviceID := "" + resourceName, ok := customResource.Metadata.Annotations[resourceNameAnnot] + if ok { + // ResourceName annotation is found; try to get device info from resourceMap + entry, ok := resourceMap[resourceName] + if ok { + if idCount := len(entry.DeviceIDs); idCount > 0 && idCount > entry.Index { + deviceID = entry.DeviceIDs[entry.Index] + entry.Index++ // increment Index for next delegate + } + } + } + configBytes, err := cniConfigFromNetworkResource(customResource, confdir) if err != nil { return nil, err } - delegate, err := types.LoadDelegateNetConf(configBytes, net.InterfaceRequest) + delegate, err := types.LoadDelegateNetConf(configBytes, net.InterfaceRequest, deviceID) if err != nil { return nil, err } @@ -447,11 +467,17 @@ func GetK8sClient(kubeconfig string, kubeClient KubeClient) (KubeClient, error) func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string) ([]*types.DelegateNetConf, error) { logging.Debugf("GetK8sNetwork: %v, %v, %v", k8sclient, k8sArgs, confdir) - netAnnot, defaultNamespace, err := getPodNetworkAnnotation(k8sclient, k8sArgs) + netAnnot, defaultNamespace, podID, err := getPodNetworkAnnotation(k8sclient, k8sArgs) if err != nil { return nil, err } + // Get Pod ComputeDevices info + resourceMap, err := 
checkpoint.GetComputeDeviceMap(podID) + if err != nil { + return nil, logging.Errorf("GetK8sNetwork: failed to get resourceMap for PodUID: %v %v", podID, err) + } + if len(netAnnot) == 0 { return nil, &NoK8sNetworkError{"no kubernetes network found"} } @@ -464,7 +490,7 @@ func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string) // Read all network objects referenced by 'networks' var delegates []*types.DelegateNetConf for _, net := range networks { - delegate, err := getKubernetesDelegate(k8sclient, net, confdir) + delegate, err := getKubernetesDelegate(k8sclient, net, confdir, resourceMap) if err != nil { return nil, logging.Errorf("GetK8sNetwork: failed getting the delegate: %v", err) } diff --git a/types/conf.go b/types/conf.go index e95d9cee2..bfbc13bce 100644 --- a/types/conf.go +++ b/types/conf.go @@ -50,7 +50,16 @@ func LoadDelegateNetConfList(bytes []byte, delegateConf *DelegateNetConf) error } // Convert raw CNI JSON into a DelegateNetConf structure -func LoadDelegateNetConf(bytes []byte, ifnameRequest string) (*DelegateNetConf, error) { +func LoadDelegateNetConf(bytes []byte, ifnameRequest, deviceID string) (*DelegateNetConf, error) { + // If deviceID is present, inject this into delegate config + if deviceID != "" { + if updatedBytes, err := delegateAddDeviceID(bytes, deviceID); err != nil { + return nil, logging.Errorf("error in LoadDelegateNetConf - delegateAddDeviceID unable to update delegate config: %v", err) + } else { + bytes = updatedBytes + } + } + delegateConf := &DelegateNetConf{} logging.Debugf("LoadDelegateNetConf: %s, %s", string(bytes), ifnameRequest) if err := json.Unmarshal(bytes, &delegateConf.Conf); err != nil { @@ -190,7 +199,7 @@ func LoadNetConf(bytes []byte) (*NetConf, error) { if err != nil { return nil, logging.Errorf("error marshalling delegate %d config: %v", idx, err) } - delegateConf, err := LoadDelegateNetConf(bytes, "") + delegateConf, err := LoadDelegateNetConf(bytes, "", "") if err != nil { return 
nil, logging.Errorf("failed to load delegate %d config: %v", idx, err)
 		}
@@ -210,3 +219,34 @@ func (n *NetConf) AddDelegates(newDelegates []*DelegateNetConf) error {
 	n.Delegates = append(n.Delegates, newDelegates...)
 	return nil
 }
+
+// delegateAddDeviceID injects deviceID information in delegate bytes.
+// inBytes must hold a JSON object; the result is that object re-encoded with
+// its top-level "deviceID" key set to deviceID (replacing any existing value).
+func delegateAddDeviceID(inBytes []byte, deviceID string) ([]byte, error) {
+	// Decode only the top level and keep every value as raw JSON, so existing
+	// values are carried through without a round trip via interface{} (which
+	// would turn every number into a float64).
+	var rawConfig map[string]json.RawMessage
+	if err := json.Unmarshal(inBytes, &rawConfig); err != nil {
+		return nil, logging.Errorf("delegateAddDeviceID: failed to unmarshal inBytes: %v", err)
+	}
+	if rawConfig == nil {
+		// A JSON "null" decodes without error but leaves the map nil; writing
+		// to it below would panic.
+		return nil, logging.Errorf("delegateAddDeviceID: failed to unmarshal inBytes: delegate config is null")
+	}
+
+	idBytes, err := json.Marshal(deviceID)
+	if err != nil {
+		return nil, logging.Errorf("delegateAddDeviceID: failed to marshal deviceID: %v", err)
+	}
+	// Inject deviceID
+	rawConfig["deviceID"] = idBytes
+
+	configBytes, err := json.Marshal(rawConfig)
+	if err != nil {
+		return nil, logging.Errorf("delegateAddDeviceID: failed to re-marshal Spec.Config: %v", err)
+	}
+	return configBytes, nil
+}
diff --git a/types/types.go b/types/types.go
index 20ed055ea..b0760280b 100644
--- a/types/types.go
+++ b/types/types.go
@@ -119,3 +119,11 @@ type K8sArgs struct {
 	K8S_POD_NAMESPACE          types.UnmarshallableString
 	K8S_POD_INFRA_CONTAINER_ID types.UnmarshallableString
 }
+
+// ResourceInfo is struct to hold Pod device allocation information
+type ResourceInfo struct {
+	// Index is the position in DeviceIDs of the next ID to assign to a delegate.
+	Index int
+	// DeviceIDs lists the IDs of the devices allocated for one resource name.
+	DeviceIDs []string
+}