From 81b488ed5a9a0184581e8c3198a8398969d720a8 Mon Sep 17 00:00:00 2001 From: Abdul Halim Date: Mon, 10 Sep 2018 16:27:07 +0100 Subject: [PATCH 1/2] parse kubelet checkpoint file for pod devices Enable kubelet checkpoint file parsing to get Pod device info so that this device information can be passed to CNI plugins that need specific device information to work on. Change-Id: I6630f56adc0a8307f575fc09ce9090c1ffca0337 --- checkpoint/checkpoint.go | 76 +++++++++++++++++++++++++++ examples/README.md | 32 +++++++++++ examples/net-resource-sample-pod.yaml | 21 ++++++++ examples/sriov-net.yaml | 21 ++++++++ k8sclient/k8sclient.go | 40 +++++++++++--- types/conf.go | 31 ++++++++++- types/types.go | 6 +++ 7 files changed, 218 insertions(+), 9 deletions(-) create mode 100644 checkpoint/checkpoint.go create mode 100644 examples/net-resource-sample-pod.yaml create mode 100644 examples/sriov-net.yaml diff --git a/checkpoint/checkpoint.go b/checkpoint/checkpoint.go new file mode 100644 index 000000000..2180aebdd --- /dev/null +++ b/checkpoint/checkpoint.go @@ -0,0 +1,76 @@ +package checkpoint + +import ( + "encoding/json" + "fmt" + "io/ioutil" + + "github.com/intel/multus-cni/types" +) + +const ( + checkPointfile = "/var/lib/kubelet/device-plugins/kubelet_internal_checkpoint" +) + +type PodDevicesEntry struct { + PodUID string + ContainerName string + ResourceName string + DeviceIDs []string + AllocResp []byte +} + +type checkpointData struct { + PodDeviceEntries []PodDevicesEntry + RegisteredDevices map[string][]string +} + +type Data struct { + Data checkpointData + Checksum uint64 +} + +// getPodEntries gets all Pod device allocation entries from checkpoint file +func getPodEntries() ([]PodDevicesEntry, error) { + + podEntries := []PodDevicesEntry{} + + cpd := &Data{} + rawBytes, err := ioutil.ReadFile(checkPointfile) + if err != nil { + return podEntries, fmt.Errorf("getPodEntries(): error reading file %s\n%v\n", checkPointfile, err) + + } + + if err =
json.Unmarshal(rawBytes, cpd); err != nil { + return podEntries, fmt.Errorf("getPodEntries(): error unmarshalling raw bytes %v", err) + } + + return cpd.Data.PodDeviceEntries, nil +} + +// GetComputeDeviceMap returns a map of resourceName to list of device IDs +func GetComputeDeviceMap(podID string) (map[string]*types.ResourceInfo, error) { + + resourceMap := make(map[string]*types.ResourceInfo) + podEntires, err := getPodEntries() + + if err != nil { + return nil, err + } + + for _, pod := range podEntires { + if pod.PodUID == podID { + entry, ok := resourceMap[pod.ResourceName] + if ok { + // already exists; append to it + entry.DeviceIDs = append(entry.DeviceIDs, pod.DeviceIDs...) + } else { + // new entry + resourceMap[pod.ResourceName] = &types.ResourceInfo{DeviceIDs: pod.DeviceIDs} + } + } + } + + return resourceMap, nil +} diff --git a/examples/README.md b/examples/README.md index ba5906e6b..fb9556fae 100644 --- a/examples/README.md +++ b/examples/README.md @@ -60,3 +60,35 @@ A sample `cni-configuration.conf` is provided, typically this file is placed in ## Other considerations Primarily in this setup one thing that one should consider are the aspects of the `macvlan-conf.yml`, which is likely specific to the configuration of the node on which this resides. + +## Passing down device information +Some CNI plugins require specific device information which maybe pre-allocated by K8s device plugin. This could be indicated by providing `k8s.v1.cni.cncf.io/resourceName` annotaton in its network attachment definition CRD. The file [`examples/sriov-net.yaml`](./sriov-net.yaml) shows an example on how to define a Network attachment definition with specific device allocation information. Multus will get allocated device information and make them available for CNI plugin to work on. 
+ +In this exmaple (shown below), it is expected that an [SRIOV Device Plugin](https://github.com/intel/sriov-network-device-plugin/tree/dev/k8s-deviceid-model) making a pool of SRIOV VFs available to the K8s with `intel.com/sriov` as their resourceName. Any device allocated from this resource pool will be passed down by Multus to the [sriov-cni](https://github.com/intel/sriov-cni/tree/dev/k8s-deviceid-model) plugin in `deviceID` field. This is up to the sriov-cni plugin to capture this information and work with this specific device information. + +```yaml +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: sriov-net-a + annotations: + k8s.v1.cni.cncf.io/resourceName: intel.com/sriov +spec: + config: '{ + "type": "sriov", + "vlan": 1000, + "ipam": { + "type": "host-local", + "subnet": "10.56.217.0/24", + "rangeStart": "10.56.217.171", + "rangeEnd": "10.56.217.181", + "routes": [{ + "dst": "0.0.0.0/0" + }], + "gateway": "10.56.217.1" + } +}' +``` +The [net-resource-sample-pod.yaml](./net-resource-sample-pod.yaml) is an example Pod manifest file that requests an SRIOV device from a host, which is then configured using the above network attachment definition. + +>For further information on how to configure the SRIOV Device Plugin and SRIOV-CNI, please refer to the links given above.
\ No newline at end of file diff --git a/examples/net-resource-sample-pod.yaml b/examples/net-resource-sample-pod.yaml new file mode 100644 index 000000000..3f6eb0686 --- /dev/null +++ b/examples/net-resource-sample-pod.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Pod +metadata: + name: testpod1 + labels: + env: test + annotations: + k8s.v1.cni.cncf.io/networks: sriov-net-a +spec: + containers: + - name: appcntr1 + image: centos/tools + imagePullPolicy: IfNotPresent + command: [ "/bin/bash", "-c", "--" ] + args: [ "while true; do sleep 300000; done;" ] + resources: + requests: + intel.com/sriov: '1' + limits: + intel.com/sriov: '1' + restartPolicy: "Never" diff --git a/examples/sriov-net.yaml b/examples/sriov-net.yaml new file mode 100644 index 000000000..9b265d3e3 --- /dev/null +++ b/examples/sriov-net.yaml @@ -0,0 +1,21 @@ +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: sriov-net-a + annotations: + k8s.v1.cni.cncf.io/resourceName: intel.com/sriov +spec: + config: '{ + "type": "sriov", + "vlan": 1000, + "ipam": { + "type": "host-local", + "subnet": "10.56.217.0/24", + "rangeStart": "10.56.217.171", + "rangeEnd": "10.56.217.181", + "routes": [{ + "dst": "0.0.0.0/0" + }], + "gateway": "10.56.217.1" + } +}' diff --git a/k8sclient/k8sclient.go b/k8sclient/k8sclient.go index 8e1ee6433..ba9435bf7 100644 --- a/k8sclient/k8sclient.go +++ b/k8sclient/k8sclient.go @@ -31,10 +31,15 @@ import ( "github.com/containernetworking/cni/libcni" "github.com/containernetworking/cni/pkg/skel" cnitypes "github.com/containernetworking/cni/pkg/types" + "github.com/intel/multus-cni/checkpoint" "github.com/intel/multus-cni/logging" "github.com/intel/multus-cni/types" ) +const ( + resourceNameAnnot = "k8s.v1.cni.cncf.io/resourceName" +) + // NoK8sNetworkError indicates error, no network in kubernetes type NoK8sNetworkError struct { message string @@ -131,16 +136,16 @@ func setPodNetworkAnnotation(client KubeClient, namespace string, pod *v1.Pod, n 
return pod, nil } -func getPodNetworkAnnotation(client KubeClient, k8sArgs *types.K8sArgs) (string, string, error) { +func getPodNetworkAnnotation(client KubeClient, k8sArgs *types.K8sArgs) (string, string, string, error) { var err error logging.Debugf("getPodNetworkAnnotation: %v, %v", client, k8sArgs) pod, err := client.GetPod(string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME)) if err != nil { - return "", "", logging.Errorf("getPodNetworkAnnotation: failed to query the pod %v in out of cluster comm: %v", string(k8sArgs.K8S_POD_NAME), err) + return "", "", "", logging.Errorf("getPodNetworkAnnotation: failed to query the pod %v in out of cluster comm: %v", string(k8sArgs.K8S_POD_NAME), err) } - return pod.Annotations["k8s.v1.cni.cncf.io/networks"], pod.ObjectMeta.Namespace, nil + return pod.Annotations["k8s.v1.cni.cncf.io/networks"], pod.ObjectMeta.Namespace, string(pod.UID), nil } func parsePodNetworkObjectName(podnetwork string) (string, string, string, error) { @@ -326,7 +331,8 @@ func cniConfigFromNetworkResource(customResource *types.NetworkAttachmentDefinit return config, nil } -func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement, confdir string) (*types.DelegateNetConf, error) { +func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement, confdir string, resourceMap map[string]*types.ResourceInfo) (*types.DelegateNetConf, error) { + logging.Debugf("getKubernetesDelegate: %v, %v, %s", client, net, confdir) rawPath := fmt.Sprintf("/apis/k8s.cni.cncf.io/v1/namespaces/%s/network-attachment-definitions/%s", net.Namespace, net.Name) netData, err := client.GetRawWithPath(rawPath) @@ -339,12 +345,26 @@ func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement return nil, logging.Errorf("getKubernetesDelegate: failed to get the netplugin data: %v", err) } + // Get resourceName annotation from NetDefinition + deviceID := "" + resourceName, ok := 
customResource.Metadata.Annotations[resourceNameAnnot] + if ok { + // ResourceName annotation is found; try to get device info from resourceMap + entry, ok := resourceMap[resourceName] + if ok { + if idCount := len(entry.DeviceIDs); idCount > 0 && idCount > entry.Index { + deviceID = entry.DeviceIDs[entry.Index] + entry.Index++ // increment Index for next delegate + } + } + } + configBytes, err := cniConfigFromNetworkResource(customResource, confdir) if err != nil { return nil, err } - delegate, err := types.LoadDelegateNetConf(configBytes, net.InterfaceRequest) + delegate, err := types.LoadDelegateNetConf(configBytes, net.InterfaceRequest, deviceID) if err != nil { return nil, err } @@ -447,11 +467,17 @@ func GetK8sClient(kubeconfig string, kubeClient KubeClient) (KubeClient, error) func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string) ([]*types.DelegateNetConf, error) { logging.Debugf("GetK8sNetwork: %v, %v, %v", k8sclient, k8sArgs, confdir) - netAnnot, defaultNamespace, err := getPodNetworkAnnotation(k8sclient, k8sArgs) + netAnnot, defaultNamespace, podID, err := getPodNetworkAnnotation(k8sclient, k8sArgs) if err != nil { return nil, err } + // Get Pod ComputeDevices info + resourceMap, err := checkpoint.GetComputeDeviceMap(podID) + if err != nil { + return nil, logging.Errorf("GetK8sNetwork: failed to get resourceMap for PodUID: %v %v", podID, err) + } + if len(netAnnot) == 0 { return nil, &NoK8sNetworkError{"no kubernetes network found"} } @@ -464,7 +490,7 @@ func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string) // Read all network objects referenced by 'networks' var delegates []*types.DelegateNetConf for _, net := range networks { - delegate, err := getKubernetesDelegate(k8sclient, net, confdir) + delegate, err := getKubernetesDelegate(k8sclient, net, confdir, resourceMap) if err != nil { return nil, logging.Errorf("GetK8sNetwork: failed getting the delegate: %v", err) } diff --git a/types/conf.go 
b/types/conf.go index e95d9cee2..bfbc13bce 100644 --- a/types/conf.go +++ b/types/conf.go @@ -50,7 +50,16 @@ func LoadDelegateNetConfList(bytes []byte, delegateConf *DelegateNetConf) error } // Convert raw CNI JSON into a DelegateNetConf structure -func LoadDelegateNetConf(bytes []byte, ifnameRequest string) (*DelegateNetConf, error) { +func LoadDelegateNetConf(bytes []byte, ifnameRequest, deviceID string) (*DelegateNetConf, error) { + // If deviceID is present, inject this into delegate config + if deviceID != "" { + if updatedBytes, err := delegateAddDeviceID(bytes, deviceID); err != nil { + return nil, logging.Errorf("error in LoadDelegateNetConf - delegateAddDeviceID unable to update delegate config: %v", err) + } else { + bytes = updatedBytes + } + } + delegateConf := &DelegateNetConf{} logging.Debugf("LoadDelegateNetConf: %s, %s", string(bytes), ifnameRequest) if err := json.Unmarshal(bytes, &delegateConf.Conf); err != nil { @@ -190,7 +199,7 @@ func LoadNetConf(bytes []byte) (*NetConf, error) { if err != nil { return nil, logging.Errorf("error marshalling delegate %d config: %v", idx, err) } - delegateConf, err := LoadDelegateNetConf(bytes, "") + delegateConf, err := LoadDelegateNetConf(bytes, "", "") if err != nil { return nil, logging.Errorf("failed to load delegate %d config: %v", idx, err) } @@ -210,3 +219,21 @@ func (n *NetConf) AddDelegates(newDelegates []*DelegateNetConf) error { n.Delegates = append(n.Delegates, newDelegates...) 
return nil } + +// delegateAddDeviceID injects deviceID information in delegate bytes +func delegateAddDeviceID(inBytes []byte, deviceID string) ([]byte, error) { + var rawConfig map[string]interface{} + var err error + + err = json.Unmarshal(inBytes, &rawConfig) + if err != nil { + return nil, logging.Errorf("delegateAddDeviceID: failed to unmarshal inBytes: %v", err) + } + // Inject deviceID + rawConfig["deviceID"] = deviceID + configBytes, err := json.Marshal(rawConfig) + if err != nil { + return nil, logging.Errorf("delegateAddDeviceID: failed to re-marshal Spec.Config: %v", err) + } + return configBytes, nil +} diff --git a/types/types.go b/types/types.go index 20ed055ea..b0760280b 100644 --- a/types/types.go +++ b/types/types.go @@ -119,3 +119,9 @@ type K8sArgs struct { K8S_POD_NAMESPACE types.UnmarshallableString K8S_POD_INFRA_CONTAINER_ID types.UnmarshallableString } + +// ResourceInfo is struct to hold Pod device allocation information +type ResourceInfo struct { + Index int + DeviceIDs []string +} From e736da5e9615fc515eadbae69958b04c6f155d23 Mon Sep 17 00:00:00 2001 From: Abdul Halim Date: Tue, 18 Sep 2018 10:36:30 +0100 Subject: [PATCH 2/2] updated examples/README.md Change-Id: I650fec86659b3690e1dc4b15bf84b6574cb0baba --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index fb9556fae..17e0fb178 100644 --- a/examples/README.md +++ b/examples/README.md @@ -64,7 +64,7 @@ Primarily in this setup one thing that one should consider are the aspects of th ## Passing down device information Some CNI plugins require specific device information which maybe pre-allocated by K8s device plugin. This could be indicated by providing `k8s.v1.cni.cncf.io/resourceName` annotaton in its network attachment definition CRD. The file [`examples/sriov-net.yaml`](./sriov-net.yaml) shows an example on how to define a Network attachment definition with specific device allocation information. 
Multus will get allocated device information and make them available for CNI plugin to work on. -In this exmaple (shown below), it is expected that an [SRIOV Device Plugin](https://github.com/intel/sriov-network-device-plugin/tree/dev/k8s-deviceid-model) making a pool of SRIOV VFs available to the K8s with `intel.com/sriov` as their resourceName. Any device allocated from this resource pool will be passed down by Multus to the [sriov-cni](https://github.com/intel/sriov-cni/tree/dev/k8s-deviceid-model) plugin in `deviceID` field. This is up to the sriov-cni plugin to capture this information and work with this specific device information. +In this example (shown below), it is expected that an [SRIOV Device Plugin](https://github.com/intel/sriov-network-device-plugin/) makes a pool of SRIOV VFs available to K8s with `intel.com/sriov` as their resourceName. Any device allocated from this resource pool will be passed down by Multus to the [sriov-cni](https://github.com/intel/sriov-cni/tree/dev/k8s-deviceid-model) plugin in the `deviceID` field. It is up to the sriov-cni plugin to capture this information and work with this specific device information. ```yaml apiVersion: "k8s.cni.cncf.io/v1"