Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle cAdvisor partial failures #25933

Merged
merged 1 commit on May 22, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/kubelet/cadvisor/cadvisor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,15 +148,15 @@ func (cc *cadvisorClient) VersionInfo() (*cadvisorapi.VersionInfo, error) {

func (cc *cadvisorClient) SubcontainerInfo(name string, req *cadvisorapi.ContainerInfoRequest) (map[string]*cadvisorapi.ContainerInfo, error) {
infos, err := cc.SubcontainersInfo(name, req)
if err != nil {
if err != nil && len(infos) == 0 {
return nil, err
}

result := make(map[string]*cadvisorapi.ContainerInfo, len(infos))
for _, info := range infos {
result[info.Name] = info
}
return result, nil
return result, err
}

func (cc *cadvisorClient) MachineInfo() (*cadvisorapi.MachineInfo, error) {
Expand Down
30 changes: 18 additions & 12 deletions pkg/kubelet/server/stats/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ import (
"path"
"time"

"github.com/emicklei/go-restful"
"github.com/golang/glog"
cadvisorapi "github.com/google/cadvisor/info/v1"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"

"github.com/emicklei/go-restful"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
Expand Down Expand Up @@ -133,14 +133,14 @@ func parseStatsRequest(request *restful.Request) (StatsRequest, error) {
func (h *handler) handleStats(request *restful.Request, response *restful.Response) {
query, err := parseStatsRequest(request)
if err != nil {
handleError(response, err)
handleError(response, "/stats", err)
return
}

// Root container stats.
statsMap, err := h.provider.GetRawContainerInfo("/", query.cadvisorRequest(), false)
if err != nil {
handleError(response, err)
handleError(response, fmt.Sprintf("/stats %v", query), err)
return
}
writeResponse(response, statsMap["/"])
Expand All @@ -150,7 +150,7 @@ func (h *handler) handleStats(request *restful.Request, response *restful.Respon
func (h *handler) handleSummary(request *restful.Request, response *restful.Response) {
summary, err := h.summaryProvider.Get()
if err != nil {
handleError(response, err)
handleError(response, "/stats/summary", err)
} else {
writeResponse(response, summary)
}
Expand All @@ -160,7 +160,7 @@ func (h *handler) handleSummary(request *restful.Request, response *restful.Resp
func (h *handler) handleSystemContainer(request *restful.Request, response *restful.Response) {
query, err := parseStatsRequest(request)
if err != nil {
handleError(response, err)
handleError(response, "/stats/container", err)
return
}

Expand All @@ -169,8 +169,13 @@ func (h *handler) handleSystemContainer(request *restful.Request, response *rest
stats, err := h.provider.GetRawContainerInfo(
containerName, query.cadvisorRequest(), query.Subcontainers)
if err != nil {
handleError(response, err)
return
if _, ok := stats[containerName]; ok {
// If the failure is partial, log it and return a best-effort response.
glog.Errorf("Partial failure issuing GetRawContainerInfo(%v): %v", query, err)
} else {
handleError(response, fmt.Sprintf("/stats/container %v", query), err)
return
}
}
writeResponse(response, stats)
}
Expand All @@ -181,7 +186,7 @@ func (h *handler) handleSystemContainer(request *restful.Request, response *rest
func (h *handler) handlePodContainer(request *restful.Request, response *restful.Response) {
query, err := parseStatsRequest(request)
if err != nil {
handleError(response, err)
handleError(response, request.Request.URL.String(), err)
return
}

Expand All @@ -203,7 +208,7 @@ func (h *handler) handlePodContainer(request *restful.Request, response *restful
pod, ok := h.provider.GetPodByName(params["namespace"], params["podName"])
if !ok {
glog.V(4).Infof("Container not found: %v", params)
handleError(response, kubecontainer.ErrContainerNotFound)
response.WriteError(http.StatusNotFound, kubecontainer.ErrContainerNotFound)
return
}
stats, err := h.provider.GetContainerInfo(
Expand All @@ -213,7 +218,7 @@ func (h *handler) handlePodContainer(request *restful.Request, response *restful
query.cadvisorRequest())

if err != nil {
handleError(response, err)
handleError(response, fmt.Sprintf("%s %v", request.Request.URL.String(), query), err)
return
}
writeResponse(response, stats)
Expand All @@ -226,13 +231,14 @@ func writeResponse(response *restful.Response, stats interface{}) {
}

// handleError serializes an error object into an HTTP response.
func handleError(response *restful.Response, err error) {
// request is provided for logging.
func handleError(response *restful.Response, request string, err error) {
switch err {
case kubecontainer.ErrContainerNotFound:
response.WriteError(http.StatusNotFound, err)
default:
msg := fmt.Sprintf("Internal Error: %v", err)
glog.Errorf("HTTP InternalServerError: %s", msg)
glog.Errorf("HTTP InternalServerError serving %s: %s", request, msg)
response.WriteErrorString(http.StatusInternalServerError, msg)
}
}
17 changes: 12 additions & 5 deletions pkg/kubelet/server/stats/summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,26 +68,33 @@ func (sp *summaryProviderImpl) Get() (*stats.Summary, error) {
}
infos, err := sp.provider.GetContainerInfoV2("/", options)
if err != nil {
return nil, err
if _, ok := infos["/"]; ok {
// If the failure is partial, log it and return a best-effort response.
glog.Errorf("Partial failure issuing GetContainerInfoV2: %v", err)
} else {
return nil, fmt.Errorf("failed GetContainerInfoV2: %v", err)
}
}

// TODO(timstclair): Consider returning a best-effort response if any of the following errors
// occur.
node, err := sp.provider.GetNode()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed GetNode: %v", err)
}

nodeConfig := sp.provider.GetNodeConfig()
rootFsInfo, err := sp.provider.RootFsInfo()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed RootFsInfo: %v", err)
}
imageFsInfo, err := sp.provider.DockerImagesFsInfo()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed DockerImagesFsInfo: %v", err)
}
imageStats, err := sp.runtime.ImageStats()
if err != nil || imageStats == nil {
return nil, err
return nil, fmt.Errorf("failed ImageStats: %v", err)
}
sb := &summaryBuilder{sp.fsResourceAnalyzer, node, nodeConfig, rootFsInfo, imageFsInfo, *imageStats, infos}
return sb.build()
Expand Down