Skip to content

Commit

Permalink
feat(backend): Upgrade argo to v3.4.16 (#10568)
Browse files Browse the repository at this point in the history
* pull argo v3.4.16 upstream

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

* upgrade to Argo v3.4.16

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

* Update ValidateWorkflow calls

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

* Add NodeStatus pod name retrieval function

- Argo 3.4.16 upgrade introduces a breaking change with inconsistent node.ID vs
  node.Name
- introduce a function in workflow.go to conditionally handle this

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

* Remove PNS Executor manifests and containerRuntimeExecutor ConfigMap Key

- PNS Executor was removed in Argo v3.4, so manifests no longer valid
- WorkflowController will fail to start if `containerRuntimeExecutor`
  provided as input parameter, so remove from WC ConfigMap and CM
  patches

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

* fix(frontend): Fix Sidebar tabs to work with argo pod name-id mismatch

- Stemming from upgrade to argo 3.4, Pod Name is no longer always the
  same as NodeID, which breaks a few tabs (PodInfo, PodEvents and
  PodLogs).  Add function to address this

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

* test: update frontend CI to accommodate pod id/name changes

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>

---------

Signed-off-by: Giulio Frasca <gfrasca@redhat.com>
  • Loading branch information
gmfrasca authored Apr 16, 2024
1 parent 60a443e commit 809d576
Show file tree
Hide file tree
Showing 91 changed files with 28,284 additions and 6,963 deletions.
4 changes: 2 additions & 2 deletions .cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,10 @@ steps:
args: ['pull', 'gcr.io/cloudsql-docker/gce-proxy:1.25.0']
id: 'pullCloudsqlProxy'
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/ml-pipeline/argoexec:v3.3.10-license-compliance']
args: ['pull', 'gcr.io/ml-pipeline/argoexec:v3.4.16-license-compliance']
id: 'pullArgoExecutor'
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/ml-pipeline/workflow-controller:v3.3.10-license-compliance']
args: ['pull', 'gcr.io/ml-pipeline/workflow-controller:v3.4.16-license-compliance']
id: 'pullArgoWorkflowController'

# V2 related images
Expand Down
20 changes: 10 additions & 10 deletions .release.cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -478,14 +478,14 @@ steps:
docker push gcr.io/ml-pipeline/google/pipelines-test/cloudsqlproxy:$(cat /workspace/mm.ver)
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/ml-pipeline/argoexec:v3.3.10-license-compliance']
args: ['pull', 'gcr.io/ml-pipeline/argoexec:v3.4.16-license-compliance']
id: 'pullArgoExecutor'
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/ml-pipeline/argoexec:v3.3.10-license-compliance', 'gcr.io/ml-pipeline/google/pipelines/argoexecutor:$TAG_NAME']
args: ['tag', 'gcr.io/ml-pipeline/argoexec:v3.4.16-license-compliance', 'gcr.io/ml-pipeline/google/pipelines/argoexecutor:$TAG_NAME']
id: 'tagArgoExecutorForMarketplace'
waitFor: ['pullArgoExecutor']
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/ml-pipeline/argoexec:v3.3.10-license-compliance', 'gcr.io/ml-pipeline/google/pipelines-test/argoexecutor:$TAG_NAME']
args: ['tag', 'gcr.io/ml-pipeline/argoexec:v3.4.16-license-compliance', 'gcr.io/ml-pipeline/google/pipelines-test/argoexecutor:$TAG_NAME']
id: 'tagArgoExecutorForMarketplaceTest'
waitFor: ['pullArgoExecutor']
- id: 'tagArgoExecutorForMarketplaceMajorMinor'
Expand All @@ -495,20 +495,20 @@ steps:
args:
- -ceux
- |
docker tag gcr.io/ml-pipeline/argoexec:v3.3.10-license-compliance gcr.io/ml-pipeline/google/pipelines/argoexecutor:$(cat /workspace/mm.ver)
docker tag gcr.io/ml-pipeline/argoexec:v3.3.10-license-compliance gcr.io/ml-pipeline/google/pipelines-test/argoexecutor:$(cat /workspace/mm.ver)
docker tag gcr.io/ml-pipeline/argoexec:v3.4.16-license-compliance gcr.io/ml-pipeline/google/pipelines/argoexecutor:$(cat /workspace/mm.ver)
docker tag gcr.io/ml-pipeline/argoexec:v3.4.16-license-compliance gcr.io/ml-pipeline/google/pipelines-test/argoexecutor:$(cat /workspace/mm.ver)
docker push gcr.io/ml-pipeline/google/pipelines/argoexecutor:$(cat /workspace/mm.ver)
docker push gcr.io/ml-pipeline/google/pipelines-test/argoexecutor:$(cat /workspace/mm.ver)
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/ml-pipeline/workflow-controller:v3.3.10-license-compliance']
args: ['pull', 'gcr.io/ml-pipeline/workflow-controller:v3.4.16-license-compliance']
id: 'pullArgoWorkflowController'
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/ml-pipeline/workflow-controller:v3.3.10-license-compliance', 'gcr.io/ml-pipeline/google/pipelines/argoworkflowcontroller:$TAG_NAME']
args: ['tag', 'gcr.io/ml-pipeline/workflow-controller:v3.4.16-license-compliance', 'gcr.io/ml-pipeline/google/pipelines/argoworkflowcontroller:$TAG_NAME']
id: 'tagArgoWorkflowControllerForMarketplace'
waitFor: ['pullArgoWorkflowController']
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/ml-pipeline/workflow-controller:v3.3.10-license-compliance', 'gcr.io/ml-pipeline/google/pipelines-test/argoworkflowcontroller:$TAG_NAME']
args: ['tag', 'gcr.io/ml-pipeline/workflow-controller:v3.4.16-license-compliance', 'gcr.io/ml-pipeline/google/pipelines-test/argoworkflowcontroller:$TAG_NAME']
id: 'tagArgoWorkflowControllerForMarketplaceTest'
waitFor: ['pullArgoWorkflowController']
- id: 'tagArgoWorkflowControllerForMarketplaceMajorMinor'
Expand All @@ -518,8 +518,8 @@ steps:
args:
- -ceux
- |
docker tag gcr.io/ml-pipeline/workflow-controller:v3.3.10-license-compliance gcr.io/ml-pipeline/google/pipelines/argoworkflowcontroller:$(cat /workspace/mm.ver)
docker tag gcr.io/ml-pipeline/workflow-controller:v3.3.10-license-compliance gcr.io/ml-pipeline/google/pipelines-test/argoworkflowcontroller:$(cat /workspace/mm.ver)
docker tag gcr.io/ml-pipeline/workflow-controller:v3.4.16-license-compliance gcr.io/ml-pipeline/google/pipelines/argoworkflowcontroller:$(cat /workspace/mm.ver)
docker tag gcr.io/ml-pipeline/workflow-controller:v3.4.16-license-compliance gcr.io/ml-pipeline/google/pipelines-test/argoworkflowcontroller:$(cat /workspace/mm.ver)
docker push gcr.io/ml-pipeline/google/pipelines/argoworkflowcontroller:$(cat /workspace/mm.ver)
docker push gcr.io/ml-pipeline/google/pipelines-test/argoworkflowcontroller:$(cat /workspace/mm.ver)
Expand Down
2 changes: 1 addition & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ COPY backend/requirements.txt .
RUN python3 -m pip install -r requirements.txt --no-cache-dir

# Downloading Argo CLI so that the samples are validated
ENV ARGO_VERSION v3.3.10
ENV ARGO_VERSION v3.4.16
RUN curl -sLO https://github.com/argoproj/argo-workflows/releases/download/${ARGO_VERSION}/argo-linux-amd64.gz && \
gunzip argo-linux-amd64.gz && \
chmod +x argo-linux-amd64 && \
Expand Down
89 changes: 51 additions & 38 deletions backend/src/agent/persistence/worker/metrics_reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (
"encoding/json"
"errors"
"fmt"
"testing"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"google.golang.org/protobuf/testing/protocmp"
"testing"

workflowapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
api "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client"
Expand All @@ -46,8 +47,9 @@ func TestReportMetrics_NoCompletedNode_NoOP(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeRunning,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeRunning,
},
},
},
Expand Down Expand Up @@ -98,8 +100,9 @@ func TestReportMetrics_NoArtifact_NoOP(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
},
},
},
Expand All @@ -125,8 +128,9 @@ func TestReportMetrics_NoMetricsArtifact_NoOP(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-ui-metadata"}},
},
Expand All @@ -153,8 +157,9 @@ func TestReportMetrics_Succeed(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -167,7 +172,7 @@ func TestReportMetrics_Succeed(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand All @@ -185,12 +190,12 @@ func TestReportMetrics_Succeed(t *testing.T) {
Metrics: []*api.RunMetric{
{
Name: "accuracy",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
Value: &api.RunMetric_NumberValue{NumberValue: 0.77},
},
{
Name: "logloss",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
Value: &api.RunMetric_NumberValue{NumberValue: 1.2},
},
},
Expand All @@ -216,8 +221,9 @@ func TestReportMetrics_EmptyArchive_Fail(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -229,7 +235,7 @@ func TestReportMetrics_EmptyArchive_Fail(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand Down Expand Up @@ -257,8 +263,9 @@ func TestReportMetrics_MultipleFilesInArchive_Fail(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "MY_NAME-template-1-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -272,7 +279,7 @@ func TestReportMetrics_MultipleFilesInArchive_Fail(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand Down Expand Up @@ -300,8 +307,9 @@ func TestReportMetrics_InvalidMetricsJSON_Fail(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -314,7 +322,7 @@ func TestReportMetrics_InvalidMetricsJSON_Fail(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand Down Expand Up @@ -342,15 +350,17 @@ func TestReportMetrics_InvalidMetricsJSON_PartialFail(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
},
"node-2": workflowapi.NodeStatus{
ID: "node-2",
Phase: workflowapi.NodeSucceeded,
ID: "node-2",
TemplateName: "template-2",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -366,7 +376,7 @@ func TestReportMetrics_InvalidMetricsJSON_PartialFail(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand All @@ -375,7 +385,7 @@ func TestReportMetrics_InvalidMetricsJSON_PartialFail(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-2",
NodeId: "MY_NAME-template-2-2",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand All @@ -392,12 +402,12 @@ func TestReportMetrics_InvalidMetricsJSON_PartialFail(t *testing.T) {
Metrics: []*api.RunMetric{
&api.RunMetric{
Name: "accuracy",
NodeId: "node-2",
NodeId: "MY_NAME-template-2-2",
Value: &api.RunMetric_NumberValue{NumberValue: 0.77},
},
&api.RunMetric{
Name: "logloss",
NodeId: "node-2",
NodeId: "MY_NAME-template-2-2",
Value: &api.RunMetric_NumberValue{NumberValue: 1.2},
},
},
Expand All @@ -423,8 +433,9 @@ func TestReportMetrics_CorruptedArchiveFile_Fail(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -435,7 +446,7 @@ func TestReportMetrics_CorruptedArchiveFile_Fail(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand Down Expand Up @@ -463,8 +474,9 @@ func TestReportMetrics_MultiplMetricErrors_TransientErrowWin(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -478,7 +490,7 @@ func TestReportMetrics_MultiplMetricErrors_TransientErrowWin(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand Down Expand Up @@ -526,8 +538,9 @@ func TestReportMetrics_Unauthorized(t *testing.T) {
Status: workflowapi.WorkflowStatus{
Nodes: map[string]workflowapi.NodeStatus{
"node-1": workflowapi.NodeStatus{
ID: "node-1",
Phase: workflowapi.NodeSucceeded,
ID: "node-1",
TemplateName: "template-1",
Phase: workflowapi.NodeSucceeded,
Outputs: &workflowapi.Outputs{
Artifacts: []workflowapi.Artifact{{Name: "mlpipeline-metrics"}},
},
Expand All @@ -540,7 +553,7 @@ func TestReportMetrics_Unauthorized(t *testing.T) {
pipelineFake.StubArtifact(
&api.ReadArtifactRequest{
RunId: "run-1",
NodeId: "node-1",
NodeId: "MY_NAME-template-1-1",
ArtifactName: "mlpipeline-metrics",
},
&api.ReadArtifactResponse{
Expand Down
4 changes: 2 additions & 2 deletions backend/src/apiserver/resource/resource_manager_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ status:

newWfString, err := yaml.Marshal(newWf)
assert.Nil(t, err)
assert.Equal(t, []string{"resubmit-hl9ft-3879090716"}, nodes)
assert.Equal(t, []string{"resubmit-hl9ft-random-fail-3879090716"}, nodes)

expectedNewWfString := `apiVersion: argoproj.io/v1alpha1
kind: Workflow
Expand Down Expand Up @@ -202,7 +202,7 @@ status:
startedAt: "2021-05-26T09:14:07Z"
templateName: rand-fail-dag
type: DAG
resubmit-hl9ft-3929423573:
resubmit-hl9ft-random-fail-3929423573:
boundaryID: resubmit-hl9ft
children:
- resubmit-hl9ft-3879090716
Expand Down
4 changes: 2 additions & 2 deletions backend/src/apiserver/server/api_converter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3112,7 +3112,7 @@ func Test_toModelTasks_v2(t *testing.T) {
func Test_toModelTasks_wf(t *testing.T) {
expectedWf := []*model.Task{
{
PodName: "boudary_exec_id-node0",
PodName: "run1-file-passing-pipelines-node0",
Namespace: "kubeflow",
RunId: "run1_uid_true",
CreatedTimestamp: -62135596800,
Expand All @@ -3123,7 +3123,7 @@ func Test_toModelTasks_wf(t *testing.T) {
ChildrenPods: []string{"boudary_exec_id-node1"},
},
{
PodName: "boudary_exec_id-node1",
PodName: "run1-print-text-node1",
Namespace: "kubeflow",
RunId: "run1_uid_true",
CreatedTimestamp: -62135596800,
Expand Down
2 changes: 1 addition & 1 deletion backend/src/apiserver/template/argo_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ func ValidateWorkflow(template []byte) (*util.Workflow, error) {
if wf.Kind != argoK8sResource {
return nil, util.NewInvalidInputError("Unexpected resource type. Expected: %v. Received: %v", argoK8sResource, wf.Kind)
}
_, err = validate.ValidateWorkflow(nil, nil, &wf, validate.ValidateOpts{
err = validate.ValidateWorkflow(nil, nil, &wf, validate.ValidateOpts{
Lint: true,
IgnoreEntrypoint: true,
WorkflowTemplateValidation: false, // not used by kubeflow
Expand Down
2 changes: 1 addition & 1 deletion backend/src/common/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ package common
type ExecutionPhase string

// borrow from Workflow.Status.Phase:
// https://pkg.go.dev/github.com/argoproj/argo-workflows/v3@v3.3.10/pkg/apis/workflow/v1alpha1#WorkflowPhase
// https://pkg.go.dev/github.com/argoproj/argo-workflows/v3@v3.4.16/pkg/apis/workflow/v1alpha1#WorkflowPhase
const (
ExecutionUnknown ExecutionPhase = ""
ExecutionPending ExecutionPhase = "Pending" // pending some set-up - rarely used
Expand Down
Loading

0 comments on commit 809d576

Please sign in to comment.