From eeb410d21d685e97ca0577ef63dafd444f92f9d7 Mon Sep 17 00:00:00 2001 From: Avital Pinnick Date: Sun, 21 Feb 2021 11:24:18 +0200 Subject: [PATCH] Velero debug tool --- .../migrating_3_4/troubleshooting-3-4.adoc | 2 + .../troubleshooting-4-1-4.adoc | 2 + .../troubleshooting-4-2-4.adoc | 2 + .../migration-debugging-velero-resources.adoc | 59 ++++++++++++++ modules/migration-partial-failure-velero.adoc | 80 +++++++++++++++++++ .../migration-rolling-back-migration-cli.adoc | 23 ++---- 6 files changed, 152 insertions(+), 16 deletions(-) create mode 100644 modules/migration-debugging-velero-resources.adoc create mode 100644 modules/migration-partial-failure-velero.adoc diff --git a/migration/migrating_3_4/troubleshooting-3-4.adoc b/migration/migrating_3_4/troubleshooting-3-4.adoc index dd7831c7f19f..9ed277311a8c 100644 --- a/migration/migrating_3_4/troubleshooting-3-4.adoc +++ b/migration/migrating_3_4/troubleshooting-3-4.adoc @@ -21,6 +21,8 @@ include::modules/migration-downloading-logs.adoc[leveloffset=+1] include::modules/migration-updating-deprecated-gvks.adoc[leveloffset=+1] include::modules/migration-error-messages.adoc[leveloffset=+1] include::modules/migration-dvm-error-node-selectors.adoc[leveloffset=+1] +include::modules/migration-debugging-velero-resources.adoc[leveloffset=+1] +include::modules/migration-partial-failure-velero.adoc[leveloffset=+2] include::modules/migration-using-must-gather.adoc[leveloffset=+1] [id="rolling-back-migration_{context}"] diff --git a/migration/migrating_4_1_4/troubleshooting-4-1-4.adoc b/migration/migrating_4_1_4/troubleshooting-4-1-4.adoc index c27f7efbe26c..e67638905f94 100644 --- a/migration/migrating_4_1_4/troubleshooting-4-1-4.adoc +++ b/migration/migrating_4_1_4/troubleshooting-4-1-4.adoc @@ -20,6 +20,8 @@ include::modules/migration-using-mig-log-reader.adoc[leveloffset=+1] include::modules/migration-downloading-logs.adoc[leveloffset=+1] include::modules/migration-error-messages.adoc[leveloffset=+1] 
include::modules/migration-dvm-error-node-selectors.adoc[leveloffset=+1] +include::modules/migration-debugging-velero-resources.adoc[leveloffset=+1] +include::modules/migration-partial-failure-velero.adoc[leveloffset=+2] include::modules/migration-using-must-gather.adoc[leveloffset=+1] [id="rolling-back-migration_{context}"] diff --git a/migration/migrating_4_2_4/troubleshooting-4-2-4.adoc b/migration/migrating_4_2_4/troubleshooting-4-2-4.adoc index d9842562f7a0..55a506bf94b0 100644 --- a/migration/migrating_4_2_4/troubleshooting-4-2-4.adoc +++ b/migration/migrating_4_2_4/troubleshooting-4-2-4.adoc @@ -20,6 +20,8 @@ include::modules/migration-using-mig-log-reader.adoc[leveloffset=+1] include::modules/migration-downloading-logs.adoc[leveloffset=+1] include::modules/migration-error-messages.adoc[leveloffset=+1] include::modules/migration-dvm-error-node-selectors.adoc[leveloffset=+1] +include::modules/migration-debugging-velero-resources.adoc[leveloffset=+1] +include::modules/migration-partial-failure-velero.adoc[leveloffset=+2] include::modules/migration-using-must-gather.adoc[leveloffset=+1] [id="rolling-back-migration_{context}"] diff --git a/modules/migration-debugging-velero-resources.adoc b/modules/migration-debugging-velero-resources.adoc new file mode 100644 index 000000000000..fdb8d0737c3d --- /dev/null +++ b/modules/migration-debugging-velero-resources.adoc @@ -0,0 +1,59 @@ +// Module included in the following assemblies: +// * migration/migrating_3_4/troubleshooting-3-4.adoc +// * migration/migrating_4_1_4/troubleshooting-4-1-4.adoc +// * migration/migrating_4_2_4/troubleshooting-4-2-4.adoc + +[id="migration-debugging-velero-resources_{context}"] += Using the Velero CLI to debug Backup and Restore CRs + +You can debug the `Backup` and `Restore` custom resources (CRs) and partial migration failures with the Velero command line interface (CLI). The Velero CLI runs in the `velero` pod. 
+ +[id="velero-command-syntax_{context}"] +== Velero command syntax + +Velero CLI commands use the following syntax: +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -- ./velero <resource> <command> <resource_id> +---- + +You can specify `velero-<pod> -n openshift-migration` in place of `$(oc get pods -n openshift-migration -o name | grep velero)`. + +[id="help-command_{context}"] +== Help command + +The Velero `help` command lists all the Velero CLI commands: +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -- ./velero --help +---- + +[id="describe-command_{context}"] +== Describe command + +The Velero `describe` command provides a summary of warnings and errors associated with a Velero resource: +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -- ./velero <resource> describe <resource_id> +---- + +.Example +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -- ./velero backup describe 0e44ae00-5dc3-11eb-9ca8-df7e5254778b-2d8ql +---- + +[id="logs-command_{context}"] +== Logs command + +The Velero `logs` command provides the logs associated with a Velero resource: +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -- ./velero <resource> logs <resource_id> +---- + +.Example +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -- ./velero restore logs ccc7c2d0-6017-11eb-afab-85d0007f5a19-x4lbf +---- diff --git a/modules/migration-partial-failure-velero.adoc b/modules/migration-partial-failure-velero.adoc new file mode 100644 index 000000000000..ed0374b7ba91 --- /dev/null +++ b/modules/migration-partial-failure-velero.adoc @@ -0,0 +1,80 @@ +// Module included in the following assemblies: +// * migration/migrating_3_4/troubleshooting-3-4.adoc +// * migration/migrating_4_1_4/troubleshooting-4-1-4.adoc +// * migration/migrating_4_2_4/troubleshooting-4-2-4.adoc + +[id="migration-partial-failure-velero_{context}"] += Debugging a partial migration failure + +You can 
debug a partial migration failure warning message by using the Velero CLI to examine the `Restore` custom resource (CR) logs. + +A partial failure occurs when Velero encounters an issue that does not cause a migration to fail. For example, if a custom resource definition (CRD) is missing or if there is a discrepancy between CRD versions on the source and target clusters, the migration completes but the CR is not created on the target cluster. + +Velero logs the issue as a partial failure and then processes the rest of the objects in the `Backup` CR. + +.Procedure + +. Check the status of a `MigMigration` CR: ++ +[source,terminal] +---- +$ oc get migmigration <migmigration> -o yaml +---- ++ +.Example output ++ +[source,yaml] +---- +status: + conditions: + - category: Warn + durable: true + lastTransitionTime: "2021-01-26T20:48:40Z" + message: 'Final Restore openshift-migration/ccc7c2d0-6017-11eb-afab-85d0007f5a19-x4lbf: partially failed on destination cluster' + status: "True" + type: VeleroFinalRestorePartiallyFailed + - category: Advisory + durable: true + lastTransitionTime: "2021-01-26T20:48:42Z" + message: The migration has completed with warnings, please look at `Warn` conditions. + reason: Completed + status: "True" + type: SucceededWithWarnings +---- + +. Check the status of the `Restore` CR by using the Velero `describe` command: ++ +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -n openshift-migration -- ./velero restore describe <restore> +---- ++ +.Example output ++ +[source,yaml] +---- +Phase: PartiallyFailed (run 'velero restore logs ccc7c2d0-6017-11eb-afab-85d0007f5a19-x4lbf' for more information) + +Errors: + Velero: + Cluster: + Namespaces: + migration-example: error restoring example.com/migration-example/migration-example: the server could not find the requested resource +---- + +. 
Check the `Restore` CR logs by using the Velero `logs` command: ++ +[source,terminal] +---- +$ oc exec $(oc get pods -n openshift-migration -o name | grep velero) -n openshift-migration -- ./velero restore logs <restore> +---- ++ +.Example output ++ +[source,terminal] +---- +time="2021-01-26T20:48:37Z" level=info msg="Attempting to restore migration-example: migration-example" logSource="pkg/restore/restore.go:1107" restore=openshift-migration/ccc7c2d0-6017-11eb-afab-85d0007f5a19-x4lbf +time="2021-01-26T20:48:37Z" level=info msg="error restoring migration-example: the server could not find the requested resource" logSource="pkg/restore/restore.go:1170" restore=openshift-migration/ccc7c2d0-6017-11eb-afab-85d0007f5a19-x4lbf +---- ++ +The `Restore` CR log error message, `the server could not find the requested resource`, indicates the cause of the partially failed migration. diff --git a/modules/migration-rolling-back-migration-cli.adoc b/modules/migration-rolling-back-migration-cli.adoc index dfd1c98b2335..8f8a7a08b672 100644 --- a/modules/migration-rolling-back-migration-cli.adoc +++ b/modules/migration-rolling-back-migration-cli.adoc @@ -6,7 +6,7 @@ [id='migration-rolling-back-migration-cli_{context}'] == Rolling back a migration from the CLI -You can roll back a migration by using the CLI. +You can roll back a migration by creating a `MigMigration` custom resource (CR) from the CLI. If your application was stopped during a failed migration, you must roll back the migration in order to prevent data corruption in the persistent volume. @@ -14,12 +14,11 @@ Rollback is not required if the application was not stopped during migration bec .Procedure -. Create a `MigMigration` CR object based on the following example: +. 
Create a `MigMigration` CR based on the following example: + [source,yaml] ---- $ cat << EOF | oc apply -f - ---- apiVersion: migration.openshift.io/v1alpha1 kind: MigMigration metadata: @@ -28,23 +27,15 @@ metadata: name: migration-rollback namespace: openshift-migration spec: - # 'canceled: true' cancels the migration - canceled: false - # 'rollback: true' rolls back the migration +... rollback: true - # 'stage: true' runs a stage migration without quiescing the application on the source cluster. - stage: false - # 'quiescePods: true' scales the pods on the source cluster to '0' after the 'Backup' stage of a migration has finished - quiescePods: false - # 'keepAnnotations: true' retains the labels and annotations applied by the migration - keepAnnotations: false - +... migPlanRef: - name: <1> + name: <1> namespace: openshift-migration EOF ---- -<1> Specify the name of the migration plan that you want to roll back. +<1> Specify the name of the associated `MigPlan` CR. -. In the {mtc-short} console, verify that the migrated project resources have been removed from the target cluster. +. In the {mtc-short} web console, verify that the migrated project resources have been removed from the target cluster. . Verify that the migrated project resources are present in the source cluster and that the application is running.