Skip to content
This repository has been archived by the owner on Aug 25, 2023. It is now read-only.

YACHT-1256: Updating SLI latency view to filter out empty tables as i… #141

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions src/backup/default_backup_predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,20 @@ def test(self, big_query_table_metadata, table_entity):
if not self._is_possible_to_copy_table(big_query_table_metadata):
return False

if self.__table_has_up_to_date_backup(big_query_table_metadata, table_entity):
logging.info('Backup is up to date')
return False

return True

def __table_has_up_to_date_backup(self, big_query_table_metadata, table_entity):
last_backup = self.__get_last_table_backup_if_any(table_entity)

if not last_backup:
return True

if big_query_table_metadata.is_empty() and last_backup.numBytes > 0:
logging.info("Source table is empty. Not empty backup exists.")
return False

return self.__is_table_backup_up_to_date(big_query_table_metadata,
last_backup)
if self.__is_table_backup_up_to_date(big_query_table_metadata, last_backup):
logging.info('Backup is up to date')
return False

return True

def __get_last_table_backup_if_any(self, table_entity):
if table_entity is None:
Expand Down
25 changes: 11 additions & 14 deletions terraform/sli/SLI_0_days_views.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
resource "google_bigquery_table" "census_data_0_days_ago_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "census_data_0_days_ago"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "census_data_0_days_ago"
description = "All tables and partitions seen by GCP Census 0 days ago"

view {
query = <<EOF
#legacySQL
-- Shows all tables and partitions seen by census now
SELECT * FROM (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId,
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.table_metadata_v1_0]
Expand All @@ -21,10 +21,10 @@ resource "google_bigquery_table" "census_data_0_days_ago_view" {
)
WHERE rownum = 1
), (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime,
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId, partitionId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.partition_metadata_v1_0]
Expand All @@ -36,14 +36,12 @@ resource "google_bigquery_table" "census_data_0_days_ago_view" {
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_dataset.SLI_backup_creation_latency_views_dataset"]
}

resource "google_bigquery_table" "SLI_0_days_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "SLI_0_days"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "SLI_0_days"
description = "All tables and partitions which backups potentially violate 0 days latency"

view {
query = <<EOF
Expand All @@ -58,12 +56,12 @@ resource "google_bigquery_table" "SLI_0_days_view" {
IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) as backup_created,
IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) as backup_last_modified
FROM
[${local.SLI_views_destination_project}.${var.SLI_backup_creation_latency_views_dataset}.census_data_0_days_ago] AS census
[${google_bigquery_table.census_data_0_days_ago_view.id}] as census
LEFT JOIN (
SELECT
backup_created, backup_last_modified, source_project_id, source_dataset_id, source_table_id, source_partition_id
FROM
[${local.datastore_export_project}.${var.datastore_export_views_dataset}.last_available_backup_for_every_table_entity]
[${google_bigquery_table.last_available_backup_for_every_table_entity_view.id}]
) AS last_backups
ON
census.projectId=last_backups.source_project_id AND
Expand All @@ -74,11 +72,10 @@ resource "google_bigquery_table" "SLI_0_days_view" {
projectId != "${var.bbq_project}"
AND projectId != "${var.bbq_restoration_project}"
AND partitionId != "__UNPARTITIONED__"
AND census.numRows != 0
AND IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) < CURRENT_TIMESTAMP()
AND IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) < lastModifiedTime
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_table.census_data_0_days_ago_view", "google_bigquery_table.last_available_backup_for_every_table_entity_view"]
}
25 changes: 11 additions & 14 deletions terraform/sli/SLI_3_days_views.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
resource "google_bigquery_table" "census_data_3_days_ago_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "census_data_3_days_ago"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "census_data_3_days_ago"
description = "All tables and partitions seen by GCP Census 3 days ago"

view {
query = <<EOF
#legacySQL
-- Shows all tables and partitions seen by census 3 days ago
SELECT * FROM (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId,
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.table_metadata_v1_0]
Expand All @@ -21,10 +21,10 @@ resource "google_bigquery_table" "census_data_3_days_ago_view" {
)
WHERE rownum = 1
), (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime,
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId, partitionId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.partition_metadata_v1_0]
Expand All @@ -36,15 +36,13 @@ resource "google_bigquery_table" "census_data_3_days_ago_view" {
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_dataset.SLI_backup_creation_latency_views_dataset"]
}


resource "google_bigquery_table" "SLI_3_days_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "SLI_3_days"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "SLI_3_days"
description = "All tables and partitions which backups potentially violate 3 days latency"

view {
query = <<EOF
Expand All @@ -59,12 +57,12 @@ resource "google_bigquery_table" "SLI_3_days_view" {
IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) as backup_created,
IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) as backup_last_modified
FROM
[${local.SLI_views_destination_project}.${var.SLI_backup_creation_latency_views_dataset}.census_data_3_days_ago] AS census
[${google_bigquery_table.census_data_3_days_ago_view.id}] as census
LEFT JOIN (
SELECT
backup_created, backup_last_modified, source_project_id, source_dataset_id, source_table_id, source_partition_id
FROM
[${local.datastore_export_project}.${var.datastore_export_views_dataset}.last_available_backup_for_every_table_entity]
[${google_bigquery_table.last_available_backup_for_every_table_entity_view.id}]
) AS last_backups
ON
census.projectId=last_backups.source_project_id AND
Expand All @@ -75,11 +73,10 @@ resource "google_bigquery_table" "SLI_3_days_view" {
projectId != "${var.bbq_project}"
AND projectId != "${var.bbq_restoration_project}"
AND partitionId != "__UNPARTITIONED__"
AND census.numRows != 0
AND IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) < TIMESTAMP(DATE_ADD(CURRENT_TIMESTAMP(), -3 , "DAY"))
AND IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) < lastModifiedTime
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_table.census_data_3_days_ago_view", "google_bigquery_table.last_available_backup_for_every_table_entity_view"]
}
25 changes: 11 additions & 14 deletions terraform/sli/SLI_4_days_views.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
resource "google_bigquery_table" "census_data_4_days_ago_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "census_data_4_days_ago"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "census_data_4_days_ago"
description = "All tables and partitions seen by GCP Census 4 days ago"

view {
query = <<EOF
#legacySQL
-- Shows all tables and partitions seen by census 4 days ago
SELECT * FROM (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId,
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.table_metadata_v1_0]
Expand All @@ -21,10 +21,10 @@ resource "google_bigquery_table" "census_data_4_days_ago_view" {
)
WHERE rownum = 1
), (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime,
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId, partitionId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.partition_metadata_v1_0]
Expand All @@ -36,15 +36,13 @@ resource "google_bigquery_table" "census_data_4_days_ago_view" {
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_dataset.SLI_backup_creation_latency_views_dataset"]
}


resource "google_bigquery_table" "SLI_4_days_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "SLI_4_days"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "SLI_4_days"
description = "All tables and partitions which backups potentially violate 4 days latency"

view {
query = <<EOF
Expand All @@ -59,12 +57,12 @@ resource "google_bigquery_table" "SLI_4_days_view" {
IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) as backup_created,
IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) as backup_last_modified
FROM
[${local.SLI_views_destination_project}.${var.SLI_backup_creation_latency_views_dataset}.census_data_4_days_ago] AS census
[${google_bigquery_table.census_data_4_days_ago_view.id}] as census
LEFT JOIN (
SELECT
backup_created, backup_last_modified, source_project_id, source_dataset_id, source_table_id, source_partition_id
FROM
[${local.datastore_export_project}.${var.datastore_export_views_dataset}.last_available_backup_for_every_table_entity]
[${google_bigquery_table.last_available_backup_for_every_table_entity_view.id}]
) AS last_backups
ON
census.projectId=last_backups.source_project_id AND
Expand All @@ -75,11 +73,10 @@ resource "google_bigquery_table" "SLI_4_days_view" {
projectId != "${var.bbq_project}"
AND projectId != "${var.bbq_restoration_project}"
AND partitionId != "__UNPARTITIONED__"
AND census.numRows != 0
AND IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) < TIMESTAMP(DATE_ADD(CURRENT_TIMESTAMP(), -4 , "DAY"))
AND IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) < lastModifiedTime
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_table.census_data_4_days_ago_view", "google_bigquery_table.last_available_backup_for_every_table_entity_view"]
}
26 changes: 12 additions & 14 deletions terraform/sli/SLI_5_days_views.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
resource "google_bigquery_table" "census_data_5_days_ago_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "census_data_5_days_ago"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "census_data_5_days_ago"
description = "All tables and partitions seen by GCP Census 5 days ago"


view {
query = <<EOF
#legacySQL
-- Shows all tables and partitions seen by census 5 days ago
SELECT * FROM (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId,
projectId, datasetId, tableId, creationTime, lastModifiedTime, 'None' AS partitionId, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.table_metadata_v1_0]
Expand All @@ -21,10 +22,10 @@ resource "google_bigquery_table" "census_data_5_days_ago_view" {
)
WHERE rownum = 1
), (
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime
SELECT projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows
FROM (
SELECT
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime,
projectId, datasetId, tableId, partitionId, creationTime, lastModifiedTime, numRows,
ROW_NUMBER() OVER (PARTITION BY projectId, datasetId, tableId, partitionId ORDER BY snapshotTime DESC) AS rownum
FROM
[${var.gcp_census_project}.bigquery.partition_metadata_v1_0]
Expand All @@ -36,15 +37,13 @@ resource "google_bigquery_table" "census_data_5_days_ago_view" {
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_dataset.SLI_backup_creation_latency_views_dataset"]
}


resource "google_bigquery_table" "SLI_5_days_view" {
project = "${local.SLI_views_destination_project}"
dataset_id = "${var.SLI_backup_creation_latency_views_dataset}"
table_id = "SLI_5_days"
dataset_id = "${google_bigquery_dataset.SLI_backup_creation_latency_views_dataset.dataset_id}"
table_id = "SLI_5_days"
description = "All tables and partitions which backups potentially violate 5 days latency"

view {
query = <<EOF
Expand All @@ -59,12 +58,12 @@ resource "google_bigquery_table" "SLI_5_days_view" {
IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) as backup_created,
IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) as backup_last_modified
FROM
[${local.SLI_views_destination_project}.${var.SLI_backup_creation_latency_views_dataset}.census_data_5_days_ago] AS census
[${google_bigquery_table.census_data_5_days_ago_view.id}] as census
LEFT JOIN (
SELECT
backup_created, backup_last_modified, source_project_id, source_dataset_id, source_table_id, source_partition_id
FROM
[${local.datastore_export_project}.${var.datastore_export_views_dataset}.last_available_backup_for_every_table_entity]
[${google_bigquery_table.last_available_backup_for_every_table_entity_view.id}]
) AS last_backups
ON
census.projectId=last_backups.source_project_id AND
Expand All @@ -75,11 +74,10 @@ resource "google_bigquery_table" "SLI_5_days_view" {
projectId != "${var.bbq_project}"
AND projectId != "${var.bbq_restoration_project}"
AND partitionId != "__UNPARTITIONED__"
AND numRows != 0
AND IFNULL(last_backups.backup_created, MSEC_TO_TIMESTAMP(0)) < TIMESTAMP(DATE_ADD(CURRENT_TIMESTAMP(), -5 , "DAY"))
AND IFNULL(last_backups.backup_last_modified, MSEC_TO_TIMESTAMP(0)) < lastModifiedTime
EOF
use_legacy_sql = true
}

depends_on = ["google_bigquery_table.census_data_5_days_ago_view", "google_bigquery_table.last_available_backup_for_every_table_entity_view"]
}
Loading