Skip to content
This repository has been archived by the owner on Aug 25, 2023. It is now read-only.

Commit

Permalink
Merge fa159f2 into de63f78
Browse files Browse the repository at this point in the history
  • Loading branch information
przemyslaw-jasinski committed May 22, 2019
2 parents de63f78 + fa159f2 commit ca20174
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 25 deletions.
9 changes: 2 additions & 7 deletions SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,8 @@ To perform backup, BBQ needs rights to read BigQuery data from the project which
* Grant this permission for the whole folder or organisation. It will be inherited by all of the projects underneath.
### Cloud Datastore export
BBQ may periodically export data from Datastore to Big Query. It's much easier to query the data in Big Query rather than Datastore. To enable export:
* Execute command below which assigns the **Cloud Datastore Import Export Admin** IAM role to BBQ default service account:
```bash
gcloud projects add-iam-policy-binding ${BBQ_PROJECT_ID} --member='serviceAccount:'${BBQ_PROJECT_ID}'@appspot.gserviceaccount.com' --role='roles/datastore.importExportAdmin'
```
* (Optionally) Configure schedule time and kinds to export in [cron.yaml](./config/cron.yaml) file.
BBQ may periodically export data from Datastore to Big Query. It is much easier to query the data in Big Query than in Datastore.
The schedule time and the kinds to export can be configured in the [cron.yaml](./config/cron.yaml) file.
### Security Layers
BBQ has configured multiple layers of security to limit access to your data.
Expand Down
7 changes: 7 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,12 @@ project_settings:
backup_project_id: 'BBQ-project-id'
debug_mode: False

# metadata_storage_project_id -
# The main purpose of this project is to separate BBQ backups,
# which should stay immutable and invisible to developers without a specific reason,
# from metadata (such as Datastore exports, SLI views, etc.)
# that is used and constantly modified by developers.
metadata_storage_project_id: 'BBQ-metadata-project-id'

# default_restoration_project_id - project into which data will be restored by default during restoration process
# default_restoration_project_id: 'default-restoration-storage-project-id'
4 changes: 4 additions & 0 deletions src/commons/config/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ def backup_settings_custom_project_list(self):
def backup_project_id(self):
return self.__project_config['project_settings'].get('backup_project_id')

@property
def metadata_storage_project_id(self):
return self.__project_config['project_settings'].get('metadata_storage_project_id')

@property
def default_restoration_project_id(self):
return self.__project_config['project_settings'].get('default_restoration_project_id', '')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
class LoadDatastoreBackupsToBigQueryException(Exception):
pass


class LoadDatastoreBackupsToBigQueryService(object):

def __init__(self, date):
Expand All @@ -25,7 +26,7 @@ def __init__(self, date):

def load(self, source_uri, kinds):
self.big_query.create_dataset(
configuration.backup_project_id,
configuration.metadata_storage_project_id,
DATASET_ID, self.location
)

Expand All @@ -52,7 +53,7 @@ def __create_job_body(self, source_uri, kind):
".export_metadata".format(source_uri, kind, kind)
],
"destinationTable": {
"projectId": configuration.backup_project_id,
"projectId": configuration.metadata_storage_project_id,
"datasetId": DATASET_ID,
"tableId": kind + "_" + self.date
}
Expand Down Expand Up @@ -84,8 +85,7 @@ def __wait_till_done(self, load_job):
result = self.big_query.get_job(load_job)
if 'errors' in result['status']:
raise LoadDatastoreBackupsToBigQueryException(
"Export from GCS to BQ failed, job reference: {}"
.format(load_job)
"Export from GCS to BQ failed, job reference: {}".format(load_job)
)
if result['status']['state'] == 'DONE':
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ def __init__(self, x_days):

def query_string(self):
projects_to_skip = tuple(configuration.projects_to_skip)
return \
"SELECT * FROM [{}:SLI_backup_creation_latency_views.SLI_{}_days] WHERE projectId NOT IN {}" \
.format(configuration.backup_project_id, self.x_days, projects_to_skip)
return "SELECT * FROM " \
"[{}:SLI_backup_creation_latency_views.SLI_{}_days] " \
"WHERE projectId NOT IN {}"\
.format(configuration.metadata_storage_project_id,
self.x_days, projects_to_skip)

def format_query_results(self, results, snapshot_time):
formatted_results = [{"snapshotTime": snapshot_time,
Expand Down
13 changes: 7 additions & 6 deletions src/slo/backup_quality/quality_query_specification.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@

class QualityQuerySpecification(object):

def query_string(self):
@staticmethod
def query_string():
projects_to_skip = tuple(configuration.projects_to_skip)
return \
"SELECT * FROM [{}:SLI_backup_quality_views.SLI_quality] WHERE source_project_id NOT IN {}"\
.format(configuration.backup_project_id, projects_to_skip)
return "SELECT * FROM [{}:SLI_backup_quality_views.SLI_quality] " \
"WHERE source_project_id NOT IN {}"\
.format(configuration.metadata_storage_project_id, projects_to_skip)

def format_query_results(self, results, snapshot_time):
@staticmethod
def format_query_results(results, snapshot_time):
formatted_results = [{"snapshotTime": snapshot_time,
"projectId": result['f'][0]['v'],
"datasetId": result['f'][1]['v'],
Expand All @@ -36,4 +38,3 @@ def to_table_reference(table):
dataset_id=table['datasetId'],
table_id=table['tableId'],
partition_id=partition_id)

2 changes: 1 addition & 1 deletion src/slo/sli_results_streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class SLIResultsStreamer(object):
def __init__(self,
table_id,
dataset_id="SLI_history",
project_id=configuration.backup_project_id
project_id=configuration.metadata_storage_project_id
):
self.data_streamer = DataStreamer(project_id=project_id, dataset_id=dataset_id, table_id=table_id)

Expand Down
18 changes: 18 additions & 0 deletions terraform/bbq/project_iams.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,21 @@ resource "google_project_iam_member" "project_bigqueryjobuser_iam" {
role = "roles/bigquery.jobUser"
member = "serviceAccount:${var.bbq_project}@appspot.gserviceaccount.com"
}

resource "google_project_iam_member" "project_bigquerydataeditor_iam" {
project = "${var.bbq_metadata_project}"
role = "roles/bigquery.dataEditor"
member = "serviceAccount:${var.bbq_project}@appspot.gserviceaccount.com"
}

resource "google_project_iam_member" "project_bigqueryjobuser_iam" {
project = "${var.bbq_metadata_project}"
role = "roles/bigquery.jobUser"
member = "serviceAccount:${var.bbq_project}@appspot.gserviceaccount.com"
}

resource "google_project_iam_member" "project_storageobjectviewer_iam" {
project = "${var.bbq_metadata_project}"
role = "roles/storage.objectViewer"
member = "serviceAccount:${var.bbq_project}@appspot.gserviceaccount.com"
}
4 changes: 4 additions & 0 deletions terraform/bbq/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ variable "bbq_project" {
description = "it is needed to filter out backups from SLI"
}

variable "bbq_metadata_project" {
description = "it is needed to export datastore backups from GCS to BQ"
}

variable "bbq_restoration_project" {
description = "it is needed to filter out restored backups from SLI"
}
Expand Down
2 changes: 2 additions & 0 deletions terraform/sli/SLI_history_dataset.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ resource "google_bigquery_table" "SLI_backup_creation_latency" {

time_partitioning {
type = "DAY"
expiration_ms = "${local.one_year_in_ms}"
}

schema= "${file("${path.module}/SLI_backup_creation_latency_filtered_table_schema.json")}"
Expand Down Expand Up @@ -75,6 +76,7 @@ resource "google_bigquery_table" "SLI_backup_quality" {

time_partitioning {
type = "DAY"
expiration_ms = "${local.one_year_in_ms}"
}

schema= "${file("${path.module}/SLI_backup_quality_filtered_table_schema.json")}"
Expand Down
9 changes: 7 additions & 2 deletions terraform/sli/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ variable "bbq_restoration_project" {
description = "it is needed to filter out restored backups from SLI"
}

variable "bbq_metadata_project" {
description = "storage project for bbq metadata"
}

variable "gcp_census_project" {
description = "project where GCP Census data resides. More specifically we need bigquery.table_metadata_v1_0 and bigquery.partition_metadata_v1_0 table from that project"
}
Expand All @@ -21,8 +25,9 @@ variable "datastore_export_project" {
}

locals {
datastore_export_project = "${var.datastore_export_project != "" ? var.datastore_export_project : var.bbq_project}"
SLI_views_destination_project = "${var.SLI_views_destination_project != "" ? var.SLI_views_destination_project : var.bbq_project}"
datastore_export_project = "${var.datastore_export_project != "" ? var.datastore_export_project : var.bbq_metadata_project}"
SLI_views_destination_project = "${var.SLI_views_destination_project != "" ? var.SLI_views_destination_project : var.bbq_metadata_project}"
one_year_in_ms = 31536000000
}

variable "datastore_export_dataset" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def test_should_create_valid_query_with_skipped_projects_in_it(self, projects_to
latency_query_spec = LatencyQuerySpecification(3)
query = latency_query_spec.query_string()
self.assertEqual(query,
"SELECT * FROM [BBQ-project-id:SLI_backup_creation_latency_views.SLI_3_days] WHERE projectId NOT IN ('BBQ-project-id', '123')")
"SELECT * FROM [BBQ-metadata-project-id:SLI_backup_creation_latency_views.SLI_3_days] WHERE projectId NOT IN ('BBQ-project-id', '123')")
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def test_should_create_valid_query_with_skipped_projects_in_it(self, projects_to
quality_query_spec = QualityQuerySpecification()
query = quality_query_spec.query_string()
self.assertEqual(query,
"SELECT * FROM [BBQ-project-id:SLI_backup_quality_views.SLI_quality] WHERE source_project_id NOT IN ('BBQ-project-id', '123')")
"SELECT * FROM [BBQ-metadata-project-id:SLI_backup_quality_views.SLI_quality] WHERE source_project_id NOT IN ('BBQ-project-id', '123')")

0 comments on commit ca20174

Please sign in to comment.