Skip to content
This repository has been archived by the owner on Aug 25, 2023. It is now read-only.

Commit

Permalink
Merge a99bb8f into 15791a3
Browse files Browse the repository at this point in the history
  • Loading branch information
radkomateusz committed Sep 27, 2018
2 parents 15791a3 + a99bb8f commit df76bb6
Show file tree
Hide file tree
Showing 16 changed files with 513 additions and 61 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ It's worth to underline that:
* Backups for partitions are scheduled randomly within the range of time specified in [config.yaml](./config/config.yaml),
* It is possible to check the progress via [Task Queues](https://console.cloud.google.com/appengine/taskqueues).

### On-demand table backup
BBQ provides an option to schedule an on-demand backup of a single non-partitioned table or of a single partition of a partitioned table.

Note that, unlike the normal flow, the on-demand flow skips the prerequisite checks before scheduling a copy job.

On-demand table backup is available from _\<your-project-id>_.__appspot.com__ site in 'Advanced' section.

## How to find backup for given table?
### Option 1
In order to find backup __Y__ for table __X__:
Expand Down
4 changes: 4 additions & 0 deletions app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ handlers:
script: src.backup.table_backup_handler.app
secure: always
login: admin
- url: /tasks/backups/on_demand/table.*
script: src.backup.on_demand_table_backup_handler.app
secure: always
login: admin
- url: /tasks/backups/dataset
script: src.backup.dataset_backup_handler.app
secure: always
Expand Down
36 changes: 36 additions & 0 deletions src/backup/abstract_should_backup_predicate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import logging
from abc import abstractmethod


class AbstractShouldBackupPredicate(object):
    """Template-method base deciding whether a table should be backed up.

    Subclasses provide _is_table_has_up_to_date_backup() to decide whether
    an existing backup is fresh enough to skip scheduling a copy job.
    """

    def __init__(self, big_query_table_metadata):
        # Metadata wrapper for the source BigQuery table under evaluation.
        self.big_query_table_metadata = big_query_table_metadata

    def test(self, table_entity):
        """Return True when a backup copy job should be scheduled.

        table_entity is the datastore record of previous backups for this
        table (may be None when the table was never backed up).
        """
        if not self.__is_possible_to_copy_table():
            return False

        if self.big_query_table_metadata.is_empty():
            # NOTE(review): empty tables are only logged, not skipped.
            logging.info('This table is empty')

        if self._is_table_has_up_to_date_backup(table_entity):
            logging.info('Backup is up to date')
            return False

        return True

    def __is_possible_to_copy_table(self):
        """Return False for missing, schema-less, external or view tables."""
        metadata = self.big_query_table_metadata
        if not metadata.table_exists():
            logging.info('Table not found (404)')
            return False
        if not metadata.is_schema_defined():
            logging.info('This table is without schema')
            return False
        if metadata.is_external_or_view_type():
            return False
        return True

    @abstractmethod
    def _is_table_has_up_to_date_backup(self, table_entity):
        """Subclass hook: True when an up-to-date backup already exists."""
        pass
17 changes: 11 additions & 6 deletions src/backup/backup_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,28 @@

from google.appengine.api import memcache

from src.commons.config.configuration import configuration
from src.backup.backup_creator import BackupCreator
from src.backup.dataset_id_creator import DatasetIdCreator
from src.backup.datastore.Table import Table
from src.backup.should_backup_predicate import ShouldBackupPredicate
from src.backup.backup_creator import BackupCreator
from src.backup.default_should_backup_predicate import \
DefaultShouldBackupPredicate
from src.backup.on_demand_should_backup_predicate import \
OnDemandShouldBackupPredicate
from src.commons.config.configuration import configuration
from src.commons.table_reference import TableReference


class BackupProcess(object):
def __init__(self, table_reference, big_query, big_query_table_metadata):
def __init__(self, table_reference, big_query, big_query_table_metadata, is_on_demand_backup):
self.project_id = table_reference.get_project_id()
self.dataset_id = table_reference.get_dataset_id()
self.table_id = table_reference.get_table_id()
self.partition_id = table_reference.get_partition_id()
self.big_query = big_query
self.big_query_table_metadata = big_query_table_metadata
self.should_backup_predicate = OnDemandShouldBackupPredicate(
self.big_query_table_metadata) if is_on_demand_backup else DefaultShouldBackupPredicate(
self.big_query_table_metadata)
self.now = None

def start(self):
Expand All @@ -41,8 +47,7 @@ def __backup_ever_done(table_entity):
return table_entity is not None

def __should_backup(self, table_entity):
return ShouldBackupPredicate(self.big_query_table_metadata)\
.test(table_entity)
return self.should_backup_predicate.test(table_entity)

def __create_backup(self, table_entity):
self.__ensure_dataset_for_backups_exists()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,24 @@
import logging

from src.backup.abstract_should_backup_predicate import \
AbstractShouldBackupPredicate

class ShouldBackupPredicate(object):

class DefaultShouldBackupPredicate(AbstractShouldBackupPredicate):

TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

def __init__(self, big_query_table_metadata):
self.big_query_table_metadata = big_query_table_metadata
super(DefaultShouldBackupPredicate, self).__init__(big_query_table_metadata)

def test(self, table_entity):
if not self.big_query_table_metadata.table_exists():
logging.info('Table not found (404)')
return False
if not self.big_query_table_metadata.is_schema_defined():
logging.info('This table is without schema')
return False
if self.big_query_table_metadata.is_empty():
logging.info('This table is empty')
if self.big_query_table_metadata.is_external_or_view_type():
return False
if not self.__should_backup(table_entity):
logging.info('Backup is up to date')
return False
return True

# pylint: disable=R0201
def __should_backup(self, table_entity):
def _is_table_has_up_to_date_backup(self, table_entity):
# TODO: change name of this class or split this method into two
if table_entity is None:
return True
return False
last_backup = table_entity.last_backup
if last_backup is None:
logging.info('No backups so far')
return True
return False
source_table_last_modified_time = \
self.big_query_table_metadata.get_last_modified_datetime()
logging.info(
Expand All @@ -43,8 +29,8 @@ def __should_backup(self, table_entity):
)
if source_table_last_modified_time > last_backup.last_modified:
logging.info("Backup time is older than table metadata")
return True
return False
return False
return True

def __format_timestamp(self, datetime):
if datetime:
Expand Down
18 changes: 18 additions & 0 deletions src/backup/on_demand_should_backup_predicate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import logging

from src.backup.abstract_should_backup_predicate import \
AbstractShouldBackupPredicate


class OnDemandShouldBackupPredicate(AbstractShouldBackupPredicate):
    """Predicate for on-demand backups: never treats a backup as up to date.

    The on-demand flow deliberately skips the freshness check, so a copy job
    is scheduled whenever the source table itself is copyable.
    """

    def __init__(self, big_query_table_metadata):
        super(OnDemandShouldBackupPredicate, self).__init__(
            big_query_table_metadata)

    def _is_table_has_up_to_date_backup(self, table_entity):
        """Always return False so the backup is forced.

        table_entity is None when the table has never been backed up before
        (see BackupProcess), so guard before logging its attributes —
        dereferencing None here would crash the very flow this predicate
        exists for.
        """
        if table_entity is None:
            logging.info(
                "Performing on-demand backup for a table with no previous "
                "backups. It is performed without checking if table already "
                "has up to date backup")
            return False
        logging.info(
            "Performing on-demand backup for %s:%s.%s$%s. "
            "It is performed without checking if table already has up to "
            "date backup",
            table_entity.project_id, table_entity.dataset_id,
            table_entity.table_id, table_entity.partition_id)
        return False
30 changes: 30 additions & 0 deletions src/backup/on_demand_table_backup_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import webapp2

from src.backup.table_backup import TableBackup
from src.commons.config.configuration import configuration
from src.commons.table_reference import TableReference
from src.commons.tasks import Tasks


class OnDemandTableBackupHandler(webapp2.RequestHandler):
    """Task handler that schedules an on-demand backup for a single table."""

    def __init__(self, request=None, response=None):
        super(OnDemandTableBackupHandler, self).__init__(request, response)
        # A retried task (one that previously failed) is detected and logged
        # here when it hits the defined mark, so it shows up in monitoring.
        Tasks.log_task_metadata_for(request=self.request)

    def get(self, project_id, dataset_id, table_id, partition_id=None):  # nopep8 pylint: disable=R0201
        """Start a backup of the referenced table or single partition."""
        reference = TableReference(project_id, dataset_id, table_id,
                                   partition_id)
        TableBackup.start(reference, is_on_demand_backup=True)


# Two routes map to the same handler: with and without a trailing partition
# id segment. A missing partition id means a whole-table backup.
app = webapp2.WSGIApplication([
    webapp2.Route('/tasks/backups/on_demand/table/<project_id:[^/]+>/<dataset_id:'
                  '[^/]+>/<table_id:[^/]+>', OnDemandTableBackupHandler),
    webapp2.Route('/tasks/backups/on_demand/table/<project_id:[^/]+>/<dataset_id:'
                  '[^/]+>/<table_id:[^/]+>/<partition_id:[^/]+>',
                  OnDemandTableBackupHandler)
], debug=configuration.debug_mode)
10 changes: 6 additions & 4 deletions src/backup/table_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,21 @@
class TableBackup(object):
    """Entry point for backing up a single table or partition."""

    @staticmethod
    def start(table_reference, is_on_demand_backup=False):
        """Back up the table, fanning out per partition when needed.

        :param table_reference: TableReference of the source table.
        :param is_on_demand_backup: forwarded to BackupProcess; True skips
            the 'backup up to date' prerequisite check.
        """
        big_query = BigQuery()
        metadata = BigQueryTableMetadata.get_table_by_reference(
            table_reference)

        # A daily-partitioned table (not a single partition of one) is
        # backed up by scheduling a task per partition instead.
        fan_out_partitions = (metadata.is_daily_partitioned()
                              and not metadata.is_partition())
        if fan_out_partitions:
            logging.info('Table (%s/%s/%s) is partitioned',
                         table_reference.get_project_id(),
                         table_reference.get_dataset_id(),
                         table_reference.get_table_id())
            TablePartitionsBackupScheduler(table_reference,
                                           big_query).start()
        else:
            BackupProcess(table_reference=table_reference,
                          big_query=big_query,
                          big_query_table_metadata=metadata,
                          is_on_demand_backup=is_on_demand_backup).start()
8 changes: 7 additions & 1 deletion src/restore/restore_ui_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,16 @@ def get(self):
configuration.restoration_project_id)


class OnDemandTableBackupUIHandler(BaseHandler):
    # Serves the 'Advanced' menu page with the on-demand table backup form.
    def get(self):
        """Render the on-demand table backup HTML form."""
        self.render_response('on_demand_table_backup.html')


# UI route table; '/_ah/start' is App Engine's instance-start hook and
# reuses the main page handler.
app = webapp2.WSGIApplication([
    ('/', MainPage),
    ('/_ah/start', MainPage),
    ('/ui/restoreDataset', RestoreDatasetUIHandler),
    ('/ui/restoreList', RestoreListUIHandler),
    ('/ui/restoreTable', RestoreTableUIHandler),
    ('/ui/onDemandTableBackup', OnDemandTableBackupUIHandler)
], debug=configuration.debug_mode)
1 change: 1 addition & 0 deletions templates/bbq-menu.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Advanced</a>
<div class="dropdown-menu" aria-labelledby="dropdown_advanced">
<a class="dropdown-item" href="/ui/restoreList">Restore custom backup list</a>
<a class="dropdown-item" href="/ui/onDemandTableBackup">On-demand table backup</a>
</div>
</li>
</ul>
Expand Down
89 changes: 89 additions & 0 deletions templates/on_demand_table_backup.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{% extends "bbq-menu.html" %}
{% block main %}

<script type="text/javascript">
function OnSubmitForm() {
    // Build the on-demand backup task URL from the form fields; the
    // partition id segment is appended only when the user supplied one.
    var form = document.restoreTableForm;
    var url = '/tasks/backups/on_demand/table/' + form.projectId.value + '/'
        + form.datasetId.value + '/' + form.tableId.value;
    if (form.partitionId.value) {
        url = url + '/' + form.partitionId.value;
    }

    var xhttp = new XMLHttpRequest();
    xhttp.open("GET", url, true);
    // Correlation id lets the request be traced in application logs.
    xhttp.setRequestHeader("request_correlation_id", Date.now());
    xhttp.timeout = 1000 * 3600;

    xhttp.onload = function () {
        document.getElementById("response").innerHTML = 'Http response: ' + xhttp.status;
    };
    xhttp.ontimeout = function (e) {
        document.getElementById("response").innerHTML = "Request timeout: " + e;
    };

    xhttp.send();
    // Returning false prevents the browser's default form submission.
    return false;
}
</script>
<h2>On-demand table backup</h2>
<p class="lead">
BBQ will schedule on-demand backup of given source table. <br>
    The on-demand flow doesn't check whether the table already has an up-to-date backup<br>

    Note that a successfully scheduled on-demand table backup can still fail while the copy job executes. Check the results in the application logs.
<br><br>
On demand backup works for:<br>
- non partitioned tables<br>
    - a single partition of a partitioned table (omitting the source partition id for a partitioned table will not force a backup)

</p>

<form name="restoreTableForm" onsubmit="return OnSubmitForm();">
<div class="form-group row">
<label for="sourceProjectId" class="col-sm-2 col-form-label">Source project id</label>
<div class="col-sm-8">
<input type="text" class="form-control" id="sourceProjectId" name="projectId"
placeholder="project id" required/>
<small id="sourceProjectIdHelp" class="form-text text-muted">Source project id
</small>
</div>
</div>
<div class="form-group row">
<label for="sourceDatasetId" class="col-sm-2 col-form-label">Source dataset id</label>
<div class="col-sm-8">
<input type="text" class="form-control" id="sourceDatasetId" name="datasetId"
placeholder="dataset id">
<small id="sourceDatasetIdHelp" class="form-text text-muted">Source dataset id
</small>
</div>
</div>
<div class="form-group row">
<label for="sourceTableId" class="col-sm-2 col-form-label">Source table id</label>
<div class="col-sm-8">
<input type="text" class="form-control" id="sourceTableId" name="tableId"
placeholder="table id">
<small id="sourceTableIdHelp" class="form-text text-muted">Source table id </small>
</div>
</div>
<div class="form-group row">
<label for="sourcePartitionId" class="col-sm-2 col-form-label">Source partition id</label>
<div class="col-sm-8">
<input type="number" class="form-control" id="sourcePartitionId" name="partitionId"
placeholder="partition id">
<small id="sourcePartitionIdHelp" class="form-text text-muted">Source partition id for partitioned table
</small>
</div>
</div>
</div>
<div class="form-group row">
<div class="col-sm-4">
<button type="submit" class="btn btn-primary">Schedule backup</button>
</div>
</div>
</form>

<br><strong>Response:</strong> <br>
<pre id="response"></pre>

{% endblock %}
Loading

0 comments on commit df76bb6

Please sign in to comment.