Skip to content
This repository has been archived by the owner on Aug 25, 2023. It is now read-only.

Commit

Permalink
YACHT-910: small refactoring before implementation - making get_table_or_partition a private method of big_query class
Browse files Browse the repository at this point in the history
  • Loading branch information
jarekdrabek committed Jul 3, 2018
1 parent 1bbdfbe commit f5ec4e5
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 34 deletions.
8 changes: 2 additions & 6 deletions src/backup/table_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@ class TableBackup(object):
@staticmethod
def start(table_reference):
big_query = BigQuery()
big_query_table_metadata = big_query.get_table_or_partition(
table_reference.get_project_id(),
table_reference.get_dataset_id(),
table_reference.get_table_id(),
table_reference.get_partition_id()
)

big_query_table_metadata = big_query.get_table_by_reference(table_reference)

if big_query_table_metadata.is_daily_partitioned() \
and not big_query_table_metadata.is_empty():
Expand Down
20 changes: 10 additions & 10 deletions src/big_query/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,25 +163,25 @@ def __sync_query(self, query, timeout=30000, use_legacy_sql=False):
projectId=configuration.backup_project_id,
body=query_data).execute(num_retries=3)

def get_table_or_partition(self, project_id, dataset_id, table_id,
partition_id):
def __get_table_or_partition(self, project_id, dataset_id, table_id,
partition_id):
table_metadata = self.__get_table(project_id, dataset_id,
BigQuery.get_table_id_with_partition_id(
table_id, partition_id))
return BigQueryTableMetadata(table_metadata)

def get_table_by_reference(self, reference):
return self.get_table_or_partition(project_id=reference.project_id,
dataset_id=reference.dataset_id,
table_id=reference.table_id,
partition_id=reference.partition_id)
return self.__get_table_or_partition(project_id=reference.project_id,
dataset_id=reference.dataset_id,
table_id=reference.table_id,
partition_id=reference.partition_id)

@cached(time=300)
def get_table_by_reference_cached(self, reference):
return self.get_table_or_partition(project_id=reference.project_id,
dataset_id=reference.dataset_id,
table_id=reference.table_id,
partition_id=reference.partition_id)
return self.__get_table_or_partition(project_id=reference.project_id,
dataset_id=reference.dataset_id,
table_id=reference.table_id,
partition_id=reference.partition_id)

@staticmethod
def get_table_id_with_partition_id(table_id, partition_id):
Expand Down
12 changes: 9 additions & 3 deletions src/restore/test/table_randomizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from commons.decorators.retry import retry
from src.big_query.big_query import BigQuery, RandomizationError
from src.table_reference import TableReference


class DoesNotMeetSampleCriteriaException(BaseException):
Expand Down Expand Up @@ -52,12 +53,17 @@ def __get_random_partition(self, table_reference):
partitions = self.big_query.list_table_partitions(table_reference.project_id,
table_reference.dataset_id,
table_reference.table_id)
random_partition = self.__get_random_item_of_the_list(partitions)

new_table_reference = TableReference(table_reference.project_id, table_reference.dataset_id, table_reference.table_id, random_partition)

return self.big_query.get_table_by_reference(new_table_reference)

def __get_random_item_of_the_list(self, partitions):
number_of_partitions = len(partitions)
random_partition = partitions[
random.randint(0, number_of_partitions - 1)]["partitionId"]
table_metadata = self.big_query.get_table_or_partition(
table_reference.project_id, table_reference.dataset_id, table_reference.table_id, random_partition)
return table_metadata
return random_partition

@staticmethod
def __has_been_modified_since_midnight(table_metadata):
Expand Down
40 changes: 30 additions & 10 deletions tests/backup/test_table_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from google.appengine.ext import testbed, ndb
from mock import patch, Mock

from src.big_query.big_query_table_metadata import BigQueryTableMetadata
from src.big_query.big_query import BigQuery
from src.backup.backup_process import BackupProcess
from src.backup.table_backup import TableBackup
from src.backup.table_partitions_backup_scheduler import \
Expand All @@ -12,7 +14,6 @@


@freeze_time("2017-04-04")
@patch('src.big_query.big_query.BigQuery.__init__', Mock(return_value=None))
class TestTableBackup(unittest.TestCase):

def setUp(self):
Expand All @@ -27,12 +28,13 @@ def setUp(self):
def tearDown(self):
self.testbed.deactivate()

@patch('src.big_query.big_query.BigQuery.get_table_or_partition.return_value.is_daily_partitioned.return_value', True) # nopep8 pylint: disable=C0301
@patch('src.big_query.big_query.BigQuery.get_table_or_partition.return_value.is_empty.return_value', False) # nopep8 pylint: disable=C0301
@patch('src.big_query.big_query.BigQuery.get_table_or_partition')

@patch.object(TablePartitionsBackupScheduler, 'start')
def test_that_partition_backups_are_scheduled_for_partitioned_table(
self, _, table_partitions_backup_scheduler):
@patch.object(BigQuery, 'get_table_by_reference', return_value=BigQueryTableMetadata(None))
@patch.object(BigQueryTableMetadata, 'is_daily_partitioned', return_value=True)
@patch.object(BigQueryTableMetadata, 'is_empty', return_value=False)
def test_that_partition_backups_are_scheduled_for_non_empty_partitioned_table(
self, _, _1, _2, table_partitions_backup_scheduler):
# given
table_reference = TableReference(project_id="test-project",
dataset_id="test-dataset",
Expand All @@ -45,11 +47,29 @@ def test_that_partition_backups_are_scheduled_for_partitioned_table(
# then
table_partitions_backup_scheduler.assert_called_once()

@patch('src.big_query.big_query.BigQuery.get_table_or_partition.return_value.is_daily_partitioned.return_value', False) # nopep8 pylint: disable=C0301
@patch('src.big_query.big_query.BigQuery.get_table_or_partition')
@patch.object(BigQuery, 'get_table_by_reference', return_value=BigQueryTableMetadata(None))
@patch.object(BigQueryTableMetadata, 'is_daily_partitioned', return_value=False)
@patch.object(BackupProcess, 'start')
def test_that_table_backup_is_scheduled_for_not_partitioned_table(
        self, backup_start, _, _1):
    """Check that BackupProcess.start runs once for a non-partitioned table.

    BigQuery.get_table_by_reference is patched to return an empty
    BigQueryTableMetadata whose is_daily_partitioned() reports False.
    Patch decorators are applied bottom-up, so the mocks arrive as:
    backup_start (BackupProcess.start), _ (is_daily_partitioned),
    _1 (get_table_by_reference).
    """
    # given
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table",
                                     partition_id=None)

    # when
    TableBackup.start(table_reference)

    # then
    # Compare call_count explicitly instead of using assert_called_once():
    # depending on the installed mock release that helper may be missing
    # (AttributeError) or auto-created as a child mock that always passes.
    self.assertEqual(backup_start.call_count, 1)

@patch.object(BigQuery, 'get_table_by_reference', return_value=BigQueryTableMetadata(None))
@patch.object(BigQueryTableMetadata, 'is_daily_partitioned', return_value=True)
@patch.object(BigQueryTableMetadata, 'is_empty', return_value=True)
@patch.object(BackupProcess, 'start')
def test_that_backup_is_created_for_not_partitioned_table(
self, backup_start, _):
def test_that_table_backup_is_scheduled_for_empty_partitioned_table(
self, backup_start, _, _1, _2):
# given
table_reference = TableReference(project_id="test-project",
dataset_id="test-dataset",
Expand Down
9 changes: 4 additions & 5 deletions tests/restore/test/test_table_randomizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,17 @@ def tearDown(self):


@patch.object(BigQuery,'fetch_random_table')
@patch.object(BigQuery,'get_table_by_reference')
@patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
@patch.object(BigQueryTableMetadata, 'is_external_or_view_type', return_value=False)
@patch.object(BigQueryTableMetadata, 'is_empty', return_value=False)
@patch.object(BigQueryTableMetadata, 'get_last_modified_datetime')
@patch.object(BigQueryTableMetadata, 'is_daily_partitioned', return_value=True)
@patch.object(BigQuery, 'list_table_partitions')
@patch.object(BigQuery, 'get_table_or_partition')
@patch.object(BigQuery, 'get_table_by_reference')
@patch.object(random, 'randint', return_value=1)
def test_return_random_partition_when_table_is_partitioned(
self, _, get_table_or_partition, list_table_partitions, _1,
get_last_modified_datetime, _2, _3, _4, get_table_by_reference, fetch_random_table):
self, _, get_table_by_reference, list_table_partitions, _1,
get_last_modified_datetime, _2, _3, _4, fetch_random_table):
# given
get_table_by_reference.return_value= BigQueryTableMetadata(None)

Expand All @@ -63,7 +62,7 @@ def test_return_random_partition_when_table_is_partitioned(
under_test.get_random_table_metadata()
#
# # then
get_table_or_partition.assert_called_with('p1', 'd1', 't1', "20170909")
get_table_by_reference.assert_called_with(TableReference('p1', 'd1', 't1', "20170909"))

@patch.object(BigQuery, 'get_table_by_reference', return_value=BigQueryTableMetadata(None))
@patch.object(BigQueryTableMetadata, 'get_last_modified_datetime')
Expand Down

0 comments on commit f5ec4e5

Please sign in to comment.