Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Daily quota improvements #16

Merged
merged 5 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .env.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ MONGODB_URI=mongodb://localhost:27017/
DATABASE=test
CLIENT_HOST=http://test/
IMAGE_STORE_PATH=/tmp/
PUBLIC_IMAGE_FOLDER=http://test-image-host/images/
PUBLIC_IMAGE_FOLDER=http://test-image-host/images/
RESPONSE_FAILURE_RETRY_SECONDS=0
RESPONSE_429_RETRY_SECONDS=0
PROCESS_DUPLICATE_SUBTASK_POLL_INTERVAL=0
9 changes: 9 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,12 @@
PUBLIC_IMAGE_FOLDER = os.environ.get("PUBLIC_IMAGE_FOLDER")

CLIENT_HOST = os.environ.get("CLIENT_HOST")

RESPONSE_FAILURE_RETRY_SECONDS = int(
os.environ.get("RESPONSE_FAILURE_RETRY_SECONDS", 1)
)
RESPONSE_429_RETRY_SECONDS = int(os.environ.get("RESPONSE_429_RETRY_SECONDS", 30))

PROCESS_DUPLICATE_SUBTASK_POLL_INTERVAL = int(
os.environ.get("PROCESS_DUPLICATE_SUBTASK_POLL_INTERVAL", 3)
)
4 changes: 2 additions & 2 deletions app/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def user_info(user_id):
def media_item():
return {
# "baseUrl": "https://lh3.googleusercontent.com/lr/AJUiC1xWhyltXJ-YvIFjY2oePjh8BSa-LaqZEwTXFtUwUIWubYgxBuK6elu33DtYSkPpuUc1uwHzEolcX2SgV1CXVCvP3LKnwWGeoa_SeIZXBwKy5Y40gdYHFPWLuhiCvhxIVdL7gio_fzKzt8Hpl4QyfESqhf2SBl1AT-0EL_CuIlDYtOsGRfS9rEB30DKXx8gMGx4PQ7wyd_EBvUQ_FkWAsOAQDbS5QjxBBujpKQxC8IyDPgKJdkW9VsIqIs-RejpoWi6tKHZJaK_-iwKyMzEvCEsqbnQ7DT3OqpcNNjZR0jj3eyzPA00a4m2e48hhpzYvlXlKFXjTUX7F8J4yxWNc087ESdtW6I9Ocv0HSUDETPIUlCv01lhDE8eNAn85E3_oVDM1HuFxdHH1jGaxvF85EweWE8vPVeMmfZEKJYLDy6vkPXqWk9EcWnqBPaKzEj25HVdMf7YNe82UlDRQ6lq0Q_c-uCqYy6btUzKuJvW7wEIljPrIO1GmONgNgvk8-qYC07vd44AUZtXhhK5M5AGu_LmEMYtUcSwYwXjl1El3mP_v9EZZTzQEzx9ezBdlavEG8UWequjCvOls3oiyFLOIEbLixHbbN1rxCScPoKZV7_1MmC3_xcQqSpjgWV3vMEFPDRl7ez0Ac6IT8uc_mOsXqZJtXPl751FUWrUEZ1KRWO3G5JVsZtbRuuUjNAk2Q_0sF9WaS685nYQb6zcrS2dcqq5hSGvK1YQ8EaON0dodUEPEcTiP4Tx7o0h1YB8RWNb2yjZV98XZZu-N1vFZOxcmp2wlhThKZ8RvUTdJNFn_ZX4M1bwBMuq_CrEL0MkhXRtAwwfDd79mZbWolhjd0HCza7pChhlHU74X4ulWLu3ZLZ41-_uGQu9GBbrcxFCgmzeZqONYhBqtR87HnkLF6B9_gWTzCrluE_uC6ybwetkNQFtC_3YZA0rsUie70EwNKQDKhk24B8AIzUOTSz3ICmeQkAt3AGLp_0SfsDzWz7rtI0_hRFWeqrL4JRjMjByaiZA",
"baseUrl": "https://mack-public.s3.amazonaws.com/google_photos_deduper/test_images/test-image-dup-1a.jpg?test",
"baseUrl": "https://google-photos-deduper-public.s3.amazonaws.com/test_images/test-image-dup-1a.jpg?test",
"filename": "PXL_20230710_022047570.jpg",
"id": "AE-vYs7L3AedDPgIeE301REI023URzShGzi4j_XtLBYZwUJ7xbWTqsUBLeP2MxGbKL8s06TdprvhyhQNg9dAHInYwHwB7EOOaA",
"mediaMetadata": {
Expand All @@ -67,5 +67,5 @@ def media_item():
},
"mimeType": "image/jpeg",
# "productUrl": "https://photos.google.com/lr/photo/AE-vYs7L3AedDPgIeE301REI023URzShGzi4j_XtLBYZwUJ7xbWTqsUBLeP2MxGbKL8s06TdprvhyhQNg9dAHInYwHwB7EOOaA",
"productUrl": "https://mack-public.s3.amazonaws.com/google_photos_deduper/test_images/test-image-1a.jpg",
"productUrl": "https://google-photos-deduper-public.s3.amazonaws.com/test_images/test-image-1a.jpg",
}
101 changes: 0 additions & 101 deletions app/lib/get_media_items_size_task.py

This file was deleted.

33 changes: 14 additions & 19 deletions app/lib/media_items_image_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,24 @@ def store_image(self, media_item) -> str:
success = False
while not success:
try:
request = requests.get(url, timeout=5)
response = requests.get(url, timeout=5)
response.raise_for_status()
with open(path, "wb") as file:
file.write(request.content)
file.write(response.content)
success = True
except requests.exceptions.RequestException as error:
attempts -= 1
if (
isinstance(error, requests.exceptions.HTTPError)
and error.response.status_code == 429
):
logging.warn(
f"Received {error} getting media item size\n"
f"See https://developers.google.com/photos/library/guides/api-limits-quotas#general-quota-limits\n"
f"Sleeping 60s before retry..."
)
time.sleep(60)
else:
logging.warn(
f"Received {error} downloading image\n"
f"media_item: {media_item}\n"
f"url: {url}\n"
f"attempts left: {attempts}"
)
sleep_time = app.config.RESPONSE_FAILURE_RETRY_SECONDS
if error.response is not None and error.response.status_code == 429:
sleep_time = app.config.RESPONSE_429_RETRY_SECONDS
logging.warning(
f"Received {error} downloading image\n"
f"media_item: {media_item}\n"
f"url: {url}\n"
f"attempts left: {attempts}\n"
f"sleeping for {sleep_time} seconds before retrying"
)
time.sleep(sleep_time)
if attempts <= 0:
raise error

Expand Down
107 changes: 107 additions & 0 deletions app/lib/media_items_image_store_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import os
import shutil
from unittest.mock import DEFAULT
import pytest
import requests
from app.lib.media_items_image_store import MediaItemsImageStore


# TODO: Use remote image URLs only when INTEGRATION_TEST env is true
remote_images_folder_path = (
"https://google-photos-deduper-public.s3.amazonaws.com/test_images"
)
image_url = f"{remote_images_folder_path}/test-image-2.jpg?test"

local_images_folder_path = "app/test/images"
local_image_path = f"{local_images_folder_path}/test-image-2.jpg"


@pytest.fixture
def image_store():
return MediaItemsImageStore()


@pytest.fixture
def storable_media_item(media_item):
return media_item | {"id": "image1", "baseUrl": image_url}


expected_path = "/tmp/image1-250.jpg"


@pytest.fixture(autouse=True)
def run_before_and_after_tests():
"""Fixture to execute asserts before and after a test is run"""
if os.path.isfile(expected_path):
os.remove(expected_path)
yield


def test_store_image(storable_media_item, image_store):
"""It should download the image using the baseUrl and return the storage filename."""
result = image_store.store_image(storable_media_item)

# It should return the storage filename
assert result == f"{storable_media_item['id']}-250.jpg"
# It should download the image
assert os.path.isfile(expected_path)
with open(expected_path, "rb") as downloaded_file:
downloaded_file_content = downloaded_file.read()
with open(local_image_path, "rb") as expected_file:
expected_file_content = expected_file.read()
assert downloaded_file_content == expected_file_content


def test_store_image__file_exists(mocker, storable_media_item, image_store):
"""If the file already exists, it should not download it again."""
shutil.copyfile(local_image_path, expected_path)
p = mocker.patch.object(requests, "get")

image_store.store_image(storable_media_item)

p.assert_not_called()


def test_store_image__handled_request_exceptions(
mocker, storable_media_item, image_store
):
"""It should retry up to 3 times if it receives a request exception."""
p = mocker.patch.object(requests, "get")
mock_429_response = requests.models.Response()
mock_429_response.status_code = 429
timeout_error = requests.exceptions.Timeout()
mock_successful_response = requests.models.Response()
mock_successful_response.status_code = 200
mock_successful_response._content = b"test"
p.side_effect = [mock_429_response, timeout_error, mock_successful_response]

result = image_store.store_image(storable_media_item)

# It should return the storage filename
assert result == f"{storable_media_item['id']}-250.jpg"
# It should download the image
assert os.path.isfile(expected_path)
with open(expected_path, "rb") as downloaded_file:
assert downloaded_file.read() == b"test"


def test_store_image__raised_request_exceptions(
mocker, storable_media_item, image_store
):
"""It should raise the error if a 4th exception is received"""
p = mocker.patch.object(requests, "get")
mock_429_response = requests.models.Response()
mock_429_response.status_code = 429
timeout_error = requests.exceptions.Timeout()
p.side_effect = [timeout_error, timeout_error, mock_429_response]

with pytest.raises(requests.exceptions.HTTPError) as error:
image_store.store_image(storable_media_item)
assert error == mock_429_response


def test_get_storage_path(mocker, storable_media_item, image_store):
"""It should return the correct storage path for a given storage filename"""
result = image_store.get_storage_path("test.jpg")

assert result == "/tmp/test.jpg"
Loading