Skip to content

Commit 7a00d68

Browse files
[PROD-1648] Bugfix describe volumes (#97)
* add chunking to describe volumes call
* bump version number
* update type hints
1 parent 08795f3 commit 7a00d68

File tree

2 files changed

+26
-15
lines changed

2 files changed

+26
-15
lines changed

sync/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"""Library for leveraging the power of Sync"""
2-
__version__ = "1.0.2"
2+
__version__ = "1.0.3"
33

44
TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

sync/awsdatabricks.py

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
from pathlib import Path
44
from time import sleep
5-
from typing import List, Tuple
5+
from typing import Generator, List, Tuple
66
from urllib.parse import urlparse
77

88
import boto3 as boto
@@ -481,28 +481,39 @@ def _get_ebs_volumes_for_instances(
481481
) -> List[dict]:
482482
"""Get all ebs volumes associated with a list of instance reservations"""
483483

484+
def get_chunk(instance_ids: List[str], chunk_size: int) -> Generator[List[str], None, None]:
    """
    Splits the instance_ids list into consecutive chunks of at most chunk_size items.

    This function exists to respect thresholds required by the call to
    ec2_client.describe_volumes below.

    The order of the ids is preserved, the final chunk may be shorter than
    chunk_size, and an empty instance_ids list yields no chunks at all.

    Raises ValueError (on first iteration) if chunk_size is smaller than 1.
    """
    # A negative step would make range() empty and silently drop every id, so
    # reject non-positive sizes explicitly instead of returning nothing.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    # Annotations use typing.List and the three-parameter Generator form so the
    # def statement evaluates on interpreters where `list[str]` or a
    # one-parameter `Generator[...]` raises TypeError.
    for idx in range(0, len(instance_ids), chunk_size):
        yield instance_ids[idx : idx + chunk_size]
492+
484493
instance_ids = []
485494
if instances:
486495
for instance in instances:
487496
instance_ids.append(instance.get("InstanceId"))
488497

489498
volumes = []
490-
if instance_ids:
491-
filters = [
492-
{"Name": "tag:Vendor", "Values": ["Databricks"]},
493-
{"Name": "attachment.instance-id", "Values": instance_ids},
494-
]
495-
496-
response = ec2_client.describe_volumes(Filters=filters)
497-
volumes = response.get("Volumes", [])
498-
next_token = response.get("NextToken")
499+
MAX_CHUNK_SIZE = 199
499500

500-
while next_token:
501-
response = ec2_client.describe_volumes(Filters=filters, NextToken=next_token)
502-
volumes += response.get("Volumes", [])
501+
if instance_ids:
502+
for chunk in get_chunk(instance_ids, MAX_CHUNK_SIZE):
503+
filters = [
504+
{"Name": "tag:Vendor", "Values": ["Databricks"]},
505+
{"Name": "attachment.instance-id", "Values": chunk},
506+
]
507+
508+
response = ec2_client.describe_volumes(Filters=filters)
509+
volumes = response.get("Volumes", [])
503510
next_token = response.get("NextToken")
504511

512+
while next_token:
513+
response = ec2_client.describe_volumes(Filters=filters, NextToken=next_token)
514+
volumes += response.get("Volumes", [])
515+
next_token = response.get("NextToken")
516+
505517
num_vol = len(volumes)
506518
logger.info(f"Identified {num_vol} ebs volumes in cluster")
507-
508519
return volumes

0 commit comments

Comments
 (0)