Skip to content

Commit bce4ffc

Browse files
authored
[inventory][fix] Better detect and remove unused regions (#2222)
1 parent 7f41f80 commit bce4ffc

File tree

13 files changed

+166
-91
lines changed

13 files changed

+166
-91
lines changed

fixlib/fixlib/baseresources.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from fixlib.json import from_json as _from_json, to_json as _to_json
1919
from fixlib.logger import log
2020
from fixlib.types import Json
21-
from fixlib.utils import make_valid_timestamp, utc_str
21+
from fixlib.utils import make_valid_timestamp, utc_str, utc
2222
from fixlib.basecategories import Category
2323

2424

@@ -369,7 +369,7 @@ def to_json(self) -> Json:
369369
return _to_json(self, strip_nulls=True)
370370

371371
def log(self, msg: str, data: Optional[Any] = None, exception: Optional[Exception] = None) -> None:
372-
now = datetime.utcnow().replace(tzinfo=timezone.utc)
372+
now = utc()
373373
log_entry = {
374374
"timestamp": now,
375375
"msg": str(msg),
@@ -418,23 +418,23 @@ def chksum(self) -> str:
418418

419419
@property
420420
def age(self) -> Optional[timedelta]:
421-
now = datetime.utcnow().replace(tzinfo=timezone.utc)
421+
now = utc()
422422
if self.ctime is not None:
423423
return now - self.ctime
424424
else:
425425
return None
426426

427427
@property
428428
def last_access(self) -> Optional[timedelta]:
429-
now = datetime.utcnow().replace(tzinfo=timezone.utc)
429+
now = utc()
430430
if self.atime is not None:
431431
return now - self.atime
432432
else:
433433
return None
434434

435435
@property
436436
def last_update(self) -> Optional[timedelta]:
437-
now = datetime.utcnow().replace(tzinfo=timezone.utc)
437+
now = utc()
438438
if self.mtime is not None:
439439
return now - self.mtime
440440
else:
@@ -894,6 +894,7 @@ class BaseRegion(PhantomBaseResource):
894894
long_name: Optional[str] = None
895895
latitude: Optional[float] = None
896896
longitude: Optional[float] = None
897+
region_in_use: Optional[bool] = field(default=None, metadata={"description": "Indicates if the region is in use."})
897898

898899
def _keys(self) -> Tuple[Any, ...]:
899900
if self._graph is None:

fixlib/fixlib/graph/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from enum import Enum
1010
from functools import lru_cache
1111
from time import time
12-
from typing import Dict, Iterator, List, Tuple, Optional, Union, Any, Type, TypeVar, Set
12+
from typing import Dict, Iterator, List, Tuple, Optional, Union, Any, Type, TypeVar, Set, Iterable
1313

1414
import networkx
1515
from attr import resolve_types
@@ -167,6 +167,13 @@ def add_node(self, node_for_adding: BaseResource, **attr: Any) -> None:
167167
# which stores it as a weakref.
168168
node_for_adding._graph = self
169169

170+
def remove_recursively(self, nodes: Iterable[Any]) -> None:
171+
remove_nodes = set()
172+
for node in nodes:
173+
remove_nodes.add(node)
174+
remove_nodes.update(self.successors(node))
175+
self.remove_nodes_from(remove_nodes)
176+
170177
def has_edge(
171178
self, src: BaseResource, dst: BaseResource, key: Optional[EdgeKey] = None, edge_type: Optional[EdgeType] = None
172179
) -> bool:

fixlib/fixlib/json_bender.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import logging
55
from abc import ABC
6-
from datetime import datetime
6+
from datetime import datetime, timezone
77
from enum import Enum
88
from typing import Dict, Any, Type, Union, Optional, Callable, List
99

@@ -418,7 +418,7 @@ def execute(self, source: Any) -> Any:
418418
if isinstance(source, str):
419419
return datetime.strptime(source, self._format)
420420
elif isinstance(source, (int, float)):
421-
return datetime.utcfromtimestamp(source)
421+
return datetime.fromtimestamp(source, timezone.utc)
422422
else:
423423
return source
424424

@@ -435,7 +435,7 @@ def __init__(self, out_format: str = "%Y-%m-%dT%H:%M:%SZ", **kwargs: Any):
435435

436436
def execute(self, source: Any) -> Any:
437437
if isinstance(source, (int, float)):
438-
return datetime.utcfromtimestamp(source).strftime(self._out_format)
438+
return datetime.fromtimestamp(source, timezone.utc).strftime(self._out_format)
439439
else:
440440
return source
441441

@@ -582,7 +582,7 @@ def execute(self, source: Any) -> Any:
582582

583583
class SecondsFromEpochToDatetime(Bender):
584584
def execute(self, source: int) -> str:
585-
return utc_str(datetime.utcfromtimestamp(source))
585+
return utc_str(datetime.fromtimestamp(source, timezone.utc))
586586

587587

588588
EmptyToNone = EmptyToNoneBender()

plugins/aws/fix_plugin_aws/collector.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from collections import defaultdict
21
import logging
2+
from collections import defaultdict
33
from concurrent.futures import Future, ThreadPoolExecutor
44
from datetime import datetime, timedelta, timezone
55
from typing import List, Type, Optional, ClassVar, Union, cast, Dict, Any
@@ -262,7 +262,7 @@ def get_last_run() -> Optional[datetime]:
262262
# wait for all futures to finish
263263
shared_queue.wait_for_submitted_work()
264264
# remove unused nodes
265-
self.remove_unused()
265+
self.remove_unused(global_builder)
266266
self.core_feedback.progress_done(self.account.dname, 1, 1, context=[self.cloud.id])
267267
self.error_accumulator.report_all(global_builder.core_feedback)
268268

@@ -334,10 +334,9 @@ def collect_and_set_metrics(
334334

335335
builder.submit_work("cloudwatch", collect_and_set_metrics, start, region, queries)
336336

337-
def remove_unused(self) -> None:
338-
remove_nodes = []
339-
340-
def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None: # type: ignore
337+
def remove_unused(self, builder: GraphBuilder) -> None:
338+
def rm_leaf_nodes(cls: Any, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None:
339+
remove_nodes = []
341340
for node in self.graph.nodes:
342341
if not isinstance(node, cls):
343342
continue
@@ -356,9 +355,10 @@ def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = T
356355
continue
357356
removed.add(node)
358357
self.graph.remove_node(node)
359-
remove_nodes.clear()
360358

361-
rm_nodes(bedrock.AwsBedrockFoundationModel, check_pred=False)
359+
rm_leaf_nodes(bedrock.AwsBedrockFoundationModel, check_pred=False)
360+
# remove regions that are not in use
361+
self.graph.remove_recursively(builder.nodes(AwsRegion, lambda r: r.region_in_use is False))
362362

363363
# TODO: move into separate AwsAccountSettings
364364
def update_account(self) -> None:

plugins/aws/fix_plugin_aws/resource/base.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,23 @@
22

33
import logging
44
import re
5-
from urllib.parse import quote_plus as urlquote
65
from abc import ABC
76
from concurrent.futures import Future
87
from datetime import datetime, timezone, timedelta
98
from functools import lru_cache
109
from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Type, TypeVar, Tuple
10+
from urllib.parse import quote_plus as urlquote
1111

12-
from math import ceil
13-
14-
from attr import evolve, field
12+
from attr import evolve
1513
from attrs import define
1614
from boto3.exceptions import Boto3Error
15+
from fixinventorydata.cloud import instances as cloud_instance_data, regions as cloud_region_data
16+
from math import ceil
1717

1818
from fix_plugin_aws.aws_client import AwsClient
1919
from fix_plugin_aws.configuration import AwsConfig
2020
from fix_plugin_aws.resource.pricing import AwsPricingPrice
2121
from fix_plugin_aws.utils import arn_partition
22-
from fixlib.utils import utc
2322
from fixlib.baseresources import (
2423
BaseAccount,
2524
BaseIamPrincipal,
@@ -40,7 +39,7 @@
4039
from fixlib.proc import set_thread_name
4140
from fixlib.threading import ExecutorQueue
4241
from fixlib.types import Json
43-
from fixinventorydata.cloud import instances as cloud_instance_data, regions as cloud_region_data
42+
from fixlib.utils import utc
4443

4544
log = logging.getLogger("fix.plugins.aws")
4645

@@ -362,7 +361,6 @@ class AwsRegion(BaseRegion, AwsResource):
362361
]
363362
}
364363
}
365-
region_in_use: Optional[bool] = field(default=None, metadata={"description": "Indicates if the region is in use."})
366364

367365
def __attrs_post_init__(self) -> None:
368366
super().__attrs_post_init__()
@@ -374,12 +372,30 @@ def __attrs_post_init__(self) -> None:
374372

375373
def complete_graph(self, builder: GraphBuilder, source: Json) -> None:
376374
count = 0
377-
# A region with less than 10 real resources is considered not in use.
375+
ignore_kinds = {
376+
"aws_athena_work_group",
377+
"aws_cloud_trail",
378+
"aws_ec2_internet_gateway",
379+
"aws_ec2_network_acl",
380+
"aws_ec2_security_group",
381+
"aws_ec2_subnet",
382+
"aws_ec2_route_table",
383+
}
384+
385+
def ignore_for_count(resource: BaseResource) -> bool:
386+
if isinstance(resource, PhantomBaseResource):
387+
return True
388+
if resource.kind == "aws_vpc" and getattr(resource, "vpc_is_default", False):
389+
return True
390+
if resource.kind in ignore_kinds:
391+
return True
392+
return False
393+
394+
# A region with less than 3 real resources is considered not in use.
378395
# AWS is creating a couple of resources in every region automatically.
379-
# The number 10 is chosen by looking into different empty regions.
380-
empty_region = 10
396+
empty_region = 3
381397
for succ in builder.graph.descendants(self):
382-
if not isinstance(succ, PhantomBaseResource):
398+
if not ignore_for_count(succ):
383399
count += 1
384400
if count > empty_region:
385401
break
@@ -487,11 +503,17 @@ def node(self, clazz: Optional[Type[AwsResourceType]] = None, **node: Any) -> Op
487503
return n # type: ignore
488504
return None
489505

490-
def nodes(self, clazz: Optional[Type[AwsResourceType]] = None, **node: Any) -> Iterator[AwsResourceType]:
506+
def nodes(
507+
self, clazz: Optional[Type[AwsResourceType]] = None, filter: Optional[Callable[[Any], bool]] = None, **node: Any
508+
) -> Iterator[AwsResourceType]:
491509
with self.graph_nodes_access.read_access:
492510
for n in self.graph:
493511
is_clazz = isinstance(n, clazz) if clazz else True
494-
if is_clazz and all(getattr(n, k, None) == v for k, v in node.items()):
512+
if (
513+
is_clazz
514+
and (filter(n) if filter else True)
515+
and all(getattr(n, k, None) == v for k, v in node.items())
516+
):
495517
yield n
496518

497519
def add_node(

plugins/aws/fix_plugin_aws/resource/sqs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
from datetime import datetime, timezone
22
from typing import ClassVar, Dict, List, Optional, Type, Any
33

44

@@ -51,8 +51,8 @@ class AwsSqsQueue(AwsResource, BaseQueue):
5151
mapping: ClassVar[Dict[str, Bender]] = {
5252
"id": S("QueueName"),
5353
"name": S("QueueName"),
54-
"ctime": S("CreatedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.utcfromtimestamp(x))),
55-
"mtime": S("LastModifiedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.utcfromtimestamp(x))),
54+
"ctime": S("CreatedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.fromtimestamp(x, timezone.utc))),
55+
"mtime": S("LastModifiedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.fromtimestamp(x, timezone.utc))),
5656
"arn": S("QueueArn"),
5757
"sqs_queue_url": S("QueueUrl"),
5858
"sqs_approximate_number_of_messages": S("ApproximateNumberOfMessages") >> AsInt(),

plugins/azure/fix_plugin_azure/collector.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,16 @@
2323
resources as compute_resources,
2424
)
2525
from fix_plugin_azure.resource.containerservice import resources as aks_resources
26+
from fix_plugin_azure.resource.cosmosdb import (
27+
AzureCosmosDBLocation,
28+
resources as cosmosdb_resources,
29+
)
2630
from fix_plugin_azure.resource.keyvault import resources as keyvault_resources
31+
from fix_plugin_azure.resource.machinelearning import (
32+
AzureMachineLearningUsage,
33+
AzureMachineLearningVirtualMachineSize,
34+
resources as ml_resources,
35+
)
2736
from fix_plugin_azure.resource.microsoft_graph import (
2837
MicrosoftGraphOrganization,
2938
resources as graph_resources,
@@ -43,17 +52,8 @@
4352
)
4453
from fix_plugin_azure.resource.security import resources as security_resources
4554
from fix_plugin_azure.resource.sql_server import resources as sql_resources
46-
from fix_plugin_azure.resource.cosmosdb import (
47-
AzureCosmosDBLocation,
48-
resources as cosmosdb_resources,
49-
)
5055
from fix_plugin_azure.resource.storage import AzureStorageAccountUsage, AzureStorageSku, resources as storage_resources
5156
from fix_plugin_azure.resource.web import resources as web_resources
52-
from fix_plugin_azure.resource.machinelearning import (
53-
AzureMachineLearningUsage,
54-
AzureMachineLearningVirtualMachineSize,
55-
resources as ml_resources,
56-
)
5757
from fixlib.baseresources import Cloud, GraphRoot, BaseAccount, BaseRegion
5858
from fixlib.core.actions import CoreFeedback, ErrorAccumulator
5959
from fixlib.graph import Graph
@@ -100,6 +100,7 @@ def __init__(
100100
core_feedback: CoreFeedback,
101101
task_data: Optional[Json] = None,
102102
max_resources_per_account: Optional[int] = None,
103+
filter_unused_resources: bool = True,
103104
):
104105
self.config = config
105106
self.cloud = cloud
@@ -108,6 +109,7 @@ def __init__(
108109
self.core_feedback = core_feedback
109110
self.graph = Graph(root=account, max_nodes=max_resources_per_account)
110111
self.task_data = task_data
112+
self.filter_unused_resources = filter_unused_resources
111113

112114
def collect(self) -> None:
113115
with ThreadPoolExecutor(
@@ -165,13 +167,14 @@ def get_last_run() -> Optional[datetime]:
165167
queue.wait_for_submitted_work()
166168

167169
# post-process nodes
168-
self.remove_unused()
170+
if self.filter_unused_resources:
171+
self.remove_unused(builder)
169172
for node, data in list(self.graph.nodes(data=True)):
170173
if isinstance(node, MicrosoftResource):
171174
node.after_collect(builder, data.get("source", {}))
172175

173176
# delete unnecessary nodes after all work is completed
174-
self.after_collect()
177+
self.after_collect(builder)
175178
# report all accumulated errors
176179
error_accumulator.report_all(self.core_feedback)
177180
self.core_feedback.progress_done(self.account.id, 1, 1, context=[self.cloud.id])
@@ -205,10 +208,10 @@ def collect_with(self, builder: GraphBuilder, locations: Dict[str, BaseRegion])
205208
def locations(self, builder: GraphBuilder) -> Dict[str, BaseRegion]:
206209
pass
207210

208-
def remove_unused(self) -> None:
211+
def remove_unused(self, builder: GraphBuilder) -> None:
209212
pass
210213

211-
def after_collect(self) -> None:
214+
def after_collect(self, builder: GraphBuilder) -> None:
212215
pass
213216

214217

@@ -234,10 +237,10 @@ def collect_with(self, builder: GraphBuilder, locations: Dict[str, BaseRegion])
234237
self.collect_resource_list(location.safe_name, builder.with_location(location), regional_resources)
235238
processed_locations.add(location.safe_name)
236239

237-
def remove_unused(self) -> None:
240+
def remove_unused(self, builder: GraphBuilder) -> None:
238241
remove_nodes = []
239242

240-
def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None: # type: ignore
243+
def rm_leaf_nodes(cls: Any, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None:
241244
for node in self.graph.nodes:
242245
if not isinstance(node, cls):
243246
continue
@@ -263,19 +266,20 @@ def remove_usage_zero_value() -> None:
263266
remove_nodes.append(node)
264267
self._delete_nodes(remove_nodes)
265268

266-
rm_nodes(AzureComputeVirtualMachineSize, AzureLocation)
267-
rm_nodes(AzureNetworkExpressRoutePortsLocation, AzureSubscription)
268-
rm_nodes(AzureNetworkVirtualApplianceSku, AzureSubscription)
269-
rm_nodes(AzureComputeDiskType, AzureSubscription)
270-
rm_nodes(AzureMachineLearningVirtualMachineSize, AzureLocation)
271-
rm_nodes(AzureStorageSku, AzureLocation)
272-
rm_nodes(AzureMysqlServerType, AzureSubscription)
273-
rm_nodes(AzurePostgresqlServerType, AzureSubscription)
274-
rm_nodes(AzureCosmosDBLocation, AzureLocation, check_pred=False)
275-
rm_nodes(AzureLocation, check_pred=False)
269+
rm_leaf_nodes(AzureComputeVirtualMachineSize, AzureLocation)
270+
rm_leaf_nodes(AzureNetworkExpressRoutePortsLocation, AzureSubscription)
271+
rm_leaf_nodes(AzureNetworkVirtualApplianceSku, AzureSubscription)
272+
rm_leaf_nodes(AzureComputeDiskType, AzureSubscription)
273+
rm_leaf_nodes(AzureMachineLearningVirtualMachineSize, AzureLocation)
274+
rm_leaf_nodes(AzureStorageSku, AzureLocation)
275+
rm_leaf_nodes(AzureMysqlServerType, AzureSubscription)
276+
rm_leaf_nodes(AzurePostgresqlServerType, AzureSubscription)
277+
rm_leaf_nodes(AzureCosmosDBLocation, AzureLocation, check_pred=False)
278+
rm_leaf_nodes(AzureLocation, check_pred=False)
276279
remove_usage_zero_value()
280+
self.graph.remove_recursively(builder.nodes(AzureLocation, lambda r: r.region_in_use is False))
277281

278-
def after_collect(self) -> None:
282+
def after_collect(self, builder: GraphBuilder) -> None:
279283
# Filter unnecessary nodes such as AzureComputeDiskTypePricing
280284
nodes_to_remove = []
281285
node_types = (AzureComputeDiskTypePricing,)

0 commit comments

Comments
 (0)