diff --git a/koku/api/migrations/0061_alter_providerinfrastructuremap_unique_together.py b/koku/api/migrations/0061_alter_providerinfrastructuremap_unique_together.py
new file mode 100644
index 0000000000..cc38a56e5f
--- /dev/null
+++ b/koku/api/migrations/0061_alter_providerinfrastructuremap_unique_together.py
@@ -0,0 +1,16 @@
+# Generated by Django 3.2.22 on 2023-12-05 14:38
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("api", "0060_provider_polling_timestamp"),
+    ]
+
+    operations = [
+        migrations.AlterUniqueTogether(
+            name="providerinfrastructuremap",
+            unique_together={("infrastructure_type", "infrastructure_provider")},
+        ),
+    ]
diff --git a/koku/api/provider/models.py b/koku/api/provider/models.py
index 82a856ff1a..4670ef8342 100644
--- a/koku/api/provider/models.py
+++ b/koku/api/provider/models.py
@@ -568,5 +568,8 @@ class ProviderInfrastructureMap(models.Model):
     associated provider the cluster is installed on.
     """
 
+    class Meta:
+        unique_together = ("infrastructure_type", "infrastructure_provider")
+
     infrastructure_type = models.CharField(max_length=50, choices=Provider.CLOUD_PROVIDER_CHOICES, blank=False)
     infrastructure_provider = models.ForeignKey("Provider", on_delete=models.CASCADE)
diff --git a/koku/masu/database/trino_sql/azure/reporting_ocpinfrastructure_provider_map.sql b/koku/masu/database/trino_sql/azure/reporting_ocpinfrastructure_provider_map.sql
index 1f84d03833..fd5c2179bf 100644
--- a/koku/masu/database/trino_sql/azure/reporting_ocpinfrastructure_provider_map.sql
+++ b/koku/masu/database/trino_sql/azure/reporting_ocpinfrastructure_provider_map.sql
@@ -1,6 +1,6 @@
 WITH cte_azure_instances AS (
-    SELECT DISTINCT split_part(coalesce(azure.resourceid, azure.instanceid), '/', 9) as instance,
+    SELECT DISTINCT split_part(coalesce(nullif(azure.resourceid, ''), azure.instanceid), '/', 9) as instance,
         azure.source
     FROM hive.{{schema | sqlsafe}}.azure_line_items AS azure
     WHERE coalesce(azure.date, azure.usagedatetime) >= {{start_date}}
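Note: the migration and the Meta change above enforce, at the database level, a single ProviderInfrastructureMap row per (infrastructure_type, infrastructure_provider) pair. A minimal sketch of what the constraint does, assuming a configured Django environment, an existing Provider row, and the Provider.PROVIDER_AWS choice constant:

    from django.db import IntegrityError, transaction
    from api.provider.models import Provider, ProviderInfrastructureMap

    provider = Provider.objects.first()
    ProviderInfrastructureMap.objects.create(
        infrastructure_type=Provider.PROVIDER_AWS, infrastructure_provider=provider
    )
    try:
        with transaction.atomic():
            # an identical (type, provider) pair now violates the unique constraint
            ProviderInfrastructureMap.objects.create(
                infrastructure_type=Provider.PROVIDER_AWS, infrastructure_provider=provider
            )
    except IntegrityError:
        print("duplicate infrastructure mapping rejected")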
diff --git a/koku/masu/database/trino_sql/reporting_azurecostentrylineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_azurecostentrylineitem_daily_summary.sql
index b4a23b379e..a247d158cc 100644
--- a/koku/masu/database/trino_sql/reporting_azurecostentrylineitem_daily_summary.sql
+++ b/koku/masu/database/trino_sql/reporting_azurecostentrylineitem_daily_summary.sql
@@ -21,17 +21,17 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_azurecostentrylineitem_daily
 WITH cte_line_items AS (
     SELECT date(coalesce(date, usagedatetime)) as usage_date,
         INTEGER '{{bill_id | sqlsafe}}' as cost_entry_bill_id,
-        coalesce(subscriptionid, subscriptionguid) as subscription_guid,
+        coalesce(nullif(subscriptionid, ''), subscriptionguid) as subscription_guid,
         resourcelocation as resource_location,
-        coalesce(servicename, metercategory) as service_name,
+        coalesce(nullif(servicename, ''), metercategory) as service_name,
         json_extract_scalar(json_parse(additionalinfo), '$.ServiceType') as instance_type,
-        cast(coalesce(quantity, usagequantity) as DECIMAL(24,9)) as usage_quantity,
-        cast(coalesce(costinbillingcurrency, pretaxcost) as DECIMAL(24,9)) as pretax_cost,
-        coalesce(billingcurrencycode, currency, billingcurrency) as currency,
+        cast(coalesce(nullif(quantity, 0), usagequantity) as DECIMAL(24,9)) as usage_quantity,
+        cast(coalesce(nullif(costinbillingcurrency, 0), pretaxcost) as DECIMAL(24,9)) as pretax_cost,
+        coalesce(nullif(billingcurrencycode, ''), nullif(currency, ''), billingcurrency) as currency,
         json_parse(tags) as tags,
-        coalesce(resourceid, instanceid) as instance_id,
+        coalesce(nullif(resourceid, ''), instanceid) as instance_id,
         cast(source as UUID) as source_uuid,
-        coalesce(subscriptionname, subscriptionid, subscriptionguid) as subscription_name,
+        coalesce(nullif(subscriptionname, ''), nullif(subscriptionid, ''), subscriptionguid) as subscription_name,
         CASE
             WHEN regexp_like(split_part(unitofmeasure, ' ', 1), '^\d+(\.\d+)?$')
                 AND NOT (unitofmeasure = '100 Hours' AND metercategory='Virtual Machines')
                 AND NOT split_part(unitofmeasure, ' ', 2) = ''
                 THEN cast(split_part(unitofmeasure, ' ', 1) as INTEGER)
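Note: SQL COALESCE only skips NULLs, so an empty-string subscriptionid or a zero quantity from one Azure export format would otherwise shadow the populated legacy column. A small Python model of the Trino semantics (illustrative only, not project code):

    def coalesce(*vals):
        # SQL COALESCE: first non-NULL argument
        return next((v for v in vals if v is not None), None)

    def nullif(a, b):
        # SQL NULLIF: NULL when the arguments are equal, else the first argument
        return None if a == b else a

    assert coalesce("", "fallback") == ""                      # "" is not NULL, so it wins
    assert coalesce(nullif("", ""), "fallback") == "fallback"  # "" now falls through
    assert coalesce(nullif(0, 0), 42) == 42                    # same idea for numeric columns

One caveat: nullif(quantity, 0) also sends a genuine zero to the fallback column, which is presumably acceptable here because both columns carry the same measure across export formats.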
diff --git a/koku/masu/database/trino_sql/reporting_ocpazurecostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpazurecostlineitem_daily_summary.sql
index f78a878271..ec393ec7fd 100644
--- a/koku/masu/database/trino_sql/reporting_ocpazurecostlineitem_daily_summary.sql
+++ b/koku/masu/database/trino_sql/reporting_ocpazurecostlineitem_daily_summary.sql
@@ -149,10 +149,10 @@ INSERT INTO hive.{{schema | sqlsafe}}.azure_openshift_daily_resource_matched_temp
 )
 SELECT cast(uuid() as varchar) as uuid,
     coalesce(azure.date, azure.usagedatetime) as usage_start,
-    split_part(coalesce(resourceid, instanceid), '/', 9) as resource_id,
-    coalesce(servicename, metercategory) as service_name,
+    split_part(coalesce(nullif(resourceid, ''), instanceid), '/', 9) as resource_id,
+    coalesce(nullif(servicename, ''), metercategory) as service_name,
     max(json_extract_scalar(json_parse(azure.additionalinfo), '$.ServiceType')) as instance_type,
-    coalesce(azure.subscriptionid, azure.subscriptionguid) as subscription_guid,
+    coalesce(nullif(azure.subscriptionid, ''), azure.subscriptionguid) as subscription_guid,
     azure.resourcelocation as resource_location,
     max(CASE
             WHEN split_part(unitofmeasure, ' ', 2) = 'Hours'
@@ -163,9 +163,9 @@ SELECT cast(uuid() as varchar) as uuid,
             THEN split_part(unitofmeasure, ' ', 2)
             ELSE unitofmeasure
         END) as unit_of_measure,
-    sum(coalesce(azure.quantity, azure.usagequantity)) as usage_quantity,
-    coalesce(azure.billingcurrencycode, azure.currency) as currency,
-    sum(coalesce(azure.costinbillingcurrency, azure.pretaxcost)) as pretax_cost,
+    sum(coalesce(nullif(azure.quantity, 0), azure.usagequantity)) as usage_quantity,
+    coalesce(nullif(azure.billingcurrencycode, ''), azure.currency) as currency,
+    sum(coalesce(nullif(azure.costinbillingcurrency, 0), azure.pretaxcost)) as pretax_cost,
     azure.tags,
     max(azure.resource_id_matched) as resource_id_matched,
     {{ocp_source_uuid}} as ocp_source,
@@ -179,11 +179,11 @@ WHERE azure.source = {{azure_source_uuid}}
     AND coalesce(azure.date, azure.usagedatetime) < date_add('day', 1, {{end_date}})
     AND azure.resource_id_matched = TRUE
 GROUP BY coalesce(azure.date, azure.usagedatetime),
-    split_part(coalesce(resourceid, instanceid), '/', 9),
-    coalesce(servicename, metercategory),
-    coalesce(subscriptionid, subscriptionguid),
+    split_part(coalesce(nullif(resourceid, ''), instanceid), '/', 9),
+    coalesce(nullif(servicename, ''), metercategory),
+    coalesce(nullif(subscriptionid, ''), subscriptionguid),
     azure.resourcelocation,
-    coalesce(azure.billingcurrencycode, azure.currency),
+    coalesce(nullif(azure.billingcurrencycode, ''), azure.currency),
     azure.tags
 ;
 
@@ -219,9 +219,9 @@ WITH cte_enabled_tag_keys AS (
 SELECT cast(uuid() as varchar) as uuid,
     coalesce(azure.date, azure.usagedatetime) as usage_start,
     split_part(coalesce(resourceid, instanceid), '/', 9) as resource_id,
-    coalesce(servicename, metercategory) as service_name,
+    coalesce(nullif(servicename, ''), metercategory) as service_name,
     max(json_extract_scalar(json_parse(azure.additionalinfo), '$.ServiceType')) as instance_type,
-    coalesce(azure.subscriptionid, azure.subscriptionguid) as subscription_guid,
+    coalesce(nullif(azure.subscriptionid, ''), azure.subscriptionguid) as subscription_guid,
     azure.resourcelocation as resource_location,
     max(CASE
             WHEN split_part(unitofmeasure, ' ', 2) = 'Hours'
@@ -232,9 +232,9 @@ SELECT cast(uuid() as varchar) as uuid,
             THEN split_part(unitofmeasure, ' ', 2)
             ELSE unitofmeasure
         END) as unit_of_measure,
-    sum(coalesce(azure.quantity, azure.usagequantity)) as usage_quantity,
-    coalesce(azure.billingcurrencycode, azure.currency) as currency,
-    sum(coalesce(azure.costinbillingcurrency, azure.pretaxcost)) as pretax_cost,
+    sum(coalesce(nullif(azure.quantity, 0), azure.usagequantity)) as usage_quantity,
+    coalesce(nullif(azure.billingcurrencycode, ''), azure.currency) as currency,
+    sum(coalesce(nullif(azure.costinbillingcurrency, 0), azure.pretaxcost)) as pretax_cost,
     json_format(
         cast(
             map_filter(
@@ -257,10 +257,10 @@ WHERE azure.source = {{azure_source_uuid}}
     AND (azure.resource_id_matched = FALSE OR azure.resource_id_matched IS NULL)
 GROUP BY coalesce(azure.date, azure.usagedatetime),
     split_part(coalesce(resourceid, instanceid), '/', 9),
-    coalesce(servicename, metercategory),
-    coalesce(subscriptionid, subscriptionguid),
+    coalesce(nullif(servicename, ''), metercategory),
+    coalesce(nullif(subscriptionid, ''), subscriptionguid),
     azure.resourcelocation,
-    coalesce(azure.billingcurrencycode, azure.currency),
+    coalesce(nullif(azure.billingcurrencycode, ''), azure.currency),
     12, -- tags
     azure.matched_tag
 ;
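Note on the resource_id extraction kept throughout this file: Azure resource IDs store the resource name in the ninth slash-delimited segment, which is what split_part(..., '/', 9) pulls out (Trino's split_part is 1-indexed). A quick check in Python with a made-up ID:

    rid = ("/subscriptions/<sub-id>/resourceGroups/<rg>/providers/"
           "Microsoft.Compute/virtualMachines/my-vm")
    # Python indexes from 0, so [8] corresponds to split_part(..., 9)
    assert rid.split("/")[8] == "my-vm"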
dtype=pd.StringDtype(storage="pyarrow")) except Exception as error: LOG.error(log_json(msg="file could not be parsed", file_path=local_file_path), exc_info=error) raise GCPReportDownloaderError(error) diff --git a/koku/masu/external/downloader/oci/oci_report_downloader.py b/koku/masu/external/downloader/oci/oci_report_downloader.py index b916ef3e4a..5e123e5eb5 100644 --- a/koku/masu/external/downloader/oci/oci_report_downloader.py +++ b/koku/masu/external/downloader/oci/oci_report_downloader.py @@ -40,7 +40,7 @@ def divide_csv_monthly(file_path, filename): directory = os.path.dirname(file_path) try: - data_frame = pd.read_csv(file_path, dtype="str") + data_frame = pd.read_csv(file_path, dtype=pd.StringDtype(storage="pyarrow")) except Exception as error: LOG.error(f"File {file_path} could not be parsed. Reason: {error}") raise error diff --git a/koku/masu/external/kafka_msg_handler.py b/koku/masu/external/kafka_msg_handler.py index 3ea8b07a34..fa1ef41052 100644 --- a/koku/masu/external/kafka_msg_handler.py +++ b/koku/masu/external/kafka_msg_handler.py @@ -75,7 +75,7 @@ def divide_csv_daily(file_path: os.PathLike, manifest_id: int): daily_files = [] try: - data_frame = pd.read_csv(file_path, dtype="str") + data_frame = pd.read_csv(file_path, dtype=pd.StringDtype(storage="pyarrow")) except Exception as error: LOG.error(f"File {file_path} could not be parsed. Reason: {str(error)}") raise error diff --git a/koku/masu/test/util/aws/test_common.py b/koku/masu/test/util/aws/test_common.py index 4a655f8342..3213e69f31 100644 --- a/koku/masu/test/util/aws/test_common.py +++ b/koku/masu/test/util/aws/test_common.py @@ -13,7 +13,6 @@ from uuid import uuid4 import boto3 -import numpy as np import pandas as pd from botocore.exceptions import ClientError from dateutil.relativedelta import relativedelta @@ -755,7 +754,7 @@ def test_match_openshift_labels_with_nan_resources(self): matched_tags = [{"key": "value"}] data = [ - {"lineitem_resourceid": np.nan, "lineitem_unblendedcost": 1, "resourcetags": '{"key": "value"}'}, + {"lineitem_resourceid": "", "lineitem_unblendedcost": 1, "resourcetags": '{"key": "value"}'}, ] df = pd.DataFrame(data) @@ -779,7 +778,7 @@ def test_match_openshift_resource_with_nan_labels(self): matched_tags = [{"key": "value"}] data = [ - {"lineitem_resourceid": "id1", "lineitem_unblendedcost": 1, "resourcetags": np.nan}, + {"lineitem_resourceid": "id1", "lineitem_unblendedcost": 1, "resourcetags": ""}, ] df = pd.DataFrame(data) diff --git a/koku/masu/test/util/azure/test_azure_post_processor.py b/koku/masu/test/util/azure/test_azure_post_processor.py index 200a2d7cc1..b6a23b8e09 100644 --- a/koku/masu/test/util/azure/test_azure_post_processor.py +++ b/koku/masu/test/util/azure/test_azure_post_processor.py @@ -16,7 +16,7 @@ from masu.util.azure.azure_post_processor import AzurePostProcessor from masu.util.azure.common import INGRESS_REQUIRED_COLUMNS from reporting.provider.all.models import EnabledTagKeys -from reporting.provider.azure.models import TRINO_COLUMNS +from reporting.provider.azure.models import TRINO_REQUIRED_COLUMNS class TestAzurePostProcessor(MasuTestCase): @@ -44,9 +44,9 @@ def test_azure_process_dataframe(self): result, _ = self.post_processor.process_dataframe(df) columns = list(result) expected_columns = sorted( - col.replace("-", "_").replace("/", "_").replace(":", "_").lower() for col in TRINO_COLUMNS + col.replace("-", "_").replace("/", "_").replace(":", "_").lower() for col in TRINO_REQUIRED_COLUMNS ) - self.assertEqual(columns, expected_columns) + 
diff --git a/koku/masu/test/util/aws/test_common.py b/koku/masu/test/util/aws/test_common.py
index 4a655f8342..3213e69f31 100644
--- a/koku/masu/test/util/aws/test_common.py
+++ b/koku/masu/test/util/aws/test_common.py
@@ -13,7 +13,6 @@
 from uuid import uuid4
 
 import boto3
-import numpy as np
 import pandas as pd
 from botocore.exceptions import ClientError
 from dateutil.relativedelta import relativedelta
@@ -755,7 +754,7 @@ def test_match_openshift_labels_with_nan_resources(self):
         matched_tags = [{"key": "value"}]
         data = [
-            {"lineitem_resourceid": np.nan, "lineitem_unblendedcost": 1, "resourcetags": '{"key": "value"}'},
+            {"lineitem_resourceid": "", "lineitem_unblendedcost": 1, "resourcetags": '{"key": "value"}'},
         ]
 
         df = pd.DataFrame(data)
@@ -779,7 +778,7 @@ def test_match_openshift_resource_with_nan_labels(self):
         matched_tags = [{"key": "value"}]
         data = [
-            {"lineitem_resourceid": "id1", "lineitem_unblendedcost": 1, "resourcetags": np.nan},
+            {"lineitem_resourceid": "id1", "lineitem_unblendedcost": 1, "resourcetags": ""},
        ]
 
         df = pd.DataFrame(data)
diff --git a/koku/masu/test/util/azure/test_azure_post_processor.py b/koku/masu/test/util/azure/test_azure_post_processor.py
index 200a2d7cc1..b6a23b8e09 100644
--- a/koku/masu/test/util/azure/test_azure_post_processor.py
+++ b/koku/masu/test/util/azure/test_azure_post_processor.py
@@ -16,7 +16,7 @@
 from masu.util.azure.azure_post_processor import AzurePostProcessor
 from masu.util.azure.common import INGRESS_REQUIRED_COLUMNS
 from reporting.provider.all.models import EnabledTagKeys
-from reporting.provider.azure.models import TRINO_COLUMNS
+from reporting.provider.azure.models import TRINO_REQUIRED_COLUMNS
 
 
 class TestAzurePostProcessor(MasuTestCase):
@@ -44,9 +44,9 @@ def test_azure_process_dataframe(self):
         result, _ = self.post_processor.process_dataframe(df)
         columns = list(result)
         expected_columns = sorted(
-            col.replace("-", "_").replace("/", "_").replace(":", "_").lower() for col in TRINO_COLUMNS
+            col.replace("-", "_").replace("/", "_").replace(":", "_").lower() for col in TRINO_REQUIRED_COLUMNS
         )
-        self.assertEqual(columns, expected_columns)
+        self.assertEqual(sorted(columns), sorted(expected_columns))
 
     def test_azure_date_converter(self):
         """Test that we convert the new Azure date format."""
diff --git a/koku/masu/test/util/azure/test_common.py b/koku/masu/test/util/azure/test_common.py
index f8c94e0633..521498aa31 100644
--- a/koku/masu/test/util/azure/test_common.py
+++ b/koku/masu/test/util/azure/test_common.py
@@ -3,7 +3,6 @@
 # Copyright 2021 Red Hat Inc.
 # SPDX-License-Identifier: Apache-2.0
 #
-import numpy as np
 import pandas as pd
 from django_tenants.utils import schema_context
 
@@ -116,9 +115,9 @@ def test_match_openshift_resources_and_labels_resource_nan(self):
         ]
         matched_tags = []
         data = [
-            {"resourceid": np.nan, "instanceid": "id1", "pretaxcost": 1, "tags": '{"key": "value"}'},
-            {"resourceid": np.nan, "instanceid": "id2", "pretaxcost": 1, "tags": '{"key": "other_value"}'},
-            {"resourceid": np.nan, "instanceid": "id3", "pretaxcost": 1, "tags": '{"keyz": "value"}'},
+            {"resourceid": "", "instanceid": "id1", "pretaxcost": 1, "tags": '{"key": "value"}'},
+            {"resourceid": "", "instanceid": "id2", "pretaxcost": 1, "tags": '{"key": "other_value"}'},
+            {"resourceid": "", "instanceid": "id3", "pretaxcost": 1, "tags": '{"keyz": "value"}'},
         ]
 
         df = pd.DataFrame(data)
@@ -148,7 +147,7 @@ def test_match_openshift_resource_with_nan_labels(self):
         matched_tags = [{"key": "value"}]
         data = [
-            {"resourceid": "id1", "pretaxcost": 1, "tags": np.nan},
+            {"resourceid": "id1", "pretaxcost": 1, "tags": ""},
         ]
 
         df = pd.DataFrame(data)
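Note: the fixtures above swap np.nan for "" because, once the frames are string-typed end to end, the pipeline's "missing" marker is the empty string rather than a float NaN. String operations stay well-behaved on that marker:

    import pandas as pd

    tags = pd.Series(['{"key": "value"}', ""], dtype=pd.StringDtype(storage="pyarrow"))
    # contains() on an empty string is simply False; no NaN leaks into
    # later boolean reductions such as .any() or .all()
    print(tags.str.contains("key").tolist())  # [True, False]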
diff --git a/koku/masu/util/aws/common.py b/koku/masu/util/aws/common.py
index 50839cb6bc..e4ba1f8c2d 100644
--- a/koku/masu/util/aws/common.py
+++ b/koku/masu/util/aws/common.py
@@ -880,14 +880,14 @@ def match_openshift_resources_and_labels(data_frame, cluster_topologies, matched
     resource_ids = tuple(resource_ids)
     data_frame["resource_id_matched"] = False
     resource_id_df = data_frame["lineitem_resourceid"]
-    if not resource_id_df.isna().values.all():
+    if not resource_id_df.eq("").all():
         LOG.info("Matching OpenShift on AWS by resource ID.")
         resource_id_matched = resource_id_df.str.endswith(resource_ids)
         data_frame["resource_id_matched"] = resource_id_matched
 
     data_frame["special_case_tag_matched"] = False
     tags = data_frame["resourcetags"]
-    if not tags.isna().values.all():
+    if not tags.eq("").all():
         tags = tags.str.lower()
         LOG.info("Matching OpenShift on AWS by tags.")
         special_case_tag_matched = tags.str.contains(
@@ -903,7 +903,7 @@ def match_openshift_resources_and_labels(data_frame, cluster_topologies, matched
         tag_values.extend(list(tag.values()))
 
     any_tag_matched = None
-    if not tags.isna().values.all():
+    if not tags.eq("").all():
         tag_matched = tags.str.contains("|".join(tag_keys)) & tags.str.contains("|".join(tag_values))
         data_frame["tag_matched"] = tag_matched
         any_tag_matched = tag_matched.any()
diff --git a/koku/masu/util/azure/azure_post_processor.py b/koku/masu/util/azure/azure_post_processor.py
index 8323f7f5ee..ecc8ddf441 100644
--- a/koku/masu/util/azure/azure_post_processor.py
+++ b/koku/masu/util/azure/azure_post_processor.py
@@ -1,4 +1,5 @@
 import json
+import logging
 
 import ciso8601
 import pandas
@@ -10,7 +11,9 @@
 from masu.util.common import populate_enabled_tag_rows_with_limit
 from masu.util.common import safe_float
 from masu.util.common import strip_characters_from_column_name
-from reporting.provider.azure.models import TRINO_COLUMNS
+from reporting.provider.azure.models import TRINO_REQUIRED_COLUMNS
+
+LOG = logging.getLogger(__name__)
 
 
 def azure_json_converter(tag_str):
@@ -101,11 +104,9 @@
     def process_dataframe(self, data_frame):
         data_frame = data_frame.rename(columns=column_name_map)
 
-        columns = set(data_frame)
-        columns = set(TRINO_COLUMNS).union(columns)
-        columns = sorted(columns)
-
-        data_frame = data_frame.reindex(columns=columns)
+        missing = set(TRINO_REQUIRED_COLUMNS).difference(data_frame)
+        to_add = {k: TRINO_REQUIRED_COLUMNS[k] for k in missing}
+        data_frame = data_frame.assign(**to_add)
 
         unique_tags = set()
         for tags_json in data_frame["tags"].values:
diff --git a/koku/masu/util/azure/common.py b/koku/masu/util/azure/common.py
index 347968aba1..9ea096e41c 100644
--- a/koku/masu/util/azure/common.py
+++ b/koku/masu/util/azure/common.py
@@ -181,17 +181,17 @@ def match_openshift_resources_and_labels(data_frame, cluster_topologies, matched
     matchable_resources = list(nodes) + list(volumes)
     data_frame["resource_id_matched"] = False
     resource_id_df = data_frame["resourceid"]
-    if resource_id_df.isna().values.all():
+    if resource_id_df.eq("").all():
         resource_id_df = data_frame["instanceid"]
 
-    if not resource_id_df.isna().values.all():
+    if not resource_id_df.eq("").all():
         LOG.info("Matching OpenShift on Azure by resource ID.")
         resource_id_matched = resource_id_df.str.contains("|".join(matchable_resources))
         data_frame["resource_id_matched"] = resource_id_matched
 
     data_frame["special_case_tag_matched"] = False
     tags = data_frame["tags"]
-    if not tags.isna().values.all():
+    if not tags.eq("").all():
         tags = tags.str.lower()
         LOG.info("Matching OpenShift on Azure by tags.")
         special_case_tag_matched = tags.str.contains(
@@ -207,7 +207,7 @@ def match_openshift_resources_and_labels(data_frame, cluster_topologies, matched
         tag_values.extend(list(tag.values()))
 
     any_tag_matched = None
-    if not tags.isna().values.all():
+    if not tags.eq("").all():
         tag_matched = tags.str.contains("|".join(tag_keys)) & tags.str.contains("|".join(tag_values))
         data_frame["tag_matched"] = tag_matched
         any_tag_matched = tag_matched.any()
diff --git a/koku/masu/util/ocp/ocp_post_processor.py b/koku/masu/util/ocp/ocp_post_processor.py
index 4d100ffb6a..93c22af7c7 100644
--- a/koku/masu/util/ocp/ocp_post_processor.py
+++ b/koku/masu/util/ocp/ocp_post_processor.py
@@ -146,7 +146,7 @@ def _generate_daily_data(self, data_frame):
         new_cols = report.get("new_required_columns")
         for col in new_cols:
             if col not in daily_data_frame:
-                daily_data_frame[col] = None
+                daily_data_frame[col] = pd.Series(dtype=pd.StringDtype(storage="pyarrow"))
 
         return daily_data_frame
 
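Note: the matching guards above flip from .isna().values.all() to .eq("").all() for the same reason; after the dtype change nothing in these columns is NaN anymore, so the old check could never fire. Sketch:

    import pandas as pd

    resource_ids = pd.Series(["", "", ""], dtype=pd.StringDtype(storage="pyarrow"))
    print(resource_ids.isna().values.all())  # False: no values are NaN under the new dtype
    print(resource_ids.eq("").all())         # True: empty string is the "no data" marker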
"invoicesectionname", - "isazurecrediteligible", - "metercategory", - "meterid", - "metername", - "meterregion", - "metersubcategory", - "offerid", - "partnumber", - "paygprice", - "planname", - "pretaxcost", - "pricingmodel", - "productname", - "productorderid", - "productordername", - "publishername", - "publishertype", - "quantity", - "reservationid", - "reservationname", - "resourcegroup", - "resourceid", - "resourcelocation", - "resourcename", - "resourcerate", - "resourcetype", - "servicefamily", - "serviceinfo1", - "serviceinfo2", - "servicename", - "servicetier", - "subscriptionguid", - "subscriptionid", - "subscriptionname", - "tags", - "term", - "unitofmeasure", - "unitprice", - "usagequantity", -] +TRINO_REQUIRED_COLUMNS = { + "billingperiodstartdate": pd.NaT, + "billingperiodenddate": pd.NaT, + "usagedatetime": pd.NaT, + "date": pd.NaT, + "accountname": "", + "accountownerid": "", + "additionalinfo": "", + "availabilityzone": "", + "billingaccountid": "", + "billingaccountname": "", + "billingcurrencycode": "", + "billingcurrency": "", + "billingprofileid": "", + "billingprofilename": "", + "chargetype": "", + "consumedservice": "", + "costcenter": "", + "costinbillingcurrency": 0.0, + "currency": "", + "effectiveprice": 0.0, + "frequency": "", + "instanceid": "", + "invoicesectionid": "", + "invoicesectionname": "", + "isazurecrediteligible": "", + "metercategory": "", + "meterid": "", + "metername": "", + "meterregion": "", + "metersubcategory": "", + "offerid": "", + "partnumber": "", + "paygprice": 0.0, + "planname": "", + "pretaxcost": 0.0, + "pricingmodel": "", + "productname": "", + "productorderid": "", + "productordername": "", + "publishername": "", + "publishertype": "", + "quantity": 0.0, + "reservationid": "", + "reservationname": "", + "resourcegroup": "", + "resourceid": "", + "resourcelocation": "", + "resourcename": "", + "resourcerate": 0.0, + "resourcetype": "", + "servicefamily": "", + "serviceinfo1": "", + "serviceinfo2": "", + "servicename": "", + "servicetier": "", + "subscriptionguid": "", + "subscriptionid": "", + "subscriptionname": "", + "tags": "", + "term": "", + "unitofmeasure": "", + "unitprice": 0.0, + "usagequantity": 0.0, +} UI_SUMMARY_TABLES = ( "reporting_azure_compute_summary_p",