Skip to content

Commit

Permalink
Merge pull request #117 from mheine3000/buildstock_fix_branch
Browse files (browse the repository at this point in the history)
Buildstock fix branch
  • Loading branch information
jgu2 committed Apr 17, 2024
2 parents 92449d5 + 3bd78ac commit 53e21af
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 16 deletions.
3 changes: 2 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ coverage.xml
.git
.pytest_cache
.env
*.egg-info
*.egg-info
azure
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM node:16-bullseye-slim
FROM node:20-bullseye-slim

# Setup environment variables
ENV LC_ALL=C.UTF-8
Expand Down
3 changes: 1 addition & 2 deletions oedi/AWS/data_lake/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,9 @@ def create_crawler_role(self):
managed_policies=managed_policies,
)

def create_crawler(self, location, tags):
def create_crawler(self, location, table_prefix, tags):
"""Create crawler in data lake by given dataset location."""
crawler_name = generate_crawler_name(s3url=location)
table_prefix = generate_table_prefix(s3url=location)

if not self.crawler_role:
self.crawler_role()
Expand Down
10 changes: 8 additions & 2 deletions oedi/AWS/data_lake/stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,11 @@ def __init__(self, scope: Construct, config: OEDIConfigBase) -> None:
data_lake.create_database()
data_lake.create_crawler_role()
#TODO: data_lake.create_workgroup()
for dataset_location in database['Locations']:
data_lake.create_crawler(location=dataset_location, tags=tags)
if 'Table Prefixes' in database.keys():
table_prefixes = database['Table Prefixes'] # Prefix for each table
elif 'Table Prefix' in database.keys():
table_prefixes = [database['Table Prefix']] * len(database['Locations']) # One prefix for all tables
else:
table_prefixes = ['table_'] * len(database['Locations']) # No prefix specified, use generic prefix
for dataset_location, table_prefix in zip(database['Locations'], table_prefixes):
data_lake.create_crawler(location=dataset_location, table_prefix=table_prefix, tags=tags)
3 changes: 2 additions & 1 deletion oedi/AWS/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def generate_crawler_name(s3url):
bucket, path = parse_s3url(s3url)
dashed_path = path.replace("/", "-")
name = f"{bucket}-{dashed_path}".replace("_", "-")
# name = name[-128:] # Crawler names have a limit of 128 characters
return name.lower()


Expand Down Expand Up @@ -93,5 +94,5 @@ def generate_table_prefix(s3url):
prefix = os.path.dirname(path).replace("/", "-") + "_"

table_prefix = prefix.replace("-", "_").lower()

table_prefix = table_prefix[-128:] # A table prefix has a limit of 128 characters
return table_prefix
2 changes: 1 addition & 1 deletion oedi/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.4"
__version__ = "0.2.5"
61 changes: 53 additions & 8 deletions oedi/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,46 @@ AWS:
- s3://oedi-data-lake/pv-rooftop/developable-planes/
- s3://oedi-data-lake/pv-rooftop/rasd/
- s3://oedi-data-lake/pv-rooftop-pr/developable-planes/
- Identifier: buildstock
Name: oedi_buildstock
Table Prefixes:
- pv_rooftop_
- pv_rooftop_
- pv_rooftop_
- pv_rooftop_
- pv_rooftop_pr_
- Identifier: comstock
Name: oedi_comstock_amy2018_release_2
Locations:
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/comstock_amy2018_release_1/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/comstock_tmy3_release_1/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/resstock_amy2018_release_1/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/resstock_tmy3_release_1/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/metadata/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/weather/amy2018/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/metadata_and_annual_results/national/parquet/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_midwest/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_northeast/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_south/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_west/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_state/
Table Prefixes:
- amy_2018_
- weather_
- metadata_and_annual_results_national_
- timeseries_individual_buildings_
- timeseries_individual_buildings_
- timeseries_individual_buildings_
- timeseries_individual_buildings_
- timeseries_individual_buildings_
- Identifier: resstock
Name: oedi_resstock_2022_tmy3_1.1
Locations:
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/metadata/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/metadata_and_annual_results/national/parquet/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/metadata_income/parquet/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/timeseries_individual_buildings/by_state/
- s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/weather/
Table Prefixes:
- resstock_tmy3_
- metadata_and_annual_results_national_
- metadata_income_
- timeseries_individual_buildings_
- resstock_tmy3_
- Identifier: tracking_the_sun
Name: oedi_tracking_the_sun
Locations:
Expand All @@ -26,6 +59,7 @@ AWS:
- s3://oedi-data-lake/tracking-the-sun/2021/
- s3://oedi-data-lake/tracking-the-sun/2022/
- s3://oedi-data-lake/tracking-the-sun/2023/
Table Prefix: tracking_the_sun_
- Identifier: atb
Name: oedi_atb
Locations:
Expand All @@ -38,6 +72,16 @@ AWS:
- s3://oedi-data-lake/ATB/transportation/parquet/2022/fuels
- s3://oedi-data-lake/ATB/transportation/parquet/2022/vehicles
- s3://oedi-data-lake/ATB/transportation/parquet/2022/vehicles_fuels
Table Prefixes:
- atb_electricity_
- atb_electricity_
- atb_electricity_
- atb_electricity_
- atb_electricity_
- atb_transportation_2022_
- atb_transportation_2022_
- atb_transportation_2022_
- atb_transportation_2022_
- Identifier: pvdaq
Name: oedi_pvdaq
Locations:
Expand All @@ -50,6 +94,7 @@ AWS:
- s3://oedi-data-lake/pvdaq/parquet/mount/
- s3://oedi-data-lake/pvdaq/parquet/other-instruments/
- s3://oedi-data-lake/pvdaq/parquet/pvdata/
Table Prefix: pvdaq_
- Identifier: nso
Name: oedi_nso
Locations:
Expand All @@ -60,10 +105,10 @@ AWS:
- s3://oedi-data-lake-rawdata/NSO-2/loads_20Hz/
- s3://oedi-data-lake-rawdata/NSO-2/wake_masts_1min/
- s3://oedi-data-lake-rawdata/NSO-2/wake_masts_20Hz/

Table Prefix: 'nso_'
Staging Location: s3://user-owned-staging-bucket/
Tags:
- Key: Project
Value: OEDI
- Key: Release
Value: 0.2.4
Value: 0.2.5

0 comments on commit 53e21af

Please sign in to comment.