diff --git a/LICENSE b/LICENSE index d716797..da7e522 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2023 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. +Copyright (c) 2024 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index 7c7ee54..29ef781 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Open Data Access Tools The Open Energy Data Initiative (OEDI) provides a number of tools to enable the use of the open data published through this initiative. The source is largely written in Python, including Jupyter notebooks. -Copyright (c) 2023 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. +Copyright (c) 2024 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. Open Data Access Tools: NREL SWR-20-57. Azure Data Tools: SWR-23-92. diff --git a/azure/dev-env.yml b/azure/dev-env.yml new file mode 100644 index 0000000..bdb2dad --- /dev/null +++ b/azure/dev-env.yml @@ -0,0 +1,18 @@ +name: oedi-azure-dev +channels: + - conda-forge + - defaults +dependencies: + - python=3.10.12 + - h5py=3.9.0 + - boto3 + - cftime + - kerchunk + - planetary-computer + - s3fs=2023.6.0 + - pandas + - ujson + - xarray + - zarr + - ipykernel + - adlfs diff --git a/azure/documentation/ pv_rooftop.md b/azure/documentation/ pv_rooftop.md new file mode 100644 index 0000000..2ce34c1 --- /dev/null +++ b/azure/documentation/ pv_rooftop.md @@ -0,0 +1,346 @@ +# PV Rooftop Database + +## Overview + +The National Renewable Energy Laboratory's (NREL) PV Rooftop Database (PVRDB) is a lidar-derived, geospatially-resolved dataset of suitable roof surfaces and their PV technical potential for 128 metropolitan regions in the United States. The source lidar data and building footprints were obtained by the U.S. Department of Homeland Security's Homeland Security Infrastructure Program for 2006-2014. Using GIS methods, NREL identified suitable roof surfaces based on their size, orientation, and shading parameters (Gagnon et al. 2016). Standard 2015 technical potential was then estimated for each plane using NREL's System Advisor Model. + +The PVRDB is downloadable by city and year of lidar collection. Four geospatial layers are available for each city and year: 1) the raster extent of the lidar collection, 2) buildings identified from the lidar data, 3) suitable developable planes for each building, and 4) aspect values of the developable planes. + +## Storage Resources + +The pv-rooftop dataset is made available in Parquet format in the following container: + +`https://nrel.blob.core.windows.net/oedi` + +### Data + +The data are located in the `pv-rooftops/` directory. The four main datasets are stored in the following subdirectories: + +Main datasets + - `/aspects` + - `/buildings` + - `/developable_planes` + - `/rasd` + +Each partition is stored in an individual folder within each subdirectory. + +Partitions + +- `/city_year=__` + +e.g. `/city_year=dover_de_09` + +### Data Format + +The PV Rooftops dataset is provided in geoparquet format partitioned by city_year.
There are 4 core datasets: + +#### `oedi/pv-rooftop/aspects` +field | data_type | description +-- | -- | -- +`gid` | bigint |   +`city` | string | city of source lidar dataset +`state` | string | state of source lidar dataset +`year` | bigint | year of source lidar dataset +`bldg_fid` | bigint | building id +`aspect` | bigint | aspect value +`the_geom_96703` | string | projected geometry ([US Contiguous Albers Equal Area Conic - SRID 6703](https://spatialreference.org/ref/sr-org/6703/)) +`the_geom_4326` | string | geometry ([WGS 1984 - SRID 4326](https://spatialreference.org/ref/epsg/4326/)) +`region_id` | bigint |   + + +#### `oedi/pv-rooftop/buildings` + +field | data_type | description +-- | -- | -- +`gid` | bigint |   +`bldg_fid` | bigint | the building fid +`the_geom_96703` | string | projected geometry ([US Contiguous Albers Equal Area Conic - SRID 6703](https://spatialreference.org/ref/sr-org/6703/)) +`the_geom_4326` | string | geometry ([WGS 1984 - SRID 4326](https://spatialreference.org/ref/epsg/4326/)) +`city` | string | the city of the source lidar data +`state` | string | the state of the source lidar data +`year` | bigint | the year of the source lidar data +`region_id` | bigint |   + + +#### `oedi/pv-rooftop/developable_planes` + +field | data_type | description +-- | -- | -- +`bldg_fid` | bigint | building ID associated with the developable plane +`footprint_m2` | double | developable plane footprint area (m2) +`slope` | bigint | slope value +`flatarea_m2` | double | flat area of the developable plane (m2) +`slopeconversion` | double | the slope conversion factor used to convert the flat area into the sloped area +`slopearea_m2` | double | sloped area of the developable plane (m2) +`zip` | string | zipcode +`zip_perc` | double |   +`aspect` | bigint | the aspect value of the developable plane +`gid` | bigint | unique developable plane ID +`city` | string | the city of the source lidar data +`state` | string | the state of the source lidar data +`year` | bigint | the year of the source lidar data +`region_id` | bigint |   +`the_geom_96703` | string | projected geometry ([US Contiguous Albers Equal Area Conic - SRID 6703](https://spatialreference.org/ref/sr-org/6703/)) +`the_geom_4326` | string | geometry ([WGS 1984 - SRID 4326](https://spatialreference.org/ref/epsg/4326/)) + + +#### `oedi/pv-rooftop/rasd` + +field | data_type | description +-- | -- | -- +`gid` | bigint | the unique geographic ID of the raster domain +`the_geom_96703` | string | projected geometry ([US Contiguous Albers Equal Area Conic - SRID 6703](https://spatialreference.org/ref/sr-org/6703/)) +`the_geom_4326` | string | geometry ([WGS 1984 - SRID 4326](https://spatialreference.org/ref/epsg/4326/)) +`city` | string | the city of the source lidar data +`state` | string | the state of the source lidar data +`year` | bigint | the year of the source lidar data +`region_id` | bigint |   +`serial_id` | bigint |   +`__index_level_0__` | bigint |   + + +Within each core dataset there are partitions by city_state_year(YY) that can be queried using pyarrow or Dask, or downloaded as individual geoparquet data files, as in the sketch below.
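As a minimal sketch, a single partition can be read with pyarrow over blob storage. It assumes a temporary SAS token from the Planetary Computer (the same pattern used in the accompanying sample notebook) and uses the `dover_de_09` example partition named above; adjust the column list to whichever fields you need:

```python
import planetary_computer
import pyarrow.parquet as pq
from adlfs import AzureBlobFileSystem
from pyarrow.fs import PyFileSystem, FSSpecHandler

# Temporary, read-only SAS token for the 'nrel' account / 'oedi' container
token = planetary_computer.sas.get_token('nrel', 'oedi').token
fs = PyFileSystem(FSSpecHandler(AzureBlobFileSystem('nrel', credential=token)))

# Read one city_year partition; columns follow the buildings schema above
buildings = pq.read_table(
    'oedi/pv-rooftop/buildings',
    filesystem=fs,
    filters=[('city_year', '=', 'dover_de_09')],
    columns=['gid', 'bldg_fid', 'city', 'state', 'year', 'the_geom_4326'],
)
print(buildings.num_rows)
```

The same path and `filters` argument also work with `geopandas.read_parquet` or Dask; the sample notebook referenced below walks through a full geopandas workflow.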
+ +Aspects Lookup: +``` +1 337.5 - 22.5 north +2 22.5 - 67.5 northeast +3 67.5 - 112.5 east +4 112.5 - 157.5 southeast +5 157.5 - 202.5 south +6 202.5 - 247.5 southwest +7 247.5 - 292.5 west +8 292.5 - 337.5 northwest +0 flat flat +``` + +Regions Lookup: +``` +1 Albany NY 2006-01-01 +2 Albany NY 2013-01-01 +3 Albuquerque NM 2006-01-01 +4 Albuquerque NM 2012-01-01 +5 Allentown PA 2006-01-01 +6 Amarillo TX 2008-01-01 +7 Anaheim CA 2010-01-01 +8 Arnold MO 2006-01-01 +9 Atlanta GA 2008-01-01 +10 Atlanta GA 2013-01-01 +11 Augusta GA 2010-01-01 +12 Augusta ME 2008-01-01 +13 Austin TX 2006-01-01 +14 Austin TX 2012-01-01 +15 Bakersfield CA 2010-01-01 +16 Baltimore MD 2008-01-01 +17 Baltimore MD 2013-01-01 +18 Baton Rouge LA 2006-01-01 +19 Baton Rouge LA 2012-01-01 +20 Birmingham AL 2008-01-01 +21 Bismarck ND 2008-01-01 +22 Boise ID 2007-01-01 +23 Boise ID 2013-01-01 +24 Boulder CO 2014-01-01 +25 Bridgeport CT 2006-01-01 +26 Bridgeport CT 2013-01-01 +27 Buffalo NY 2008-01-01 +28 Carson City NV 2009-01-01 +29 Charleston SC 2010-01-01 +30 Charleston WV 2009-01-01 +31 Charlotte NC 2006-01-01 +32 Charlotte NC 2012-01-01 +33 Cheyenne WY 2008-01-01 +34 Chicago IL 2008-01-01 +35 Chicago IL 2012-01-01 +36 Cincinnati OH 2010-01-01 +37 Cleveland OH 2012-01-01 +38 Colorado Springs CO 2006-01-01 +39 Colorado Springs CO 2013-01-01 +40 Columbia SC 2009-01-01 +41 Columbus GA 2009-01-01 +42 Columbus OH 2006-01-01 +43 Columbus OH 2012-01-01 +44 Concord NH 2009-01-01 +45 Corpus Christi TX 2012-01-01 +46 Dayton OH 2006-01-01 +47 Dayton OH 2012-01-01 +48 Denver CO 2012-01-01 +49 Des Moines IA 2010-01-01 +50 Detroit MI 2012-01-01 +51 Dover DE 2009-01-01 +52 El Paso TX 2007-01-01 +53 Flint MI 2009-01-01 +54 Fort Wayne IN 2008-01-01 +55 Frankfort KY 2012-01-01 +56 Fresno CA 2006-01-01 +57 Fresno CA 2013-01-01 +58 Ft Belvoir DC 2012-01-01 +59 Grand Rapids MI 2013-01-01 +60 Greensboro NC 2009-01-01 +61 Harrisburg PA 2009-01-01 +62 Hartford CT 2006-01-01 +63 Hartford CT 2013-01-01 +64 Helena MT 2007-01-01 +65 Helena MT 2013-01-01 +66 Houston TX 2010-01-01 +67 Huntsville AL 2009-01-01 +68 Indianapolis IN 2006-01-01 +69 Indianapolis IN 2012-01-01 +70 Jackson MS 2007-01-01 +71 Jacksonville FL 2010-01-01 +72 Jefferson City MO 2008-01-01 +73 Kansas City MO 2010-01-01 +74 Kansas City MO 2012-01-01 +75 LaGuardia JFK NY 2012-01-01 +76 Lancaster PA 2010-01-01 +77 Lansing MI 2007-01-01 +78 Lansing MI 2013-01-01 +79 Las Vegas NV 2009-01-01 +80 Lexington KY 2012-01-01 +81 Lincoln NE 2008-01-01 +82 Little Rock AR 2008-01-01 +83 Los Angeles CA 2007-01-01 +84 Louisville KY 2006-01-01 +85 Louisville KY 2012-01-01 +86 Lubbock TX 2008-01-01 +87 Madison WI 2010-01-01 +88 Manhattan NY 2007-01-01 +89 McAllen TX 2008-01-01 +90 Miami FL 2009-01-01 +91 Milwaukee WI 2007-01-01 +92 Milwaukee WI 2013-01-01 +93 Minneapolis MN 2007-01-01 +94 Minneapolis MN 2012-01-01 +95 Mission Viejo CA 2013-01-01 +96 Mobile AL 2010-01-01 +97 Modesto CA 2010-01-01 +98 Montgomery AL 2007-01-01 +99 Montpelier VT 2009-01-01 +100 Newark NJ 2007-01-01 +101 New Haven CT 2007-01-01 +102 New Haven CT 2013-01-01 +103 New Orleans LA 2008-01-01 +104 New Orleans LA 2012-01-01 +105 New York NY 2005-01-01 +106 New York NY 2013-01-01 +107 Norfolk VA 2007-01-01 +108 Oklahoma City OK 2007-01-01 +109 Oklahoma City OK 2013-01-01 +110 Olympia WA 2010-01-01 +111 Omaha NE 2007-01-01 +112 Omaha NE 2013-01-01 +113 Orlando FL 2009-01-01 +114 Oxnard CA 2010-01-01 +115 Palm Bay FL 2010-01-01 +116 Pensacola FL 2009-01-01 +117 Philadelphia PA 2007-01-01 +118 Pierre SD 2008-01-01 +119 Pittsburgh 
PA 2004-01-01 +120 Pittsburgh PA 2012-01-01 +121 Portland OR 2012-01-01 +122 Poughkeepsie NY 2012-01-01 +123 Providence RI 2004-01-01 +124 Providence RI 2012-01-01 +125 Raleigh-Durham NC 2010-01-01 +126 Reno NV 2007-01-01 +127 Richmond VA 2008-01-01 +128 Richmond VA 2013-01-01 +129 Rochester NY 2008-01-01 +130 Rochester NY 2014-01-01 +131 Sacramento CA 2012-01-01 +132 Salem OR 2008-01-01 +133 Salt Lake City UT 2012-01-01 +134 San Antonio TX 2008-01-01 +135 San Antonio TX 2013-01-01 +137 San Diego CA 2008-01-01 +138 San Diego CA 2013-01-01 +139 San Francisco CA 2013-01-01 +140 Santa Fe NM 2009-01-01 +141 Sarasota FL 2009-01-01 +142 Scranton PA 2008-01-01 +143 Seattle WA 2011-01-01 +144 Shreveport LA 2008-01-01 +145 Spokane WA 2008-01-01 +146 Springfield IL 2009-01-01 +147 Springfield MA 2007-01-01 +148 Springfield MA 2013-01-01 +149 St Louis MO 2008-01-01 +150 St Louis MO 2013-01-01 +151 Stockton CA 2010-01-01 +152 Syracuse NY 2008-01-01 +153 Tallahassee FL 2009-01-01 +154 Tampa FL 2008-01-01 +155 Toledo OH 2006-01-01 +156 Toledo OH 2012-01-01 +157 Topeka KS 2008-01-01 +158 Trenton NJ 2008-01-01 +159 Tucson AZ 2007-01-01 +160 Tulsa OK 2008-01-01 +161 Washington DC 2009-01-01 +162 Washington DC 2012-01-01 +163 Wichita KS 2012-01-01 +164 Winston-Salem NC 2009-01-01 +165 Worcester MA 2009-01-01 +166 Youngstown OH 2008-01-01 +167 Andrews AFB DC 2012-01-01 +136 San Bernardino-Riverside CA 2012-01-01 +168 Tampa FL 2013-01-01 +``` + + +## Sample code + +A complete Python example of accessing and visualizing some of these data is available in the accompanying [sample notebook](https://nbviewer.jupyter.org/github/microsoft/AIforEarthDataSets/blob/main/data/pv_rooftop.ipynb). + +## Mounting the container + +We also provide a read-only SAS (shared access signature) token to allow access via, e.g., [BlobFuse](https://github.com/Azure/azure-storage-fuse), which allows you to mount blob containers as drives: + +`https://nrel.blob.core.windows.net/oedi?sv=2019-12-12&si=oedi-ro&sr=c&sig=uslpLxKf3%2Foeu79ufIHbJkpI%2FTWDH3lblJMa5KQRPmM%3D` + +Mounting instructions for Linux are [here](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-how-to-mount-container-linux). + +## References + +Main References: +1. [Rooftop Solar Photovoltaic Technical Potential in the United States: A Detailed Assessment](https://www.nrel.gov/docs/fy16osti/65298.pdf) + +2. [Using GIS-based methods and lidar data to estimate rooftop solar technical potential in US cities](https://iopscience.iop.org/article/10.1088/1748-9326/aa7225/pdf) + +3. [Estimating rooftop solar technical potential across the US using a combination of GIS-based methods, lidar data, and statistical modeling](https://iopscience.iop.org/article/10.1088/1748-9326/aaa554/pdf) + +4. [Rooftop Photovoltaic Technical Potential in the United States](https://data.nrel.gov/submissions/121) + +5. [U.S. PV-Suitable Rooftop Resources](https://data.nrel.gov/submissions/47) + +Related Reference: + +1. [Rooftop Solar Technical Potential for Low-to-Moderate Income Households in the United States](https://www.nrel.gov/docs/fy18osti/70901.pdf) + +2. [Rooftop Energy Potential of Low Income Communities in America REPLICA](https://data.nrel.gov/submissions/81) + +3. [Puerto Rico Solar-for-All: LMI PV Rooftop Technical Potential and Solar Savings Potential](https://data.nrel.gov/submissions/144) + + +## Disclaimer and Attribution + +Copyright (c) 2020, Alliance for Sustainable Energy LLC, All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +## Contact + +For questions about this dataset, contact [`aiforearthdatasets@microsoft.com`](mailto:aiforearthdatasets@microsoft.com?subject=oedi%20question). + + +## Notices + +Microsoft provides this dataset on an "as is" basis. Microsoft makes no warranties (express or implied), guarantees, or conditions with respect to your use of the dataset. To the extent permitted under your local law, Microsoft disclaims all liability for any damages or losses * including direct, consequential, special, indirect, incidental, or punitive * resulting from your use of this dataset. This dataset is provided under the original terms that Microsoft received source data. \ No newline at end of file diff --git a/azure/documentation/PR100.md b/azure/documentation/PR100.md new file mode 100644 index 0000000..69e9052 --- /dev/null +++ b/azure/documentation/PR100.md @@ -0,0 +1,55 @@ +# NREL Puerto Rico 100 Dataset (PR100) + + +## Overview + +The [Puerto Rico Grid Resilience and Transitions to 100% Renewable Energy Study](https://www.energy.gov/gdo/puerto-rico-grid-resilience-and-transitions-100-renewable-energy-study-pr100) is a two-year study by the U.S. Department of Energy’s (DOE’s) Grid Deployment Office and six national laboratories to comprehensively analyze stakeholder-driven pathways to Puerto Rico’s clean energy future. + +The PR100 dataset is a collection of geospatial data that will be useful for renewable energy development in Puerto Rico. The dataset is curated by the National Renewable Energy Laboratory. + + +## Storage resources + +The data are stored in Azure Blob Storage, in the following container: + +`https://nrel.blob.core.windows.net/oedi` + + +### Data + +The data are located in the `PR100/` directory and have been categorized into five subdirectories: + +- `Boundaries/` +- `Habitat/` +- `Hazards/` +- `Infrastructure/` +- `Topography/` + + +### Data format + +Vector data are stored in the geoparquet format and rasters are stored as cloud-optimized GeoTIFFs.
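As a minimal sketch, one vector layer and one raster can be opened directly over blob storage. It reuses the file paths from the accompanying example notebook and a temporary SAS token from the Planetary Computer, and it assumes `geopandas` and `rasterio` are installed in addition to the packages in `dev-env.yml`:

```python
import planetary_computer
import geopandas
import rasterio
from adlfs import AzureBlobFileSystem
from pyarrow.fs import PyFileSystem, FSSpecHandler

# Temporary, read-only SAS token for the 'nrel' account / 'oedi' container
token = planetary_computer.sas.get_token('nrel', 'oedi').token
fs = PyFileSystem(FSSpecHandler(AzureBlobFileSystem('nrel', credential=token)))

# Vector layer (geoparquet) -> GeoDataFrame
protected = geopandas.read_parquet(
    'oedi/PR100/Boundaries/land_protected_areas.parquet', filesystem=fs
)

# Raster layer (cloud-optimized GeoTIFF)
with fs.open_input_file('oedi/PR100/Topography/elevation.tif') as f:
    with rasterio.open(f) as elevation:
        print(elevation.meta)
```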
+ + +## Sample code + +A complete Python example of accessing and visualizing some of these data is available in the accompanying [sample notebook](https://nbviewer.jupyter.org/github/microsoft/AIforEarthDataSets/blob/main/data/PR100.ipynb). + + +## Mounting the container + +We also provide a read-only SAS (shared access signature) token to allow access via, e.g., [BlobFuse](https://github.com/Azure/azure-storage-fuse), which allows you to mount blob containers as drives: + +`https://nrel.blob.core.windows.net/oedi?sv=2019-12-12&si=oedi-ro&sr=c&sig=uslpLxKf3%2Foeu79ufIHbJkpI%2FTWDH3lblJMa5KQRPmM%3D` + +Mounting instructions for Linux are [here](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-how-to-mount-container-linux). + + +## Contact + +For questions about this dataset, contact [`aiforearthdatasets@microsoft.com`](mailto:aiforearthdatasets@microsoft.com?subject=oedi%20question). + + +## Notices + +Microsoft provides this dataset on an "as is" basis. Microsoft makes no warranties (express or implied), guarantees, or conditions with respect to your use of the dataset. To the extent permitted under your local law, Microsoft disclaims all liability for any damages or losses * including direct, consequential, special, indirect, incidental, or punitive * resulting from your use of this dataset. This dataset is provided under the original terms that Microsoft received source data. \ No newline at end of file diff --git a/azure/documentation/az_cli_guide.md b/azure/documentation/az_cli_guide.md new file mode 100644 index 0000000..5e153be --- /dev/null +++ b/azure/documentation/az_cli_guide.md @@ -0,0 +1,19 @@ +## Azure CLI Guide + +OEDI data exist as blobs in Azure. Blobs live in containers. Containers live in storage accounts. For most of our data, the storage account is 'nrel' and the container is 'oedi'. There is a directory structure within the container to organize different data sets. Currently, the datasets present are 'PR100', 'pv-rooftop' and part of 'sup3rcc'. NSRDB lives in the 'nrel' storage account but in a different container called 'nrel-nsrdb'. + +In order to access data from the command line, you will need to obtain a temporary SAS token from the planetary computer. You can then use that token as an argument for any commands you make with the CLI. 
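If you are scripting this, the same flow works from Python: request a token from the Planetary Computer token endpoint and pass it to the CLI. A minimal sketch, assuming the Azure CLI is installed and on your PATH:

```python
import requests
import subprocess

# Request a temporary SAS token for the 'nrel' account / 'oedi' container
token = requests.get(
    'https://planetarycomputer.microsoft.com/api/sas/v1/token/nrel/oedi'
).json()['token']

# List blobs under the PR100 prefix, passing the token to the Azure CLI
subprocess.run(
    ['az', 'storage', 'blob', 'list',
     '--account-name', 'nrel', '--container-name', 'oedi',
     '--prefix', 'PR100', '--output', 'table',
     '--sas-token', token],
    check=True,
)
```

The equivalent curl and az commands are shown below.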
CLI reference for interacting with blobs: https://learn.microsoft.com/en-us/cli/azure/storage/blob?view=azure-cli-latest#az-storage-blob-download + +Finally, if the goal is to move large amounts of data from blob storage to S3 or local storage, the best tool is azcopy: https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10 + +Obtain a Planetary Computer temporary access token: + +`curl https://planetarycomputer.microsoft.com/api/sas/v1/token/nrel/oedi > sas.json` + +View a list of blobs in the PR100 dataset: + +`az storage blob list --account-name nrel --container-name oedi --output table --prefix PR100 --sas-token ""` + +Download a blob from the PR100 dataset: + +`az storage blob download --account-name nrel --container-name oedi --name PR100/Infrastructure/setbacks_runway.parquet --file setbacks_runway.parquet --sas-token ""` diff --git a/azure/documentation/sup3rcc.md b/azure/documentation/sup3rcc.md new file mode 100644 index 0000000..10679b6 --- /dev/null +++ b/azure/documentation/sup3rcc.md @@ -0,0 +1,103 @@ +# Super-Resolution for Renewable Energy Resource Data with Climate Change Impacts (Sup3rCC) + + +## Overview + +The Super-Resolution for Renewable Energy Resource Data with Climate Change Impacts (Sup3rCC) data is a collection of 4km hourly wind, solar, temperature, humidity, and pressure fields for the contiguous United States under climate change scenarios. + +Sup3rCC is downscaled Global Climate Model (GCM) data. For example, the initial dataset "sup3rcc_conus_mriesm20_ssp585_r1i1p1f1" is downscaled from MRI ESM 2.0 for climate change scenario SSP5 8.5 and variant label r1i1p1f1. The downscaling process was performed using a generative machine learning approach called sup3r: Super-Resolution for Renewable Energy Resource Data ([Sup3r GitHub Repo](https://github.com/NREL/sup3r)). The data includes both historical and future weather years, although the historical years represent the historical average climate, not the actual historical weather that we experienced. + +The Sup3rCC data is intended to help researchers study the impact of climate change on energy systems with high levels of wind and solar capacity. Please note that all climate change data is only a representation of the *possible* future climate and contains significant uncertainty. Analysis of multiple climate change scenarios and multiple climate models can help quantify this uncertainty. + +For more information, see the [OEDI Sup3rCC catalog entry](https://data.openei.org/submissions/5839). + +## Storage Resources + +The Sup3rCC dataset is made available in h5 format in the following container: + +`https://nrel.blob.core.windows.net/oedi` + +### Data + +The data are located in the `sup3rcc/` directory. The initial dataset is in the subdirectory `conus_mriesm20_ssp585_r1i1p1f1/`. + +Each h5 file's name encodes the variables it contains and the year. + +e.g. `sup3rcc_conus_mriesm20_ssp585_r1i1p1f1_pressure_2015.h5` + +### Data Format + +The Sup3rCC dataset is provided in h5 format. A kerchunk reference file is also included to facilitate faster access.
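As a minimal sketch, an individual h5 file can be read directly over blob storage with `h5py` and `adlfs` using a temporary SAS token from the Planetary Computer. The path is assembled from the directory layout above, the dataset name follows the Variables table below, and the time-by-site array layout assumed here should be verified against the file itself:

```python
import h5py
import planetary_computer
from adlfs import AzureBlobFileSystem

# Temporary, read-only SAS token for the 'nrel' account / 'oedi' container
token = planetary_computer.sas.get_token('nrel', 'oedi').token
fs = AzureBlobFileSystem('nrel', credential=token)

# Path assembled from the directory layout and example file name above
path = ('oedi/sup3rcc/conus_mriesm20_ssp585_r1i1p1f1/'
        'sup3rcc_conus_mriesm20_ssp585_r1i1p1f1_pressure_2015.h5')

with fs.open(path, 'rb') as f, h5py.File(f, 'r') as h5:
    print(list(h5.keys()))  # e.g. pressure_0m plus metadata datasets
    # Assumed (time, site) layout: first 24 hours at the first 100 sites
    sample = h5['pressure_0m'][:24, :100]
```

For larger reads, opening the included kerchunk reference with `fsspec`, `zarr`, and `xarray` (all in `dev-env.yml`) avoids streaming whole files; the reference file's name can be found by listing the `sup3rcc/` directory.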
+ +#### `Dimensions:` +field | data_type +-- | -- +`time_index` | int +`latitude` | float +`longitude` | float + +#### `Location Metadata:` + +field | data_type +-- | -- +`country` | string +`state` | string +`county` | string +`timezone` | string +`eez` | string +`elevation` | string +`offshore` | string + +#### `Variables:` + +field | data_type +-- | -- +`dhi` | float +`dni` | float +`ghi` | float +`pressure_0m` | float +`relativehumidity_2m` | float +`temperature_2m` | float +`winddirection_100m` | float +`winddirection_10m` | float +`winddirection_200m` | float +`windspeed_100m` | float +`windspeed_10m` | float +`windspeed_200m` | float +`offshore` | float + +## Sample code + +A complete Python example of accessing and visualizing some of these data is available in the accompanying [sample notebook](https://nbviewer.jupyter.org/github/microsoft/AIforEarthDataSets/blob/main/data/sup3rcc.ipynb). + +## Mounting the container + +We also provide a read-only SAS (shared access signature) token to allow access via, e.g., [BlobFuse](https://github.com/Azure/azure-storage-fuse), which allows you to mount blob containers as drives: + +`https://nrel.blob.core.windows.net/oedi?sv=2019-12-12&si=oedi-ro&sr=c&sig=uslpLxKf3%2Foeu79ufIHbJkpI%2FTWDH3lblJMa5KQRPmM%3D` + +Mounting instructions for Linux are [here](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-how-to-mount-container-linux). + +## Disclaimer and Attribution + +Copyright (c) 2020, Alliance for Sustainable Energy LLC, All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +## Contact + +For questions about this dataset, contact [`aiforearthdatasets@microsoft.com`](mailto:aiforearthdatasets@microsoft.com?subject=oedi%20question). + + +## Notices + +Microsoft provides this dataset on an "as is" basis. Microsoft makes no warranties (express or implied), guarantees, or conditions with respect to your use of the dataset. To the extent permitted under your local law, Microsoft disclaims all liability for any damages or losses * including direct, consequential, special, indirect, incidental, or punitive * resulting from your use of this dataset. 
This dataset is provided under the original terms that Microsoft received source data. \ No newline at end of file diff --git a/azure/examples/PR100.ipynb b/azure/examples/PR100.ipynb new file mode 100644 index 0000000..476eb9f --- /dev/null +++ b/azure/examples/PR100.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Demo Notebook for Accessing PR100 Data on Azure\n", + "\n", + "Launched on February 2, 2022, a two-year study entitled Puerto Rico Grid Resilience and Transitions to 100% Renewable Energy (PR100) will perform a comprehensive analysis of stakeholder-driven pathways to Puerto Rico’s clean energy future. For more information, please visit [https://www.energy.gov/gdo/puerto-rico-grid-resilience-and-transitions-100-renewable-energy-study-pr100].\n", + "\n", + "To support the PR100 project, the Open Energy Data Initiative has made an assortment of data sets available for free public access. This notebook will demonstrate how to access the PR100 data located in Azure BLOB storage." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get Access Token\n", + "\n", + "You do not need an Azure account to access public data. Instead, you can obtain a temporary access token via the Planetary Computer's API. This can be accomplished via either the requests or planetary_computer libraries. Both options are shown below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token with requests\n", + "import requests\n", + "\n", + "token = requests.get(\n", + " 'https://planetarycomputer.microsoft.com/api/sas/v1/token/nrel/oedi'\n", + ").json()['token']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token with planetary-computer\n", + "import planetary_computer\n", + "\n", + "token = planetary_computer.sas.get_token('nrel', 'oedi').token\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Explore Container\n", + "\n", + "Use the token to create a PyFileSystem object. You can explore the contents of the container using the get_file_info method. The PR100 data consists of geoparquet and geotiff files that are organized into directories:\n", + "- Boundaries\n", + "- Habitat\n", + "- Hazards\n", + "- Infrastructure\n", + "- Topography" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyarrow.fs import PyFileSystem, FSSpecHandler, FileSelector\n", + "from adlfs import AzureBlobFileSystem\n", + "\n", + "# Create file system\n", + "fs = PyFileSystem(\n", + " FSSpecHandler(\n", + " AzureBlobFileSystem('nrel', credential=token)\n", + " )\n", + ")\n", + "\n", + "# View files in the 'Boundaries' directory\n", + "fs.get_file_info(FileSelector('/oedi/PR100/Boundaries/'))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load Vector Data\n", + "\n", + "Let's load one of those files into a geodataframe and visualize it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas\n", + "\n", + "df = geopandas.read_parquet('oedi/PR100/Boundaries/land_protected_areas.parquet', filesystem=fs)\n", + "df.explore()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load Raster Data\n", + "\n", + "If we look in the Topography directory, we'll see some tif files. These are cloud optimized GeoTiffs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fs.get_file_info(FileSelector('/oedi/PR100/Topography/'))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can load these files with the rasterio package." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import rasterio\n", + "import rasterio.plot\n", + "\n", + "with fs.open_input_file('oedi/PR100/Topography/elevation.tif') as file:\n", + " raster = rasterio.open(file)\n", + " print(raster.meta)\n", + " rasterio.plot.show(raster, adjust=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pr100-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d2e0ca302a5f5f673bd05f1fbb5f2420578af44ff0f5cef95f9f5d4b68b66ae3" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/azure/examples/pv_rooftop.ipynb b/azure/examples/pv_rooftop.ipynb new file mode 100644 index 0000000..1bcb163 --- /dev/null +++ b/azure/examples/pv_rooftop.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Demo Notebook for Accessing PV Rooftop Data on Azure\n", + "\n", + "The National Renewable Energy Laboratory's (NREL) PV Rooftop Database (PVRDB) is a lidar-derived, geospatially-resolved dataset of suitable roof surfaces and their PV technical potential for 128 metropolitan regions in the United States. The source lidar data and building footprints were obtained by the U.S. Department of Homeland Security Homeland Security Infrastructure Program for 2006-2014. Using GIS methods, NREL identified suitable roof surfaces based on their size, orientation, and shading parameters Gagnon et al. (2016). Standard 2015 technical potential was then estimated for each plane using NREL's System Advisory Model.\n", + "\n", + "This notebook will demonstrate how to access the PV Rooftop data located in Azure BLOB storage." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get Access Token\n", + "\n", + "You do not need an Azure account to access public data. Instead, you can obtain a temporary access token via the Planetary Computer's API. This can be accomplished via either the requests or planetary_computer libraries. Both options are shown below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token with requests\n", + "import requests\n", + "\n", + "token = requests.get(\n", + " 'https://planetarycomputer.microsoft.com/api/sas/v1/token/nrel/oedi'\n", + ").json()['token']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token with planetary-computer\n", + "import planetary_computer\n", + "\n", + "token = planetary_computer.sas.get_token('nrel', 'oedi').token" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Explore Container\n", + "\n", + "First, we use the token to create a PyFileSystem object. We can then use ParquetDataset objects to explore the metadata for each table. pv_rooftop consists of 4 tables:\n", + "- buildings\n", + "- aspects\n", + "- developable-planes\n", + "- rasd\n", + "\n", + "Each table is partitioned by city_year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyarrow.fs import PyFileSystem, FSSpecHandler\n", + "from adlfs import AzureBlobFileSystem\n", + "import pyarrow.parquet as pq\n", + "\n", + "# Create file system using token\n", + "fs = PyFileSystem(\n", + " FSSpecHandler(\n", + " AzureBlobFileSystem('nrel', credential=token)\n", + " )\n", + ")\n", + "\n", + "# Create ParquetDataset for the buildings table\n", + "buildings_dataset = pq.ParquetDataset('oedi/pv-rooftop/buildings', filesystem=fs)\n", + "\n", + "# View the partition keys\n", + "city_years = buildings_dataset.partitioning.dictionaries\n", + "city_years\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# View the schema for the buildings table\n", + "buildings_dataset.schema" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read Data\n", + "\n", + "pv_rooftop is a large data set. For the purposes of this example, we will read data from a single partition, city_year=albany_ny_13, and take a random sample of 100 buildings. We will read the tables directly into geodataframes. This may take several minutes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "\n", + "# Read the bldg_fid column from the buildings table and take a random sample of 100 buildings.\n", + "bldg_fid_sample = pd.read_parquet(\n", + " 'oedi/pv-rooftop/buildings',\n", + " filesystem=fs,\n", + " filters=[('city_year', '=', 'albany_ny_13')],\n", + " columns=['bldg_fid']\n", + ").sample(100)['bldg_fid']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read buildings table using bldg_fid_sample as a filter\n", + "buildings = gpd.read_parquet(\n", + " 'oedi/pv-rooftop/buildings',\n", + " filesystem=fs,\n", + " filters=[\n", + " ('city_year', '=', 'albany_ny_13'),\n", + " ('bldg_fid', 'in', bldg_fid_sample)\n", + " ],\n", + " columns=['gid', 'city', 'state', 'year', 'bldg_fid', 'the_geom_4326']\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read aspects table using bldg_fid_sample as a filter\n", + "aspects = gpd.read_parquet(\n", + " 'oedi/pv-rooftop/aspects',\n", + " filesystem=fs,\n", + " filters=[\n", + " ('city_year', '=', 'albany_ny_13'),\n", + " ('bldg_fid', 'in', bldg_fid_sample)\n", + " ],\n", + " columns=['gid', 'city', 'state', 'year', 'bldg_fid', 'aspect', 'the_geom_4326']\n", + ")\n", + "\n", + "# Add a column for the aspect_string\n", + "aspect_lookup = {\n", + " 0: 'flat',\n", + " 1: 'north',\n", + " 2: 'northeast',\n", + " 3: 'east',\n", + " 4: 'southeast',\n", + " 5: 'south',\n", + " 6: 'southwest',\n", + " 7: 'west',\n", + " 8: 'northwest'\n", + "}\n", + "aspects['aspect_string'] = aspects['aspect'].replace(aspect_lookup)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read developable-planes table using bldg_fid_sample as a filter\n", + "developable_planes = gpd.read_parquet(\n", + " 'oedi/pv-rooftop/developable-planes',\n", + " filesystem=fs,\n", + " filters=[\n", + " ('city_year', '=', 'albany_ny_13'),\n", + " ('bldg_fid', 'in', bldg_fid_sample)\n", + " ],\n", + " columns=['gid', 'city', 'state', 'year', 'bldg_fid', 'footprint_m2', 'slope', 'flatarea_m2', 'slopeconversion', 'slopearea_m2', 'aspect', 'the_geom_4326']\n", + ")\n", + "\n", + "# Add a column for the aspect_string\n", + "developable_planes['aspect_string'] = developable_planes['aspect'].replace(aspect_lookup)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read rasd table\n", + "rasd = gpd.read_parquet(\n", + " 'oedi/pv-rooftop/rasd',\n", + " filesystem=fs,\n", + " filters=[\n", + " ('city_year', '=', 'albany_ny_13')\n", + " ],\n", + " columns=['gid', 'city', 'state', 'year', 'the_geom_4326']\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualize Data\n", + "\n", + "We are now ready to visualize the data using folium." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import folium\n", + "\n", + "# Dictionary for coloring the polygons based on aspect\n", + "color_dict = {\n", + " 'flat': 'yellow',\n", + " 'north': 'red',\n", + " 'northeast': 'red',\n", + " 'east': 'yellow',\n", + " 'southeast': 'green',\n", + " 'south': 'green',\n", + " 'southwest': 'green',\n", + " 'west': 'yellow',\n", + " 'northwest': 'red'\n", + "}\n", + "color = aspects['aspect_string'].replace(color_dict)\n", + "m = buildings.explore(color='gray', name='buildings')\n", + "m = aspects.explore(m=m, name='aspects', color=color)\n", + "m = developable_planes.explore(m=m, name='developable-planes', color=color)\n", + "m = rasd.explore(m=m, name='rasd')\n", + "folium.LayerControl().add_to(m)\n", + "m" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Export Data\n", + "\n", + "There are many options for exporting the data for use in GIS software. Here, we demonstrate writing a geopackage." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_name = 'pv_rooftop_albany_ny_13.gpkg'\n", + "buildings.to_file(file_name, layer='buildings', driver=\"GPKG\")\n", + "aspects.to_file(file_name, layer='aspects', driver=\"GPKG\")\n", + "developable_planes.to_file(file_name, layer='developable-planes', driver=\"GPKG\")\n", + "rasd.to_file(file_name, layer='rasd', driver=\"GPKG\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "oedi-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "4c7bf1489743dc7ac4eb5d54993539996d2b573f88c885c7af86ecea3199729c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/azure/pipeline/ASL/job.json b/azure/pipeline/ASL/job.json new file mode 100644 index 0000000..dee78c2 --- /dev/null +++ b/azure/pipeline/ASL/job.json @@ -0,0 +1,21 @@ +{ + "jobName": "test3", + "jobDefinition": "arn:aws:batch:us-west-2:351672045885:job-definition/kerchunk-h5-new:1", + "jobQueue": "arn:aws:batch:us-west-2:351672045885:job-queue/kerchunk-h5", + "dependsOn": [], + "arrayProperties": {}, + "parameters": {}, + "containerOverrides": { + "resourceRequirements": [], + "environment": [ + { + "name": "staging_bucket", + "value": "kerchunk-staging" + }, + { + "name": "s3_file", + "value": "nrel-pds-wtk/south_atlantic/yearly_hr/v1.0.0/satlantic_2000_hr.h5" + } + ] + } + } \ No newline at end of file diff --git a/azure/pipeline/ASL/job_definition.json b/azure/pipeline/ASL/job_definition.json new file mode 100644 index 0000000..f831696 --- /dev/null +++ b/azure/pipeline/ASL/job_definition.json @@ -0,0 +1,31 @@ +{ + "jobDefinitionName": "kerchunk-h5", + "type": "container", + "containerProperties": { + "image": "351672045885.dkr.ecr.us-west-2.amazonaws.com/transform_h5_container", + "jobRoleArn": "arn:aws:iam::351672045885:role/ecsTaskExecutionRole", + "executionRoleArn": "arn:aws:iam::351672045885:role/ecsTaskExecutionRole", + "resourceRequirements": [ + { + "value": "1", + "type": "VCPU" + }, + { + "value": "100000", + "type": "MEMORY" + } + ], + "environment": [], + "secrets": [], + "linuxParameters": { + "tmpfs": [], + 
"devices": [] + }, + "mountPoints": [], + "ulimits": [] + }, + "platformCapabilities": [ + "EC2" + ], + "parameters": {} +} \ No newline at end of file diff --git a/azure/pipeline/ASL/kerchunk-1TB.json b/azure/pipeline/ASL/kerchunk-1TB.json new file mode 100644 index 0000000..c88cb75 --- /dev/null +++ b/azure/pipeline/ASL/kerchunk-1TB.json @@ -0,0 +1 @@ +{"EbsOptimized":true,"IamInstanceProfile":{"Arn":"arn:aws:iam::351672045885:instance-profile\/ec2-base-role"},"BlockDeviceMappings":[{"DeviceName":"\/dev\/xvda","Ebs":{"Encrypted":true,"DeleteOnTermination":true,"Iops":3000,"KmsKeyId":"arn:aws:kms:us-west-2:351672045885:key\/a14e1832-d4ca-4667-a986-631341c44db8","SnapshotId":"snap-0b98405d74debf232","VolumeSize":1000,"VolumeType":"gp3","Throughput":125}}],"NetworkInterfaces":[{"AssociatePublicIpAddress":false,"DeleteOnTermination":true,"Description":"","DeviceIndex":0,"Groups":["sg-0dd899f63f3874c77"],"InterfaceType":"interface","Ipv6Addresses":[],"PrivateIpAddresses":[{"Primary":true,"PrivateIpAddress":"172.18.37.24"}],"SubnetId":"subnet-002fd73ee4a6c6baf","NetworkCardIndex":0}],"ImageId":"ami-038c0c1c6c6b1fb07","InstanceType":"x2iedn.xlarge","KeyName":"matt-key","Monitoring":{"Enabled":false},"Placement":{"AvailabilityZone":"us-west-2b","GroupName":"","Tenancy":"default"},"DisableApiTermination":false,"InstanceInitiatedShutdownBehavior":"stop","TagSpecifications":[{"ResourceType":"instance","Tags":[{"Key":"Name","Value":"kerchunk-1TiB"}]}],"CpuOptions":{"CoreCount":2,"ThreadsPerCore":2},"CapacityReservationSpecification":{"CapacityReservationPreference":"open"},"HibernationOptions":{"Configured":false},"MetadataOptions":{"HttpTokens":"required","HttpPutResponseHopLimit":2,"HttpEndpoint":"enabled","HttpProtocolIpv6":"disabled","InstanceMetadataTags":"disabled"},"EnclaveOptions":{"Enabled":false},"PrivateDnsNameOptions":{"HostnameType":"ip-name","EnableResourceNameDnsARecord":false,"EnableResourceNameDnsAAAARecord":false},"MaintenanceOptions":{"AutoRecovery":"default"},"DisableApiStop":false} \ No newline at end of file diff --git a/azure/pipeline/ASL/state_machine_input.json b/azure/pipeline/ASL/state_machine_input.json new file mode 100644 index 0000000..79be9b9 --- /dev/null +++ b/azure/pipeline/ASL/state_machine_input.json @@ -0,0 +1 @@ 
+{"s3_files":["nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2000-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2001-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2002-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2003-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v
1.0.0\/satlantic_2004-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2004-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2005-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2006-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2007-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-05.h5","nrel-pds-wtk\/south_atlan
tic\/monthly\/v1.0.0\/satlantic_2008-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2008-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2009-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2010-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2011-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-08.h5","nrel-pds-w
tk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2012-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2013-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2014-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2015-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-11.
h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2016-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2017-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2018-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2019-12.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-01.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-02.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-03.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-04.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-05.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-06.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-07.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-08.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-09.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-10.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-11.h5","nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/satlantic_2020-12.h5"],"staging_bucket":"kerchunk-staging","s3_comb_ref_file":"wtk\/south_atlantic\/kerchunk_5min_ref_s3.json","az_comb_ref_fil
e":"wtk\/south_atlantic\/kerchunk_5min_ref.json","run_name":"south_atlantic-5min-2"} \ No newline at end of file diff --git a/azure/pipeline/ASL/state_machine_template.json b/azure/pipeline/ASL/state_machine_template.json new file mode 100644 index 0000000..8db9ed2 --- /dev/null +++ b/azure/pipeline/ASL/state_machine_template.json @@ -0,0 +1,74 @@ +{ + "Comment": "Takes a set of s3 paths to h5 files as input and transforms them to be compatible with kerchunk. Kerchunk reference files are generated for each file for both s3 and Azure,then the combined reference files are generated. All files are uploaded to an s3 staging bucket for testing.", + "StartAt": "Map", + "States": { + "Map": { + "Type": "Map", + "Parameters": { + "ContainerOverrides": { + "Command": ["python", "transform.py"], + "Environment": [ + { + "Name": "s3_file", + "Value.$": "$$.Map.Item.Value" + }, + { + "Name": "staging_bucket", + "Value.$": "$.staging_bucket" + } + ] + } + }, + "ItemProcessor": { + "ProcessorConfig": { + "Mode": "INLINE" + }, + "StartAt": "transform-h5-files", + "States": { + "transform-h5-files": { + "Type": "Task", + "Resource": "arn:aws:states:::batch:submitJob.sync", + "Parameters": { + "JobName": "transform-file", + "JobDefinition": "arn:aws:batch:us-west-2:351672045885:job-definition/kerchunk-h5", + "JobQueue": "arn:aws:batch:us-west-2:351672045885:job-queue/kerchunk-h5", + "ContainerOverrides.$": "$.ContainerOverrides" + }, + "End": true + } + } + }, + "ItemsPath": "$.s3_files", + "MaxConcurrency": 20, + "Next": "generate-references", + "ResultPath": null + }, + "generate-references": { + "Type": "Task", + "Resource": "arn:aws:states:::batch:submitJob.sync", + "Parameters": { + "JobName": "refjob", + "JobDefinition": "arn:aws:batch:us-west-2:351672045885:job-definition/kerchunk-h5", + "JobQueue": "arn:aws:batch:us-west-2:351672045885:job-queue/kerchunk-h5", + "ContainerOverrides": { + "Command": ["python", "gen_ref.py"], + "Environment": [ + { + "Name": "staging_bucket", + "Value.$": "$.staging_bucket" + }, + { + "Name": "run_name", + "Value.$": "$.run_name" + }, + { + "Name": "s3_comb_ref_file", + "Value.$": "$.s3_comb_ref_file" + } + ] + } + }, + "End": true + } + } +} diff --git a/azure/pipeline/__init__.py b/azure/pipeline/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/azure/pipeline/aws_glob_patterns.json b/azure/pipeline/aws_glob_patterns.json new file mode 100644 index 0000000..0deedfc --- /dev/null +++ b/azure/pipeline/aws_glob_patterns.json @@ -0,0 +1 @@ +{"nrel-pds-wtk":{"pr100":{"hourly":"nrel-pds-wtk\/pr100\/hourly\/*.h5","5min":"nrel-pds-wtk\/pr100\/5min\/*.h5"},"south_atlantic":{"hourly":"nrel-pds-wtk\/south_atlantic\/yearly_hr\/v1.0.0\/*.h5","5min":"nrel-pds-wtk\/south_atlantic\/monthly\/v1.0.0\/*.h5"}}} \ No newline at end of file diff --git a/azure/pipeline/aws_tools.py b/azure/pipeline/aws_tools.py new file mode 100644 index 0000000..5b7f465 --- /dev/null +++ b/azure/pipeline/aws_tools.py @@ -0,0 +1,291 @@ +import boto3 +import ujson +import s3fs +from etl_tools import load_oedi_sas, gen_ref_comb +from azure.storage.blob import ContainerClient +from dotenv import load_dotenv +import subprocess +import h5py + +def get_tags(org, billingid, task, owner): + tags = [ + { + 'key': 'org', + 'value': org + }, + { + 'key': 'billingid', + 'value': billingid + }, + { + 'key': 'task', + 'value': task + }, + { + 'key': 'owner', + 'value': owner + } + ] + return tags + +def get_dataset(bucket, prefix=None, extension='.h5', resolution=None): + """ + This is a convenience 
function that generates a list of s3 bucket+key paths for a given dataset + + Parameters + ---------- + bucket : str (required) + Bucket in which the dataset lives (e.g. 'nrel-pds-wtk') + prefix : str + Prefix of all files in the dataset (e.g. 'Great_Lakes') + extension : str + File extension for all files in the dataset (e.g. '.h5') + resolution : str + For WIND data only. Options are 'hourly' or '5min' + + Returns + ------- + files : list + List of all bucket+key paths to all files in the bucket subject to the provided options. + """ + s3 = s3fs.S3FileSystem(anon=True) + with open('aws_glob_patterns.json') as f: + aws_glob_patterns = ujson.load(f) + + if prefix and resolution: + files = s3.glob(aws_glob_patterns[bucket][prefix][resolution]) + elif prefix: + files = s3.glob(aws_glob_patterns[bucket][prefix]) + else: + files = s3.glob(aws_glob_patterns[bucket]) + + files = [file for file in files if file.endswith(extension)] + + return files + +def get_StepFunctionRole(): + # TODO: Add code that creates the role if it doesn't exist + """ + This function obtains the StepFunctionRole to be used when creating a step function. + + Parameters + ---------- + None + + Returns + ------- + roleArn : str + Amazon resource number of the StepFunctionRole + """ + iam = boto3.client('iam') + roleArn = iam.get_role(RoleName='StepFunctionRole')['Role']['Arn'] + return roleArn + +def create_state_machine(name, definition='./ASL/state_machine_template.json', tags=None, region_name='us-west-2'): + """ + This is a convenience function that creates or updates a state machine in AWS from the definition. + + Parameters + ---------- + name : str (required) + The name given to the state machine in AWS. + definition : str + Path to the json file that contains the ASL definition of the state machine. + tags : dict + key-value pairs for tracking aws resources. Defaults will be used if none are provided (see get_tags). + Returns + ------- + stateMachineArn : str + The amazon resource number of the state machine. + """ + + sf = boto3.client('stepfunctions', region_name=region_name) + + sms = sf.list_state_machines()['stateMachines'] + stateMachineArn = '' + for sm in sms: + if sm['name'] == name: + stateMachineArn = sm['stateMachineArn'] + break + + if not tags: + tags = get_tags() + + with open(definition) as f: + if stateMachineArn: + sf.update_state_machine(stateMachineArn=stateMachineArn, definition=f.read()) + else: + roleArn = get_StepFunctionRole() + stateMachineArn = sf.create_state_machine(name=name, definition=f.read(), roleArn=roleArn, tags=tags)['stateMachineArn'] + return stateMachineArn + +def get_state_machine(name, region_name='us-west-2'): + """ + This function gets the ARN for a state machine by name. + + Parameters + ---------- + name : str (required) + The name of the state machine in AWS. + + Returns + ------- + stateMachineArn : str + The amazon resource number of the state machine. + """ + sf = boto3.client('stepfunctions', region_name=region_name) + sms = sf.list_state_machines()['stateMachines'] + stateMachineArn = '' + for sm in sms: + if sm['name'] == name: + stateMachineArn = sm['stateMachineArn'] + break + if not stateMachineArn: + raise Exception(f'State machine {name} not found.') + return stateMachineArn + +def create_state_machine_input(files, staging_bucket, s3_comb_ref_file, az_comb_ref_file, run_name=None, input_file='ASL/state_machine_input.json'): + # TODO: Check access/existence to/of staging bucket + """ + This function generates the state machine input to process a dataset. 
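+    The input is also written locally to input_file and uploaded to s3://{staging_bucket}/{run_name}.json,
+    which is where the generate-references batch job (gen_ref.py) reads the full file list from.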
+ + Parameters + ---------- + files : list (required) + A list of bucket+key paths to the files of the dataset + staging_bucket : str + Name of the bucket where transformed files and json references will be written + s3_comb_ref_file : str + Key for the combined kerchunk reference file that points to the dataset in staging + az_comb_ref_file : str + Key for the combined kerchunk reference file that points to the dataset in azure + run_name : str + The name of the run. This will be used to create a json file in S3 containing the inputs needed for the run + input_file : str + A path in which to store a local copy of the json inputs needed for the run. + Returns + ------- + input_data : str + A serialized copy of the input data + """ + smi = { + 's3_files': files, + 'staging_bucket': staging_bucket, + 's3_comb_ref_file': s3_comb_ref_file, + 'az_comb_ref_file': az_comb_ref_file, + 'run_name' : run_name + } + with open(input_file, 'w') as f: + ujson.dump(smi, f) + + s3 = s3fs.S3FileSystem() + s3.put_file(input_file, f'{staging_bucket}/{run_name}.json') + + input_data = ujson.dumps(smi) + return input_data + +def run_state_machine(name, run_name = 'sm_run', input_file='ASL/state_machine_input.json', region_name='us-west-2'): + sf = boto3.client('stepfunctions', region_name=region_name) + stateMachineArn = get_state_machine(name) + with open(input_file) as f: + input = f.read() + response = sf.start_execution(stateMachineArn=stateMachineArn, name=run_name, input=input) + return response + +def create_job_def(job_def_file='./ASL/job_definition.json', region_name='us-west-2'): + with open(job_def_file) as f: + job_def = ujson.load(f) + tags = get_tags() + job_def['tags'] = {} + for tag in tags: + job_def['tags'][tag['key']] = tag['value'] + job_def['propagateTags'] = True + batch = boto3.client('batch', region_name=region_name) + response = batch.register_job_definition(**job_def) + return response + +def create_launch_templates(): + # TODO: Need to add the 2TB and 3TB versions + ec2 = boto3.client('ec2') + LaunchTemplateNames = ['kerchunk-1TB'] + for LaunchTemplateName in LaunchTemplateNames: + with open(f'./ASL/{LaunchTemplateName}.json') as f: + LaunchTemplateData = ujson.load(f) + existing_template = ec2.describe_launch_templates(Filters=[{'Name': 'launch-template-name', 'Values': [LaunchTemplateName]}]) + if existing_template: + ec2.create_launch_template_version(LaunchTemplateName=LaunchTemplateName, LaunchTemplateData=LaunchTemplateData) + else: + ec2.create_launch_template(LaunchTemplateName=LaunchTemplateName, LaunchTemplateData=LaunchTemplateData) + +def create_cluster(): + batch = boto3.client('batch') + +def create_aws_resources(): + create_state_machine('kerchunk-h5') + create_job_def() + +def process_h5_dataset(files, staging_bucket, s3_comb_ref_file, az_comb_ref_file, state_machine_name='kerchunk_h5', region_name='us-west-2'): + smi = create_state_machine_input(files, staging_bucket, s3_comb_ref_file, az_comb_ref_file) + stateMachineArn = get_state_machine(state_machine_name) + sf = boto3.client('stepfunctions', region_name=region_name) + sf.start_execution(stateMachineArn=stateMachineArn, input=smi) + +def copy_s3_dataset_to_azure(files, staging_bucket, dry_run=False): + CONTAINER_NAME = 'oedi' + sas = load_oedi_sas() + load_dotenv() # Store AWS credentials in .env file + cmd = [ + 'azcopy', + 'copy', + f'https://s3.us-west-2.amazonaws.com/{staging_bucket}', + f'https://nrel.blob.core.windows.net/{CONTAINER_NAME}?{sas}', + '--include-path', + ';'.join(files) + ] + + if dry_run: + 
cmd.append('--dry-run') + + subprocess.run(cmd) + +def create_combined_ref(files, staging_bucket, comb_ref_file=None, remote_protocol='s3'): + s3 = s3fs.S3FileSystem() + f = h5py.File(s3.open(f'{staging_bucket}/{files[0]}')) + identical_dims = list(f.attrs['identical_dims']) + if remote_protocol == 's3': + ref_files = [file.replace('.h5', '_s3.json') for file in files] + elif remote_protocol == 'abfs': + ref_files = [file.replace('.h5', '.json') for file in files] + else: + raise Exception('remote_protocol must be "s3" or "abfs"') + refs = [] + for ref_file in ref_files: + with s3.open(f'{staging_bucket}/{ref_file}', 'rb') as f: + refs.append(ujson.load(f)) + ref_comb = gen_ref_comb(refs, identical_dims=identical_dims, remote_protocol=remote_protocol) + temp_file = 'temp.json' + with open(temp_file, 'wb') as f: + f.write(ujson.dumps(ref_comb).encode()) + s3.put_file(temp_file, f's3://{staging_bucket}/{comb_ref_file}') + if remote_protocol=='abfs': + sas = load_oedi_sas() + CONTAINER_NAME = 'oedi' + dest = f'https://nrel.blob.core.windows.net/{CONTAINER_NAME}/{comb_ref_file}?{sas}' + subprocess.run(['azcopy', 'copy', temp_file, dest]) + +def copy_s3_file_to_azure(source, dest, sas=None, container='oedi'): + s3 = s3fs.S3FileSystem() + if not sas: + sas = load_oedi_sas() + client = ContainerClient.from_container_url(f'https://nrel.blob.core.windows.net/{container}?{sas}') + blob = client.get_blob_client(dest) + with s3.open(source, 'rb') as f: + blob.upload_blob(f.read()) + +def copy_local_file_to_azure(source, dest, sas=None, container='oedi'): + if not sas: + sas = load_oedi_sas() + client = ContainerClient.from_container_url(f'https://nrel.blob.core.windows.net/{container}?{sas}') + blob = client.get_blob_client(dest) + with open(source, 'rb') as f: + blob.upload_blob(f.read()) diff --git a/azure/pipeline/azure_tools.py b/azure/pipeline/azure_tools.py new file mode 100644 index 0000000..c8568f1 --- /dev/null +++ b/azure/pipeline/azure_tools.py @@ -0,0 +1,34 @@ +import planetary_computer + +def get_fs(account='nrel', container='oedi'): + return planetary_computer.get_adlfs_filesystem(account, container) + +def get_size(path, units='B'): + fs = get_fs() + size = fs.du(path, total=True) + if units=='B': + pass + elif units=='kB': + size = size * 10 ** -3 + elif units=='MB': + size = size * 10 ** -6 + elif units=='GB': + size = size * 10 ** -9 + elif units=='TB': + size = size * 10 ** -12 + elif units=='PB': + size = size * 10 ** -15 + elif units=='kiB': + size = size * 2 ** -10 + elif units=='MiB': + size = size * 2 ** -20 + elif units=='GiB': + size = size * 2 ** -30 + elif units=='TiB': + size = size * 2 ** -40 + elif units=='PiB': + size = size * 2 ** -50 + else: + raise NotImplementedError(f'Units "{units}" not recognized.') + + return size diff --git a/azure/pipeline/blob_access_example.ipynb b/azure/pipeline/blob_access_example.ipynb new file mode 100644 index 0000000..ba24a9c --- /dev/null +++ b/azure/pipeline/blob_access_example.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Deleting blobs\n", + "\n", + "from azure.storage.blob import ContainerClient\n", + "from etl_tools import load_oedi_sas\n", + "\n", + "sas_token = load_oedi_sas() # Loads oedi rw sas token\n", + "client = ContainerClient.from_container_url(f'https://nrel.blob.core.windows.net/oedi?{sas_token}')\n", + "for blob in client.list_blobs():\n", + " if \"wtk\" in blob.name and 'test' in blob.name:\n", + " 
print(blob.name)\n", + " #client.delete_blob(blob)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copying blobs within Azure\n", + "\n", + "source_blob = client.get_blob_client('wtk/wtk_bangladesh_hourly_ref.json')\n", + "dest_blob = client.get_blob_client('wtk/bangladesh/kerchunk_hourly_ref.json')\n", + "dest_blob.start_copy_from_url(source_blob.url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copy objects from S3 into Azure BLOB storage\n", + "\n", + "import s3fs\n", + "from azure.storage.blob import ContainerClient\n", + "from etl_tools import load_oedi_sas\n", + "\n", + "def copy_file_to_azure(source, dest, sas=None, container='oedi'):\n", + " s3 = s3fs.S3FileSystem()\n", + " if not sas:\n", + " sas = load_oedi_sas()\n", + "\n", + " client = ContainerClient.from_container_url(f'https://nrel.blob.core.windows.net/{container}?{sas}')\n", + " blob = client.get_blob_client(dest)\n", + " with s3.open(source, 'rb') as f:\n", + " blob.upload_blob(f.read())\n", + "\n", + "copy_file_to_azure('s3://kerchunk-staging/test.txt', 'wtk/test/test.txt')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/azure/pipeline/etl_tools.py b/azure/pipeline/etl_tools.py new file mode 100644 index 0000000..61e86bb --- /dev/null +++ b/azure/pipeline/etl_tools.py @@ -0,0 +1,381 @@ +import h5py +import pandas as pd +import numpy as np +from kerchunk.hdf import SingleHdf5ToZarr +from kerchunk.combine import MultiZarrToZarr +import ujson +import subprocess +import planetary_computer +import os +from time import time +import s3fs +import logging + +def time_index_bytestring_to_float(dset): + t = pd.Series(dset) + t = t.str.decode('utf8') + t = t.str.split('+', expand=True)[0] + t = np.array(t,dtype=np.datetime64) + t = t.astype('int') + return t + +def copy_attrs(obj1, obj2): + # Copy the attributes from obj1 to obj2, which may be h5 File objects or h5 dataset objects + for key in obj1.attrs.keys(): + obj2.attrs[key] = obj1.attrs[key] + +def copy_dataset(f_in, f_out, var, mem_limit_GB=80): + # Determine sizes of slices to read + dtype_size = f_in[var].dtype.itemsize + max_read_size = mem_limit_GB * 10 ** 9 # Read 80 GB at a time + time_index_read_size = f_in[var].shape[0] # Read all time values + gid_index_read_size = int(max_read_size / time_index_read_size // dtype_size) # Number of sites to read at a time + + # Create slices + end = f_in[var].shape[1] + starts = np.arange(0, end, gid_index_read_size) + stops = starts[1:] + stops = np.append(stops, end) + + # Copy slices + for start, stop in zip(starts, stops): + f_out[var][:, start:stop] = f_in[var][:, start:stop] + +def elapsed_time(st): + return f'{(time() - st) / 60:.2f} min' + +def load_oedi_sas(): + # read/write sas token must be stored in a plain text file located at $HOME/.sas or oedi_azure/.sas + home = os.path.expanduser('~') + if os.path.isfile(f'{home}/.sas'): + path = f'{home}/.sas' + elif os.path.isfile('./.sas'): + path = './.sas' + elif os.path.isfile('../.sas'): + path = '../.sas' + else: + path = None + + 
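+    # The stored token should be the bare SAS query string (no leading '?'), since callers
+    # interpolate it as 'https://nrel.blob.core.windows.net/<container>?{sas}'.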
if path: + with open(path) as f: + sas = f.read() + else: + raise Exception('.sas file not found. Please save your read/write .sas token to a file called .sas located in the oedi_azure directory.') + + return sas + +def transform_wtk_h5_file(in_file, out_file, chunk_size=2, weeks_per_chunk=None, in_file_on_s3=False): + # This is an updated version of transform_h5_file, designed for wtk. wtk does not have a nice rectangular coordinate grid, + # so the data will be left in 2 dims rather than be converted to 3 dims. + + # h5_file should be a path to a local h5 file. The file will be opened in write-mode, transformed and then closed. + # chunk_size is the desired size of each chunk in MiB + # weeks_per_chunk determines the length of chunks in the time_index dimension + + # Summary of data transformations: + + # 1. time_index is converted from byte-string to int (when read by xarray, this will automatically convert to np.datetime64) + # 2. A gid dataset is created to index the locations + # 3. time_index and gid are converted to dimension scales + # 4. Each variable is rechunked so that we will have consistent chunk sizes accross all files + # 5. The dimension scales are attached to each variable's dimensions + # 6. The scale_factor metadata is inverted (new_sf = 1 / old_sf) + # 7. The meta variable is unpacked + + # Notes: + # Once again, the download/upload steps are what will take all of the time here. To scale up to wtk, this transformation + # should either happen on Eagle (where the data are already local) or the transformation should be containerized for use + # with AWS batch. + + # Begin logging + st = time() + file_name = out_file.split('/')[-1] + logging.info(f'{elapsed_time(st)} - {file_name}: Starting transformation.') + + # Open input file + if in_file_on_s3: + s3 = s3fs.S3FileSystem() + f_in = h5py.File(s3.open(in_file)) + else: + f_in = h5py.File(in_file, 'r') + + # Delete output file if it exists, and then create it (note that 'w' mode for h5py would be better, but is unreliable) + if os.path.exists(out_file): + os.remove(out_file) + f_out = h5py.File(out_file, 'a') + + # Copy file attrs + copy_attrs(f_in, f_out) + logging.info(f'{elapsed_time(st)} - {file_name}: File attrs copied!') + + # Get the length of time_index and coordinates + time_len = f_in['time_index'].len() + nloc = len(f_in['coordinates']) + + # Convert time_index from bytes to float. + t = time_index_bytestring_to_float(f_in['time_index']) + + # Create time_index variable in new file. 'units' metadata required for xarray to interpret as datetime. 
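+    # (On read, xarray decodes these integers to np.datetime64 via the CF-style 'units' attribute set
+    #  below, e.g. when the combined reference is opened with xr.open_dataset("reference://", engine="zarr", ...)
+    #  as in hpc_gen_refs.py.)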
+ f_out.create_dataset('time_index', data=t) + copy_attrs(f_in['time_index'], f_out['time_index']) + f_out['time_index'].attrs['units'] = b'seconds since 1970-01-01' + + # Create gid variable + f_out.create_dataset('gid', data=np.arange(nloc, dtype=np.int32), fillvalue=-1) + logging.info(f'{elapsed_time(st)} - {file_name}: gid created.') + + # Convert to dimension scales + f_out['time_index'].make_scale() + f_out['gid'].make_scale() + + # Determine time_index chunksize + time_step = t[1] - t[0] + if not weeks_per_chunk: + if time_step == 5 * 60: # 5min data + weeks_per_chunk = 1 + elif time_step == 10 * 60: # 10min data + weeks_per_chunk = 2 + elif time_step == 15 * 60: # 15min data + weeks_per_chunk = 3 + elif time_step == 60 * 60: # hourly data + weeks_per_chunk = 12 + else: + weeks_per_chunk = 8 # other resolution + logging.info(f'Warning: Non-standard resolution of {time_step / 60} min detected.') + + time_index_chunk_len = int(min(weeks_per_chunk * 7 * 24 * 60 * 60 / time_step, time_len)) + + logging.info(f'{elapsed_time(st)} - {file_name}: time_index and gid created') + + # Get var names + vars = [var for var in f_in.keys() if var not in ['meta', 'time_index', 'latitude', 'longitude', 'gid', 'coordinates']] + + # Loop over vars copying them to the new file + for var in vars: + logging.info(f'{elapsed_time(st)} - {file_name}: Processing {var}...') + + # Check dims + if not f_in[var].shape[0] == time_len: + raise Exception(f'Dim 0 of {var} has different length than time_index.') + if not f_in[var].shape[1] == nloc: + raise Exception(f'Dim 1 of {var} has different length than gid.') + + # Determine location chunk size + element_size = f_in[var].dtype.itemsize # size of single element in bytes + gid_chunk_len = int(min(chunk_size * 2 ** 20 / time_index_chunk_len // element_size, nloc)) + + # Create dataset in new file + chunks=(time_index_chunk_len, gid_chunk_len) + f_out.create_dataset(var, shape=f_in[var].shape, dtype=f_in[var].dtype, chunks=chunks) + copy_dataset(f_in, f_out, var) + copy_attrs(f_in[var], f_out[var]) + + # Add chunks attribute + f_out[var].attrs['chunks'] = chunks + + # Fix scale_factor + if 'scale_factor' in f_out[var].attrs.keys(): + f_out[var].attrs['scale_factor'] = 1 / f_out[var].attrs['scale_factor'] + + # Attach scales to the dims + f_out[var].dims[0].attach_scale(f_out['time_index']) + f_out[var].dims[1].attach_scale(f_out['gid']) + + # Progress report + logging.info(f'{elapsed_time(st)} - {file_name}: Done!') + + logging.info(f'{elapsed_time(st)} - {file_name}: All variables transformed!') + + # Start tracking identical_dims (anything with only a gid dimension) + identical_dims = ['gid'] + + # Unpack metadata variables + for var in f_in['meta'].dtype.names: + logging.info(f'{elapsed_time(st)} - {file_name}: Unpacking {var} from meta...') + element_size = f_in['meta'][var].dtype.itemsize + gid_chunk_len = min(chunk_size * 2 ** 20 // element_size, nloc) + chunks = (gid_chunk_len,) + f_out.create_dataset(var, data=f_in['meta'][var], chunks=chunks) + + # Add chunks attribute + f_out[var].attrs['chunks'] = chunks + + # Attach dimension scales to the dimensions + f_out[var].dims[0].attach_scale(f_out['gid']) + + # Append to identical_dims + identical_dims.append(var) + + logging.info(f'{elapsed_time(st)} - {file_name}: Done!') + + logging.info(f'{elapsed_time(st)} - {file_name}: meta unpacked!') + + # Add identical_dims to file metadata so we can pass to kerchunk later + f_out.attrs['identical_dims'] = identical_dims + + # Close the datasets to ensure changes are 
written + f_in.close() + f_out.close() + + logging.info(f'{elapsed_time(st)} - {file_name}: Done with transormations!') + + return + +def transform_sup3rcc_h5_file(infile, outfile): + # This function is designed to transform h5 files for the Sup3rcc dataset, to prepare them for use with Kerchunk. + # infile and outfile should both be local file paths. infile is the original Sup3rcc h5 file. outfile will be created + # by copying and transforming the data from infile. + + # The Sup3rcc data uses a nice rectangular, evenly-spaced grid of lon/lat coordinates. This allowed for easy transformation + # from 2 dimensions to 3 dimensions, which results in improved user experience when loading the data with xarray. + + # Summary of data transformations: + + # 1. time_index is converted from byte-string to int (when read by xarray, this will automatically convert to np.datetime64) + # 2. latitude and longitude are given their own datsets + # 3. time_index, latitude and longitude are converted to dimension scales + # 4. Each variable is reshaped from 2 dims (time_index, location) to 3 dims (time_index, latitude, longitude) + # 5. Each variable is rechunked, resulting in about 1.8 MB per chunk + # 6. The dimension scales are attached to each variable's dimensions + # 7. The scale_factor metadata is inverted (new_sf = 1 / old_sf) + + # TODO + # 1. Future iterations of this transformation should modify the original h5 file, rather than copying the contents to a new file + # 2. Rechunking should be automated (currently the choice of chunk size is specific to the Sup3rcc dataset) + + # Open infile, create outfile + f1 = h5py.File(infile) + f2 = h5py.File(outfile, 'a') + + # Copy attributes + for attr in f1.attrs.keys(): + f2.attrs[attr] = f1.attrs[attr] + + # Get the length of time_index + time_len = f1['time_index'].len() + + # Convert time_index from bytes to float. + t = pd.Series(f1['time_index']) + t = t.str.decode('utf8') + t = t.str.split('+', expand=True)[0] + t = np.array(t,dtype=np.datetime64) + t = t.astype('int') + + # Grab the lat and lon coordinates from meta + lat = f1['meta']['latitude'].reshape(650, 1475)[:, 0] + lon = f1['meta']['longitude'].reshape(650, 1475)[0, :] + + # Add time_index dimension to the temp dataset. 'units' metadata required for xarray to interpret as datetime. + f2.create_dataset('time_index', data=t) + f2['time_index'].attrs['units'] = b'seconds since 1970-01-01' + + # Add lon/lat dimensions to temp dataset + f2.create_dataset('latitude', data=lat) + f2.create_dataset('longitude', data=lon) + + # Convert them to dimension scales + f2['time_index'].make_scale() + f2['latitude'].make_scale() + f2['longitude'].make_scale() + + logging.info('Dimension scales created.') + + # Get var names + vars = [var for var in f1.keys() if var not in ['meta', 'time_index']] + + # Loop over the variables and transfer them to the temp data set + for var in vars: + # Check dimensions + time_len = f1['time_index'].len() + assert f1[var].shape[0] == time_len + assert f1[var].shape[1] == 650 * 1475 + + # Copy data, reshape it and rechunk it. Now we have 3 dims, time, lat, lon + # Note that chunks=True will result in auto-chunking. 
This doesn't really work when + # data sets have different lengths for the time_index (as is the case for Sup3rcc) + chunks = (24, 130, 295) + f2.create_dataset(var, data=f1[var][:].reshape(time_len, 650, 1475), chunks=chunks) # Results in 1.8 MB chunks for pressure data + logging.info(f'{var} reshaped and transferred to new dataset.') + + # Add attributes + for attr in f1[var].attrs.keys(): + if attr == 'scale_factor': + f2[var].attrs[attr] = 1 / f1[var].attrs[attr] + elif attr != 'chunks': + f2[var].attrs[attr] = f1[var].attrs[attr] + f2[var].attrs['chunks'] = chunks + + # Label the dimensions of the main variable + f2[var].dims[0].label = 'time_index' + f2[var].dims[1].label = 'latitude' + f2[var].dims[2].label = 'longitude' + + # Attach dimension scales to the dimensions + f2[var].dims[0].attach_scale(f2['time_index']) + f2[var].dims[1].attach_scale(f2['latitude']) + f2[var].dims[2].attach_scale(f2['longitude']) + + logging.info(f'Dimension scales attached to {var}.') + + # Add metadata variables + for var in f1['meta'].dtype.names: + if var not in ['latitude', 'longitude']: + chunks = (130, 295) + f2.create_dataset(var, data=f1['meta'][var].reshape(650, 1475), chunks=chunks) + + # Add chunks attribute + f2[var].attrs['chunks'] = chunks + + # Label the dimensions of the main variable + f2[var].dims[0].label = 'latitude' + f2[var].dims[1].label = 'longitude' + + # Attach dimension scales to the dimensions + f2[var].dims[0].attach_scale(f2['latitude']) + f2[var].dims[1].attach_scale(f2['longitude']) + + # Close the new dataset to ensure changes are written + f1.close() + f2.close() + + return + +def gen_ref(local_path, storage_path, ref_file=None): + # local_path is the file to be analyzed. storage_path is the path to the same file in cloud storage. ref_file is + # an optional argument that can be used to save the kerchunk references as a json.\ + + with open(local_path, 'rb') as f: + ref = SingleHdf5ToZarr(f, storage_path, inline_threshold=300).translate() + + if ref_file: + with open(ref_file, 'wb') as f: + f.write(ujson.dumps(ref).encode()) + + return ref + +def gen_ref_comb(refs, ref_file=None, concat_dims=['time_index'], identical_dims=None, remote_protocol='abfs'): + # This function takes a list of kerchunk references and combines them into a single reference. + # For sup3rcc, we used identical_dims=['country', 'county', 'eez', 'elevation', 'latitude', 'longitude', 'offshore', 'state', 'timezone'], + # however, None would probably have been fine... 
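+    # Minimal composition sketch (hypothetical local paths and Azure keys), assuming per-file
+    # references were already produced by gen_ref():
+    #   refs = [gen_ref(p, f'abfs://oedi/wtk/example/{os.path.basename(p)}') for p in local_h5_paths]
+    #   gen_ref_comb(refs, ref_file='kerchunk_hourly_ref.json', remote_protocol='abfs')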
+ # Generate combo reference + + if remote_protocol not in ['s3', 'abfs']: + raise NotImplementedError() + + kwargs = { + 'remote_protocol': remote_protocol, + 'concat_dims': concat_dims, + 'identical_dims': identical_dims + } + if remote_protocol == 'abfs': + token = planetary_computer.sas.get_token('nrel', 'oedi').token + kwargs['remote_options'] = {'account_name': 'nrel', "credential": token} + + ref_comb = MultiZarrToZarr(refs, **kwargs).translate() + + # Write to json file + if ref_file: + with open(ref_file, 'wb') as f: + f.write(ujson.dumps(ref_comb).encode()) + + return ref_comb diff --git a/azure/pipeline/hpc_gen_refs.py b/azure/pipeline/hpc_gen_refs.py new file mode 100644 index 0000000..c629023 --- /dev/null +++ b/azure/pipeline/hpc_gen_refs.py @@ -0,0 +1,60 @@ +import ujson +from etl_tools import gen_ref_comb, load_oedi_sas +import xarray as xr +import planetary_computer +import os +import sys +import logging + +# Get input +# First arg should be the path for the combined ref file +# Next should be any number of paths to individual ref files + +args = sys.argv +comb_ref_file = args[1] +ref_paths = args[2:] + +USER = os.getenv('USER') +CONTAINER_NAME = 'oedi' + +az_path = comb_ref_file.replace(f'/scratch/{USER}/', '') + +if 'sup3rcc' in ref_paths[0]: + DATASET = 'sup3rcc' + identical_dims = ['country', 'county', 'eez', 'elevation', 'latitude', 'longitude', 'offshore', 'state', 'timezone'] +elif 'WIND' in ref_paths[0]: + DATASET = 'wtk' + az_path = az_path.replace('WIND/', 'wtk/') + # Open one dataset to get the identical_dims attribute + token = planetary_computer.sas.get_token('nrel', CONTAINER_NAME).token + ds = xr.open_dataset( + "reference://", engine="zarr", + backend_kwargs={ + "storage_options": { + "fo": ref_paths[0], + "remote_protocol": "abfs", + "remote_options": {'account_name': 'nrel', "credential": token} + }, + "consolidated": False, + } + ) + identical_dims = ds.attrs['identical_dims'] +else: + raise NotImplementedError('The only implemented Eagle datasets are sup3rcc and WIND.') + +logging.info(f'Identical dims: {identical_dims}') + +# Open all reference files +refs = [] +for rp in ref_paths: + with open(rp, 'rb') as f: + refs.append(ujson.load(f)) + +# Generate the combined reference file +gen_ref_comb(refs, ref_file=comb_ref_file, identical_dims=identical_dims) + +# Send to Azure +sas_token = load_oedi_sas() +blob_address = f'https://nrel.blob.core.windows.net/{CONTAINER_NAME}' +dest = f'{blob_address}/{az_path}?{sas_token}' +os.system(f'azcopy cp "{comb_ref_file}" "{dest}"') diff --git a/azure/pipeline/hpc_migration.ipynb b/azure/pipeline/hpc_migration.ipynb new file mode 100644 index 0000000..7e2ca2c --- /dev/null +++ b/azure/pipeline/hpc_migration.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate how to use this package to migrate h5 data from Eagle to Azure." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we need to identify the files that we want to migrate. On Eagle, data are located in the `/datasets` directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of files: 2\n" + ] + } + ], + "source": [ + "from pipeline.hpc_tools import get_dataset\n", + "\n", + "dataset = 'WIND/kazakhstan'\n", + "resolution = '15min'\n", + "\n", + "files = get_dataset(dataset, resolution=resolution)\n", + "\n", + "print(f'Number of files: {len(files)}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we launch a series of jobs to copy and transform each file in the set." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting 1 transformation jobs.\n", + "Starting job to copy dataset to Azure.\n", + "Starting job to combine references.\n", + "All jobs scheduled!\n" + ] + } + ], + "source": [ + "from pipeline.hpc_tools import process_h5_dataset\n", + "\n", + "comb_ref_file = f'/scratch/mheine/{dataset}/kerchunk_{resolution}_ref.json'\n", + "job_ids = process_h5_dataset(files, comb_ref_file=comb_ref_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After a run, use scan_err to identify any file transformation jobs that failed." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Timeouts: 1\n", + "Other errors: 0\n", + "Total Files: 1\n", + "The smallest file that timed out in WIND/conus/v1.1.0 was 1537 GB.\n" + ] + } + ], + "source": [ + "from pipeline.hpc_tools import scan_err\n", + "dataset = 'WIND/conus/v1.1.0'\n", + "resolution = 'hourly'\n", + "files, timeout_redos, other_redos = scan_err(dataset=dataset, resolution=resolution)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "process_h5_redos allows you to launch a partial job. `files` should be all files in the dataset, and `redos` should be a subset of them that you want to reprocess." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting 1 transformation jobs.\n", + "Starting job to copy dataset to Azure.\n", + "Starting job to combine references.\n", + "All jobs scheduled!\n" + ] + } + ], + "source": [ + "from pipeline.hpc_tools import process_h5_redos\n", + "\n", + "comb_ref_file = f'/scratch/mheine/{dataset}/kerchunk_{resolution}_ref.json'\n", + "job_ids = process_h5_redos(files, timeout_redos + other_redos, comb_ref_file=comb_ref_file)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/azure/pipeline/hpc_process_file.py b/azure/pipeline/hpc_process_file.py new file mode 100644 index 0000000..0aa948f --- /dev/null +++ b/azure/pipeline/hpc_process_file.py @@ -0,0 +1,33 @@ +import sys +import os +from etl_tools import transform_wtk_h5_file, transform_sup3rcc_h5_file, gen_ref +from hpc_tools import construct_paths +from time import time +import logging + +# Start timer +start_time = time() + +CONTAINER_NAME = 'oedi' +USER = os.getenv('USER') + +# Get input +args = sys.argv +if len(args) != 2: + raise Exception('Must provide exactly one file path.') +source_path = args[1] + +# Construct paths +file_name, job_name, job_dir, ref_file, az_path = construct_paths(source_path) +scratch_path = f'{job_dir}{file_name}' +if 'WIND' in source_path: + transform_wtk_h5_file(source_path, scratch_path) +elif 'sup3rcc' in source_path: + transform_sup3rcc_h5_file(source_path, scratch_path) +else: + raise NotImplementedError(f'The only Eagle datasets that have been implemented are WIND and sup3rcc.') +logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} transformed.') + +# Generate references +gen_ref(scratch_path, f'abfs://{CONTAINER_NAME}/{az_path}', ref_file=ref_file) +logging.info(f'{(time() - start_time) / 60:.2f} min: {job_name} references generated.') diff --git a/azure/pipeline/hpc_to_azure.py b/azure/pipeline/hpc_to_azure.py new file mode 100644 index 0000000..7d5a1a1 --- /dev/null +++ b/azure/pipeline/hpc_to_azure.py @@ -0,0 +1,17 @@ +import planetary_computer +import sys +import subprocess +from etl_tools import load_oedi_sas +import os + +args = sys.argv + +blob_address = 'https://nrel.blob.core.windows.net/oedi' +sas_token = load_oedi_sas() + +for arg in args[1:]: + source, dest = arg.split(':') + source = f"'{source}'" + dest = f"'{blob_address}/{dest}?{sas_token}'" + + os.system(f'azcopy copy {source} {dest} --overwrite ifSourceNewer') diff --git a/azure/pipeline/hpc_tools.py b/azure/pipeline/hpc_tools.py new file mode 100644 index 0000000..996c513 --- /dev/null +++ b/azure/pipeline/hpc_tools.py @@ -0,0 +1,470 @@ +import os +import h5py +import subprocess +import math +from glob import glob +import re +import logging + +def run_job(job_file): + job_submission = subprocess.run(['sbatch', job_file], capture_output=True) + output = job_submission.stdout.decode() + if 'Submitted batch job ' in output: + jobid = output.split()[3] + else: + jobid = 0 + logging.error(f'Job submission failure: {job_submission.stderr.decode()}') + return jobid + +def 
cancel_jobs(job_ids): + for job_id in job_ids: + subprocess.run(['scancel', job_id]) + +def construct_paths(file): + # Need username to access scratch + user = os.getenv('USER') + + file_name = file.split('/')[-1] + job_name = file_name.replace('.h5', '') + job_dir = file.replace('/datasets', f'/scratch/{user}').replace(file_name, '') + ref_file = f'{job_dir}{job_name}.json' + + if 'WIND' in file: + az_path = file.replace('/datasets/WIND', 'wtk') + elif 'sup3rcc' in file: + az_path = file.replace('/datasets/', '') + else: + raise NotImplementedError(f'The only Eagle datasets that have been implemented are WIND and sup3rcc.') + + return file_name, job_name, job_dir, ref_file, az_path + +def get_dep_str(dependency): + if not isinstance(dependency, (list, tuple)): + dependency = [dependency, ] + + return '#SBATCH --dependency=afterok:' + ':'.join([str(id) for id in dependency]) + +def get_dataset(dataset, resolution=None): + files = [] + if 'WIND' in dataset: + subsets = ['North_Atlantic', 'gulf_of_mexico'] + subsets2 = ['india'] + if any([subset in dataset for subset in subsets]): + if resolution == 'hourly': + files = glob(f'/datasets/{dataset}/yearly_hr/*.h5') + elif resolution == '5min': + files = glob(f'/datasets/{dataset}/yearly/*.h5') + elif any([subset in dataset for subset in subsets2]): + if resolution == '5min': + files = glob(f'/datasets/{dataset}/*.h5') + else: + files = [] + else: + if resolution == 'hourly': + files = glob(f'/datasets/{dataset}/*.h5') + elif resolution == '5min': + files = glob(f'/datasets/{dataset}/*/*.h5') + else: # 10min and 15min resolutions + files = glob(f'/datasets/{dataset}/*.h5') + return files + +def gen_hpc_single_job(file, job_dir, job_name, mem_GB=None, time_limit_hrs=4, debug=False): + + # Get bash path + bash_path = os.popen('which bash').read().replace('\n', '') + + # Construct paths + file_name, job_name, job_dir, ref_file, az_path = construct_paths(file) + + # Get user + user = os.getenv('USER') + + # Set parameters + nodes = 1 + ntasks = 1 + + # Create job file paths + job_file = f'{job_dir}{job_name}.sh' + output_file = f'{job_dir}{job_name}_out' + error_file = f'{job_dir}{job_name}_err' + + # Add debug partition if desired + if debug: + add_debug = '#SBATCH --partition=debug' + time_limit_hrs = 1 + else: + add_debug = '' + + if mem_GB: + add_mem = f'#SBATCH --mem={mem_GB}GB' + else: + add_mem = '' + + with open(job_file, 'w') as f: + # Write SBATCH inputs + f.write( +f"""#!{bash_path} +#SBATCH --job-name='{job_name}' +#SBATCH --nodes={nodes} +#SBATCH --ntasks={ntasks} +#SBATCH --time={time_limit_hrs:.0f}:00:00 +#SBATCH -o {output_file} +#SBATCH -e {error_file} +#SBATCH --export=ALL +#SBATCH --account=oedi +{add_mem} +{add_debug} + +#------------------ + +cd /scratch/$USER +module load conda +conda activate .env2 +srun python /home/{user}/oedi_azure/pipeline/hpc_process_file.py {file} +""" + ) + + return job_file + +def gen_hpc_combine_refs_job(comb_ref_file, ref_files, time_limit_hrs=4, dependency=None, debug=False, py_file='/home/mheine/oedi_azure/pipeline/hpc_gen_refs.py'): + bash_path = os.popen('which bash').read().replace('\n', '') + + comb_ref_file_name = comb_ref_file.split('/')[-1] + job_dir = comb_ref_file.replace(comb_ref_file_name, '') + job_name = comb_ref_file_name.replace('.json', '') + job_file = f'{job_dir}{job_name}.sh' + + # Add dependency if any + if dependency: + add_dependency = get_dep_str(dependency) + else: + add_dependency = '' + + # Add debug partition if desired + if debug: + add_debug = '#SBATCH 
--partition=debug' + time_limit_hrs = 1 + else: + add_debug = '' + + # Create job file + with open(job_file, 'w') as f: + # Write SBATCH inputs + f.write( +f"""#!{bash_path} +#SBATCH --job-name='{job_name}' +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --time={time_limit_hrs:.0f}:00:00 +#SBATCH -o {job_dir}{job_name}_out +#SBATCH -e {job_dir}{job_name}_err +#SBATCH --export=ALL +#SBATCH --account=oedi +{add_dependency} +{add_debug} + +#------------------ +cd /scratch/$USER +module load conda +conda activate .env2 +srun python {py_file} {comb_ref_file} {' '.join(ref_files)} + +""" + ) + return job_file + +def gen_hpc_to_azure_job(files, transformed_files, az_paths, dependency=None, transfer_speed=1500, debug=False, py_file='/home/mheine/oedi_azure/pipeline/hpc_to_azure.py'): + # Transfer speed in Mb/s + bash_path = os.popen('which bash').read().replace('\n', '') + + first_file_name = transformed_files[0].split('/')[-1] + job_dir = transformed_files[0].replace(first_file_name, '') + match = re.search(r'/\d\d\d\d/$', job_dir) + if match: + year = match.group(0) + job_dir = job_dir.replace(year, '/') + job_name = 'hpc_to_azure' + existing_job_files = glob(job_dir + 'hpc_to_azure*.sh') + if existing_job_files: + job_name += f'_{len(existing_job_files) + 1}' + job_file = f'{job_dir}{job_name}.sh' + + # Estimate time requirements + total_bytes = 0 + for file in files: + total_bytes += os.stat(file).st_size + + time_factor = 1.5 # Provide extra time in case things move a little slower than usual + time_required_hrs = math.ceil(time_factor * total_bytes * 8 * 10 ** -6 / transfer_speed / 60 / 60) + if time_required_hrs > 240: + logging.info('Warning: Transfer job is estimated to take longer than the maximum of 240 hrs.') + time_required_hrs = 240 + + # Create transfer args + # : + transfer_args = [f'{transformed_file}:{az_path}' for transformed_file, az_path in zip(transformed_files, az_paths)] + + # Add dependency if any + if dependency: + add_dependency = get_dep_str(dependency) + else: + add_dependency = '' + + # Add debug partition if desired + if debug: + add_debug = '#SBATCH --partition=debug' + time_required_hrs = 1 + else: + add_debug = '' + + # Create job file + with open(job_file, 'w') as f: + # Write SBATCH inputs + f.write( +f"""#!{bash_path} +#SBATCH --job-name='{job_name}' +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --time={time_required_hrs:.0f}:00:00 +#SBATCH -o {job_dir}{job_name}_out +#SBATCH -e {job_dir}{job_name}_err +#SBATCH --export=ALL +#SBATCH --account=oedi +{add_dependency} +{add_debug} + +#------------------ +cd /scratch/$USER +module load conda +conda activate .env2 +srun python {py_file} {' '.join(transfer_args)} + +""" + ) + return job_file + +def process_h5_dataset(files, comb_ref_file=None, time_limit_hrs=None, mem_factor=1.2, debug=False, skip_transformation=False, skip_transfer_to_azure=False): + # For each file in files, we generate a job script and submit to sbatch + # files should a be a list of absolute file paths to files in the /datasets directory. + + # Make lists to track jobs + job_ids = [] + ref_files = [] + transformed_files = [] + az_paths = [] + # Loop over files + logging.info(f'Starting {len(files)} transformation jobs.') + for file in files: + + # It was found that files as small as 415 GB timed out when only given 4 hours. + # In practice, there is a lot of variablity in the lengths of job runs. This may + # be due to network limitations when running many jobs concurrently. 
We're bumping + # up the time limit to 48 (the limit for the standard partition) for all files larger + # than 400 GB. + if not time_limit_hrs: + # Get file size to adjust time limit + file_size_GB = os.stat(file).st_size * 10 ** -9 + if file_size_GB < 400: + time_limit_hrs = 4 + else: + time_limit_hrs = 48 + + # Construct paths and create directory + file_name, job_name, job_dir, ref_file, az_path = construct_paths(file) + os.makedirs(job_dir, exist_ok=True) + ref_files.append(ref_file) + transformed_files.append(f'{job_dir}{file_name}') + az_paths.append(az_path) + + if not skip_transformation: + # Generate job file to transform and generate references for a single h5 file + job_file = gen_hpc_single_job(file, job_dir, job_name, time_limit_hrs=time_limit_hrs, debug=debug) + + # Run job file + job_id = run_job(job_file) + if job_id == 0: + cancel_jobs(job_ids) + raise Exception('Job submission failure') + else: + job_ids.append(job_id) + + # Generate job file to copy dataset to Azure + if not skip_transfer_to_azure: + logging.info('Starting job to copy dataset to Azure.') + copy_job_file = gen_hpc_to_azure_job(files, transformed_files, az_paths, dependency=job_ids, debug=debug) + copy_job_id = run_job(copy_job_file) + if copy_job_id == 0: + cancel_jobs(job_ids) + raise Exception('Copy job submission failure') + else: + job_ids.append(copy_job_id) + else: + copy_job_id = None + + # Generate job file to combine references + # NOTE THAT DEBUG IS CURRENTLY SET TO TRUE TO EXPEDITE JOBS WHILE ACCOUNT IN STANDBY + logging.info('Starting job to combine references.') + if comb_ref_file: + ref_job_file = gen_hpc_combine_refs_job(comb_ref_file, ref_files, dependency=copy_job_id, debug=True) + ref_job_id = run_job(ref_job_file) + if ref_job_id == 0: + cancel_jobs(job_ids) + raise Exception('Gen combined ref job submission failure') + else: + job_ids.append(ref_job_id) + + logging.info('All jobs scheduled!') + + comb_ref_file_name = comb_ref_file.split('/')[-1] + if 'hourly' in comb_ref_file_name: + job_id_file = comb_ref_file.replace(comb_ref_file_name, 'job_ids_hourly.txt') + elif '5min' in comb_ref_file_name: + job_id_file = comb_ref_file.replace(comb_ref_file_name, 'job_ids_5min.txt') + else: + job_id_file = comb_ref_file.replace(comb_ref_file_name, 'job_ids.txt') + with open(job_id_file, 'w') as f: + f.writelines([job_id + '\n' for job_id in job_ids ]) + + return job_ids + +def process_h5_redos(files, redos, comb_ref_file=None, time_limit_hrs=None, debug=False, skip_transfer_to_azure=False): + """ + Process an h5 datset where some of the transformations failed. + + Parameters + ---------- + files : list + Paths to source h5 files for entire dataset (must be in /datasets on Eagle) + redos: list + Paths to source h5 files that failed (must be in /datasets on Eagle) + comb_ref_file: str + Path to where the combined kerchunk reference file will be + stored. If None, then no combined reference will be generated. + time_limit_hrs: int + Override the default time limit for the file transformation tasks. + debug: bool + Submit all jobs to the debug partition + skip_transfer_to_azure: bool + If true, then no files will be transferred to Azure. + + Returns + ------- + job_ids : list + List of all job_ids submitted to sbatch. + """ + + # For each file in redos, we generate a job script and submit to sbatch. 
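+    # Note: we still loop over the full file list so that ref_files, transformed_files and az_paths
+    # cover the whole dataset; only the files listed in redos get new transformation jobs submitted.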
+ + # Make lists to track jobs + job_ids = [] + ref_files = [] + transformed_files = [] + az_paths = [] + # Loop over files + logging.info(f'Starting {len(redos)} transformation jobs.') + for file in files: + + # It was found that files as small as 415 GB timed out when only given 4 hours. + # In practice, there is a lot of variablity in the lengths of job runs. This may + # be due to network limitations when running many jobs concurrently. We're bumping + # up the time limit to 48 (the limit for the standard partition) for all files larger + # than 400 GB. + if not time_limit_hrs: + # Get file size to adjust time limit + file_size_GB = os.stat(file).st_size * 10 ** -9 + if file_size_GB < 400: + time_limit_hrs = 4 + else: + time_limit_hrs = 48 + + # Construct paths and create directory + file_name, job_name, job_dir, ref_file, az_path = construct_paths(file) + os.makedirs(job_dir, exist_ok=True) + ref_files.append(ref_file) + transformed_files.append(f'{job_dir}{file_name}') + az_paths.append(az_path) + + if file in redos: + # Generate job file to transform and generate references for a single h5 file + job_file = gen_hpc_single_job(file, job_dir, job_name, time_limit_hrs=time_limit_hrs, debug=debug) + + # Run job file + job_id = run_job(job_file) + if job_id == 0: + cancel_jobs(job_ids) + raise Exception('Job submission failure') + else: + job_ids.append(job_id) + + # Generate job file to copy dataset to Azure + if not skip_transfer_to_azure: + logging.info('Starting job to copy dataset to Azure.') + copy_job_file = gen_hpc_to_azure_job(files, transformed_files, az_paths, dependency=job_ids, debug=debug) + copy_job_id = run_job(copy_job_file) + if copy_job_id == 0: + cancel_jobs(job_ids) + raise Exception('Copy job submission failure') + else: + job_ids.append(copy_job_id) + else: + copy_job_id = None + + # Generate job file to combine references + if comb_ref_file: + logging.info('Starting job to combine references.') + ref_job_file = gen_hpc_combine_refs_job(comb_ref_file, ref_files, dependency=copy_job_id, debug=debug) + ref_job_id = run_job(ref_job_file) + if ref_job_id == 0: + cancel_jobs(job_ids) + raise Exception('Gen combined ref job submission failure') + else: + job_ids.append(ref_job_id) + + logging.info('All jobs scheduled!') + + comb_ref_file_name = comb_ref_file.split('/')[-1] + if 'hourly' in comb_ref_file_name: + job_id_file = comb_ref_file.replace(comb_ref_file_name, 'job_ids_hourly.txt') + elif '5min' in comb_ref_file_name: + job_id_file = comb_ref_file.replace(comb_ref_file_name, 'job_ids_5min.txt') + else: + job_id_file = comb_ref_file.replace(comb_ref_file_name, 'job_ids.txt') + with open(job_id_file, 'w') as f: + f.writelines([job_id + '\n' for job_id in job_ids ]) + + return job_ids + +def scan_err(dataset='WIND/Great_Lakes', resolution='5min'): + if resolution == 'hourly': + files = glob(f'/datasets/{dataset}/*.h5') + elif resolution == '5min': + files = glob(f'/datasets/{dataset}/*/*.h5') + + if len(files) == 0: + raise Exception('No output files found. 
Dataset/resolution does not exists or has not been processed.') + + timeouts = [] + other_errors = [] + timeout_redos = [] + other_redos = [] + for file in files: + err = file.replace('/datasets', '/scratch/mheine').replace('.h5', '_err') + with open(err) as f: + text = f.read() + if 'TIME LIMIT' in text: + timeouts.append(err) + timeout_redos.append(file) + elif len(text) > 0: + other_errors.append(err) + other_redos.append(file) + logging.info(f'Timeouts: {len(timeouts)}') + logging.info(f'Other errors: {len(other_errors)}') + logging.info(f'Total Files: {len(files)}') + + sizes = [] + for redo in timeout_redos: + sizes.append(os.stat(redo).st_size * 10 ** -9) + if len(sizes) > 0: + logging.info(f'The smallest file that timed out in {dataset} was {min(sizes):.0f} GB.') + + return files, timeout_redos, other_redos diff --git a/azure/pipeline/run_aws_pipeline.ipynb b/azure/pipeline/run_aws_pipeline.ipynb new file mode 100644 index 0000000..ce9cbf9 --- /dev/null +++ b/azure/pipeline/run_aws_pipeline.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aws_tools import *\n", + "\n", + "# Update state machine and job def\n", + "\n", + "create_aws_resources()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the s3 addresses for the dataset\n", + "\n", + "prefix = 'south_atlantic'\n", + "resolution = '5min'\n", + "staging_bucket = 'kerchunk-staging'\n", + "run_name = 'south_atlantic-5min-2'\n", + "\n", + "files = get_dataset('nrel-pds-wtk', prefix=prefix, resolution=resolution)\n", + "files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate the state machine input for this dataset\n", + "\n", + "s3_comb_ref_file = f'wtk/{prefix}/kerchunk_{resolution}_ref_s3.json'\n", + "az_comb_ref_file = f'wtk/{prefix}/kerchunk_{resolution}_ref.json'\n", + "create_state_machine_input(files, staging_bucket, s3_comb_ref_file, az_comb_ref_file, run_name=run_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run the state machine\n", + "\n", + "run_state_machine('kerchunk-h5', run_name=run_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the state machine fully executed without error, then there should now be a set of transformed h5 files, s3 refs and az refs, as well as a combined s3 ref file in the staging bucket. Use the test_staging.ipynb notebook to verify that the transformation was successful by loading the combined s3 ref file.\n", + "\n", + "Once you are satisfied, continue to the next cell to copy the data to Azure and generate the combined az ref file.\n", + "\n", + "Make sure to update the .env file with AWS credentials!" 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "copy_s3_dataset_to_azure(files, staging_bucket, dry_run=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "comb_ref_file = f'wtk/{prefix}/kerchunk_{resolution}_ref.json'\n",
+    "create_combined_ref(files, staging_bucket, comb_ref_file=comb_ref_file, remote_protocol='abfs')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once these tasks have finished, you can open the wtk example notebook and verify that the dataset can now be loaded from Azure."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "oedi-azure-dev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/azure/pipeline/transform_h5_container/Dockerfile b/azure/pipeline/transform_h5_container/Dockerfile
new file mode 100644
index 0000000..b536919
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/Dockerfile
@@ -0,0 +1,25 @@
+FROM continuumio/miniconda3
+
+# make docker use bash instead of sh
+SHELL ["/bin/bash", "--login", "-c"]
+
+# create environment
+COPY ./pipeline/transform_h5_container/env.yml .
+RUN conda env create -f env.yml
+
+# install azcopy
+COPY ./pipeline/transform_h5_container/install_azcopy.sh .
+RUN sh install_azcopy.sh
+
+# copy all necessary files
+COPY ./.sas .
+COPY ./pipeline/transform_h5_container/* ./
+COPY ./pipeline/etl_tools.py .
+COPY ./pipeline/aws_tools.py .
+
+# make entrypoint script executable
+RUN chmod u+x entrypoint.sh
+
+# run the entrypoint script, which activates the environment and executes the command
+ENTRYPOINT ["./entrypoint.sh"]
+CMD ["python", "transform.py"]
diff --git a/azure/pipeline/transform_h5_container/entrypoint.sh b/azure/pipeline/transform_h5_container/entrypoint.sh
new file mode 100644
index 0000000..67ef690
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/entrypoint.sh
@@ -0,0 +1,6 @@
+#!/bin/bash --login
+set -e
+
+# activate conda environment and let the following process take over
+conda activate oedi-azure-container
+exec "$@"
diff --git a/azure/pipeline/transform_h5_container/env.yml b/azure/pipeline/transform_h5_container/env.yml
new file mode 100644
index 0000000..5dbc52f
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/env.yml
@@ -0,0 +1,16 @@
+name: oedi-azure-container
+channels:
+  - conda-forge
+  - defaults
+  - hcc
+dependencies:
+  - python=3.10.12
+  - h5py=3.9.0
+  - boto3
+  - cftime
+  - kerchunk
+  - planetary-computer
+  - s3fs=2023.6.0
+  - pandas
+  - adlfs
+  - xarray
diff --git a/azure/pipeline/transform_h5_container/gen_ref.py b/azure/pipeline/transform_h5_container/gen_ref.py
new file mode 100644
index 0000000..8ea40c4
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/gen_ref.py
@@ -0,0 +1,54 @@
+import ujson
+from etl_tools import gen_ref_comb
+import xarray as xr
+import os
+import s3fs
+import h5py
+
+# TODO: Remove all az stuff. The az combined ref file gets created at a later step, after the data moves to Azure.
+
+# Azure container name
+CONTAINER_NAME = 'oedi'
+
+# Access S3
+s3 = s3fs.S3FileSystem()
+
+# Get input from container environment
+s3_comb_ref_file = os.getenv('s3_comb_ref_file')
+staging_bucket = os.getenv('staging_bucket')
+run_name = os.getenv('run_name')
+
+# Get s3 file list from input file on s3 (This list was too long to be an env variable.)
+with s3.open(f'{staging_bucket}/{run_name}.json') as f:
+    input_data = ujson.load(f)
+s3_source_files = input_data['s3_files']
+
+# Get paths to references and list of identical dims
+test_file = s3_source_files[0]
+if 'nrel-pds-wtk' in test_file:
+    s3_ref_paths = [f"{staging_bucket}/{f.replace('nrel-pds-wtk', 'wtk').replace('.h5', '_s3.json')}" for f in s3_source_files]
+    az_ref_paths = [f"{staging_bucket}/{f.replace('nrel-pds-wtk', 'wtk').replace('.h5', '.json')}" for f in s3_source_files]
+    test_file = test_file.replace('nrel-pds-wtk', 'wtk')
+    with s3.open(f'{staging_bucket}/{test_file}') as f:
+        h5 = h5py.File(f)
+        identical_dims = list(h5.attrs['identical_dims'])
+elif 'sup3rcc' in test_file:
+    identical_dims = ['country', 'county', 'eez', 'elevation', 'latitude', 'longitude', 'offshore', 'state', 'timezone']
+    raise NotImplementedError()
+else:
+    raise NotImplementedError(f'Dataset for {test_file} not implemented yet.')
+
+# Open all reference files
+s3_refs = []
+az_refs = []
+for s3_rp, az_rp in zip(s3_ref_paths, az_ref_paths):
+    with s3.open(s3_rp, 'rb') as f:
+        s3_refs.append(ujson.load(f))
+    with s3.open(az_rp, 'rb') as f:
+        az_refs.append(ujson.load(f))
+
+# Generate the combined reference files
+if s3_comb_ref_file:
+    local_s3_ref = 's3_ref.json'
+    gen_ref_comb(s3_refs, ref_file=local_s3_ref, identical_dims=identical_dims, remote_protocol='s3')
+    s3.put_file(local_s3_ref, f's3://{staging_bucket}/{s3_comb_ref_file}')
diff --git a/azure/pipeline/transform_h5_container/install_azcopy.sh b/azure/pipeline/transform_h5_container/install_azcopy.sh
new file mode 100644
index 0000000..56eb716
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/install_azcopy.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Install AzCopy on Linux
+
+# Download and extract
+wget https://aka.ms/downloadazcopy-v10-linux
+tar -xvf downloadazcopy-v10-linux
+
+# Move AzCopy
+rm -f /usr/bin/azcopy
+cp ./azcopy_linux_amd64_*/azcopy /usr/bin/
+chmod 755 /usr/bin/azcopy
+
+# Clean the kitchen
+rm -f downloadazcopy-v10-linux
+rm -rf ./azcopy_linux_amd64_*/
diff --git a/azure/pipeline/transform_h5_container/transfer.py b/azure/pipeline/transform_h5_container/transfer.py
new file mode 100644
index 0000000..c43ae6f
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/transfer.py
@@ -0,0 +1,5 @@
+import subprocess
+import sys
+
+args = sys.argv
+subprocess.run(['azcopy', '--version'])
diff --git a/azure/pipeline/transform_h5_container/transform.py b/azure/pipeline/transform_h5_container/transform.py
new file mode 100644
index 0000000..515f15f
--- /dev/null
+++ b/azure/pipeline/transform_h5_container/transform.py
@@ -0,0 +1,60 @@
+import os
+from etl_tools import transform_wtk_h5_file, transform_sup3rcc_h5_file, gen_ref
+from time import time
+import boto3
+import logging
+
+# Download h5 to local and then build out the rechunked copy
+
+# Start timer
+start_time = time()
+
+# Get input from container environment overrides
+container_name = 'oedi'
+staging_bucket = os.getenv('staging_bucket')
+source_path = os.getenv('s3_file')
+file_name = source_path.split('/')[-1]
+
+# Download file to local
+s3 = boto3.client('s3')
+Bucket = source_path.split('/')[0]
+Key = source_path.replace(f'{Bucket}/', '')
+local_path = f'/data/{source_path}'
+os.makedirs(local_path.replace(file_name, ''), exist_ok=True)
+s3.download_file(Bucket=Bucket, Key=Key, Filename=local_path)
+
+# Transform dataset
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - Starting transformation.')
+if 'nrel-pds-wtk' in source_path:
+    #DATASET_NAME = 'wtk'
+    az_path = source_path.replace('nrel-pds-wtk/', 'wtk/')
+    scratch_path = f'/data/{az_path}'
+    os.makedirs(scratch_path.replace(file_name, ''), exist_ok=True)  # Need to create the dir if it doesn't exist
+    transform_wtk_h5_file(local_path, scratch_path, in_file_on_s3=False)
+elif 'sup3rcc' in source_path:
+    DATASET_NAME = 'sup3rcc'
+    az_path = source_path.replace('nrel-pds-sup3rcc/', 'sup3rcc/')
+    scratch_path = f'/data/{az_path}'
+    os.makedirs(scratch_path.replace(file_name, ''), exist_ok=True)  # Need to create the dir if it doesn't exist
+    transform_sup3rcc_h5_file(source_path, scratch_path)
+else:
+    raise NotImplementedError(f'Dataset for {source_path} not implemented yet.')
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - Transformed.')
+
+ref_file = scratch_path.replace('.h5', '.json')
+ref_file_s3 = scratch_path.replace('.h5', '_s3.json')
+gen_ref(scratch_path, f'abfs://{container_name}/{az_path}', ref_file=ref_file)
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - Azure reference generated.')
+
+s3_staging_path = f's3://{staging_bucket}/{az_path}'
+gen_ref(scratch_path, s3_staging_path, ref_file=ref_file_s3)
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - S3 reference generated.')
+
+# Upload to staging
+s3 = boto3.client('s3')
+s3.upload_file(ref_file, staging_bucket, ref_file.replace('/data/', ''))
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - Azure reference uploaded to staging.')
+s3.upload_file(ref_file_s3, staging_bucket, ref_file_s3.replace('/data/', ''))
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - S3 reference uploaded to staging.')
+s3.upload_file(scratch_path, staging_bucket, az_path)
+logging.info(f'{(time() - start_time) / 60:.2f} min: {file_name} - h5 file uploaded to staging.')
diff --git a/azure/pipeline/update_trans_container.sh b/azure/pipeline/update_trans_container.sh
new file mode 100644
index 0000000..9f1eeea
--- /dev/null
+++ b/azure/pipeline/update_trans_container.sh
@@ -0,0 +1,4 @@
+docker build -t transform_h5_container -f ./pipeline/transform_h5_container/Dockerfile .
+docker tag transform_h5_container:latest 351672045885.dkr.ecr.us-west-2.amazonaws.com/transform_h5_container:latest
+aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 351672045885.dkr.ecr.us-west-2.amazonaws.com
+docker push 351672045885.dkr.ecr.us-west-2.amazonaws.com/transform_h5_container:latest
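
Note on verification: run_aws_pipeline.ipynb above checks the transformation by loading the combined s3 ref file (via test_staging.ipynb, which is not part of this patch). The snippet below is a minimal, illustrative sketch of that kind of check using fsspec and xarray; the bucket and reference path are assumptions that simply follow the `kerchunk-staging` / `wtk/{prefix}/kerchunk_{resolution}_ref_s3.json` convention used in the notebook, and AWS credentials are assumed to be available in the environment.

```python
import fsspec
import xarray as xr

# Combined kerchunk reference written by the pipeline (illustrative path following
# the staging-bucket convention used in run_aws_pipeline.ipynb).
ref_path = "s3://kerchunk-staging/wtk/south_atlantic/kerchunk_5min_ref_s3.json"

# A "reference" filesystem maps Zarr keys onto byte ranges of the original h5 files,
# so xarray can open the dataset without downloading or copying it.
fs = fsspec.filesystem(
    "reference",
    fo=ref_path,
    remote_protocol="s3",  # the chunks referenced in the JSON still live on S3
)

# Open the mapped store with the zarr engine; kerchunk reference stores are not consolidated.
ds = xr.open_dataset(
    fs.get_mapper(""),
    engine="zarr",
    backend_kwargs={"consolidated": False},
)
print(ds)
```

The combined az ref produced by create_combined_ref plays the same role once the data lands on Azure, with `remote_protocol='abfs'` and adlfs handling the Azure side.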