In [2]:
from types import SimpleNamespace
import boto3
import re,os
import pandas as pd
import h5py
import json

# Load the record of already-processed models into a dictionary.
with open("./output/Processed Models.json", "r") as infile:
    processed_models_dict = json.load(infile)

# Read the S3 credentials from a local key file. The location can be overridden
# with the LWI_S3_KEY_FILE environment variable; the default is the original path.
secret_file_path = os.environ.get("LWI_S3_KEY_FILE", r"Z:\LWI\LWI S3 Key.txt")
with open(secret_file_path) as secret_file:
    secret = secret_file.readlines()

# The access key ID is taken from the 2nd line and the secret key from the 4th,
# so fail with a clear message if the file is shorter than that.
if len(secret) < 4:
    raise ValueError(
        f"Expected at least 4 lines in S3 key file {secret_file_path!r}, found {len(secret)}"
    )

# strip() rather than strip('\n'): stray spaces or carriage returns around the
# values would otherwise end up inside the credentials.
secret_ID = secret[1].strip()
secret_key = secret[3].strip()

# %%
# The low-level client and the session are both built from the same key pair
# that was read from the key file.
aws_credentials = {
    'aws_access_key_id': secret_ID,
    'aws_secret_access_key': secret_key,
}

client = boto3.client('s3', **aws_credentials)
session = boto3.Session(**aws_credentials)

# Resource handle; the bucket scan below uses it to iterate over objects.
s3 = session.resource('s3')

# %%
# Buckets reported by S3 for these credentials. Kept under its own name so it
# is not silently discarded by the fixed list below.
response = client.list_buckets()
listed_bucket_names = [bucket['Name'] for bucket in response['Buckets']]

# Limiting to buckets I have access to.
# This fixed list is the one the rest of the notebook iterates over.
bucket_names = [
    'lwi-region4',
    'lwi-region5',
    'lwi-region6',
    'lwi-region7',
]



In [3]:
# Display the dictionary loaded from "./output/Processed Models.json".
processed_models_dict

{'completed_models_list': ['lwi-region4\\deliverables/20230606_08080206_LC_RAS2DModelSetupP2',
  'lwi-region4\\deliverables/20230321_12010005_SL_RASModelSetup',
  'lwi-region4\\deliverables/20230606_08080205_WF_RAS1D2DModelSetupP2/04_BoundaryConditions/Stage/Region4Stage',
  'lwi-region4\\deliverables/20221222_08080206_LC_RAS2DModelSetupP1/LC_LowerCalcasieu',
  'lwi-region4\\deliverables/20221222_08080205_WF_RAS2DModelSetupP1/WF_WestForkCalcasieu',
  'lwi-region4\\deliverables/20230504_08080203_UC_RAS1D2DModelSetupP2/UC_UpperCalcasieu-2D',
  'lwi-region4\\deliverables/20230606_08080205_WF_RAS1D2DModelSetupP2/WF_WestForkCalcasieu-2D',
  'lwi-region4\\deliverables/20221222_08080203_UC_RAS2DModelSetupP1/UC_UpperCalcasieu',
  'lwi-region5\\deliverables/2023_0209_ToDOTD_Hydraulic_Model_Setup_MermentauHeadwatersHUC8/Model/MH_RAS_2D_09FFEB2023/MH Bayou Nezpique',
  'lwi-region5\\deliverables/2023_0209_ToDOTD_Hydraulic_Model_Setup_MermentauHeadwatersHUC8/Model/MH_RAS_2D_09FFEB2023/MH Bayou Des

In [61]:
# Build a dictionary, keyed by bucket name, of the model files of interest:
#   'tif'            - keys ending in any of tif_names (case-insensitive)
#   'non_u_p_g_hdfs' - keys ending in hdf that are not p**.hdf, u**.hdf or g**.hdf
#   'g'              - keys ending in g00 .. g99
model_data = {}
tif_names = ['tif', 'tiff', 'geotiff', 'geotif']
exclusion_terms = ["hms", "backup", "topobathy_datasets", "bridge_data", "survey", "postprocessing", "impervious", "(max)", "landuse", "land_use", 
    "landcover", "land_cover", "soil_", "soiltype", "soilgroup", "soiltexture", 'DA.tif', 'demsd8.tif', 'elevation.tif', 
    'flowaccum.tif', 'flowdir.tif', 'reconditioned.tif', 'sinkfill.tif', 'sinklocs.tif', 'str_bin.tif', 'streams.tif', "snapshot.hdf", 'tmp.hdf']

# Loop-invariant helpers, computed once instead of once per S3 object.
exclusion_terms_lower = [term.lower() for term in exclusion_terms]
tif_suffixes = tuple(tif_names)
# Matches names like ".p01.hdf", ".u01.hdf" or ".g01.hdf" only at the end of the key.
# The earlier unanchored 'p...hdf' style patterns matched anywhere in the key and
# therefore also rejected unrelated files such as "Group01.hdf".
pug_hdf_pattern = re.compile(r'\.[pug][0-9]{2}\.hdf\Z')
# Same result as testing endswith() against each of g00, g01, ... g99.
g_file_pattern = re.compile(r'g[0-9]{2}\Z')

for bucket in bucket_names:
    print('\nSearching Bucket:', bucket)
    my_bucket = s3.Bucket(bucket)
    model_data[bucket] = {
        'tif': [],
        'non_u_p_g_hdfs': [],
        'g': [],
    }
    bucket_files = model_data[bucket]

    for obj in my_bucket.objects.all():
        key = obj.key
        key_lower = key.lower()

        # Skip any key containing an exclusion term (case-insensitive). "hms" is
        # one of the terms, so no separate hms check is needed further down.
        if any(term in key_lower for term in exclusion_terms_lower):
            continue

        # One endswith() over all suffixes so each key is recorded once; testing
        # the suffixes one by one appended e.g. "x.geotif" twice, because it ends
        # with both "geotif" and "tif".
        if key_lower.endswith(tif_suffixes):
            bucket_files['tif'].append(key)

        # hdf files other than the p**.hdf / u**.hdf / g**.hdf ones.
        if key.endswith('hdf') and not pug_hdf_pattern.search(key):
            bucket_files['non_u_p_g_hdfs'].append(key)

        # Files whose key ends with g00 .. g99.
        if g_file_pattern.search(key):
            bucket_files['g'].append(key)

# One column per bucket, one row per category; each cell holds a list of keys.
model_data_df = pd.DataFrame(model_data)
model_data_df


Searching Bucket: lwi-region4

Searching Bucket: lwi-region5

Searching Bucket: lwi-region6

Searching Bucket: lwi-region7


Unnamed: 0,lwi-region4,lwi-region5,lwi-region6,lwi-region7
tif,[deliverables/20221222_08080203_UC_RAS2DModelS...,[deliverables/2023_0127_ToDOTD_Hydraulic_Model...,[deliverables/20230106_TO3_Hydrology_Model_Sta...,[deliverables/Amite_RAS_Setupdelivery/RAS/AM-A...
non_u_p_g_hdfs,[deliverables/20221222_08080203_UC_RAS2DModelS...,[deliverables/2023_0127_ToDOTD_Hydraulic_Model...,[deliverables/20230106_TO3_Hydrology_Model_Sta...,[deliverables/Amite_RAS_Setupdelivery/RAS/AM-A...
g,[deliverables/20221222_08080203_UC_RAS2DModelS...,[deliverables/2023_0127_ToDOTD_Hydraulic_Model...,[deliverables/20230106_TO3_Hydrology_Model_Sta...,[deliverables/Amite_RAS_Setupdelivery/RAS/AM-A...


In [62]:
# Convert the per-bucket table back to a nested dict and save it as JSON in the
# current working directory.
model_data_dict = model_data_df.to_dict()
with open('model_data_dict.json', mode='w') as json_out:
    json.dump(model_data_dict, json_out)

In [77]:
# Report how many keys were collected for each bucket and category.
for region, region_column in model_data_df.items():
    for param, collected_keys in region_column.items():
        print(f"{region}: {param}'s = {len(collected_keys)}")


lwi-region4: tif's = 50
lwi-region4: non_u_p_g_hdfs's = 30
lwi-region4: g's = 12
lwi-region5: tif's = 132
lwi-region5: non_u_p_g_hdfs's = 16
lwi-region5: g's = 32
lwi-region6: tif's = 23
lwi-region6: non_u_p_g_hdfs's = 20
lwi-region6: g's = 8
lwi-region7: tif's = 173
lwi-region7: non_u_p_g_hdfs's = 68
lwi-region7: g's = 34


In [78]:
# Display the per-bucket entries of model_data_dict (values only, without the bucket-name keys).
model_data_dict.values()

dict_values([{'tif': ['deliverables/20221222_08080203_UC_RAS2DModelSetupP1/UC_UpperCalcasieu/03_Terrain/SupportingFiles/UC_Terrain.tif', 'deliverables/20221222_08080203_UC_RAS2DModelSetupP1/UC_UpperCalcasieu/03_Terrain/SupportingFiles/nmc/UC_Terrain.tif', 'deliverables/20221222_08080203_UC_RAS2DModelSetupP1/UC_UpperCalcasieu/03_Terrain/UC_Terrain.UC_Terrain.tif', 'deliverables/20221222_08080203_UC_RAS2DModelSetupP1/UC_UpperCalcasieu/04_MapLayers/NLCD_Manning_Roughness.tif', 'deliverables/20221222_08080203_UC_RAS2DModelSetupP1/UC_UpperCalcasieu/04_MapLayers/gSSURGO_InfiltrationDC.tif', 'deliverables/20221222_08080205_WF_RAS2DModelSetupP1/WF_WestForkCalcasieu/03_Terrain/SupportingFiles/WF_Terrain.tif', 'deliverables/20221222_08080205_WF_RAS2DModelSetupP1/WF_WestForkCalcasieu/03_Terrain/WF_Terrain.WF_Terrain.tif', 'deliverables/20221222_08080205_WF_RAS2DModelSetupP1/WF_WestForkCalcasieu/04_MapLayers/NLCD_Manning_Roughness.tif', 'deliverables/20221222_08080205_WF_RAS2DModelSetupP1/WF_WestF

In [105]:
# Total number of collected keys per category, summed across all regions.
regions = list(model_data_dict.keys())

totals = {}
for region in regions:
    # Use each region's own categories. Previously a single `params` list, left
    # over from the last region visited, was applied to every region.
    for param, collected_keys in model_data_dict[region].items():
        totals[param] = totals.get(param, 0) + len(collected_keys)

# Categories seen across all regions (kept for any later cell that reads `params`).
params = list(totals)
totals

{'tif': 378, 'non_u_p_g_hdfs': 134, 'g': 86}

In [3]:
# Download a single object from the lwi-region5 bucket into the current working directory.
download_kwargs = {
    "Bucket": "lwi-region5",
    "Key": "deliverables/2023_0721_ToDOTD_Hydraulic_Model_Calibration_AtchafalayaHUC8/Model/AA_AtchRi_21JUL2023/AA_Atchafalaya_Ri.g79.hdf",
    "Filename": "AA_Atchafalaya_Ri.g79.hdf",
}
client.download_file(**download_kwargs)