In [1]:
import boto3
import os
from pathlib import Path
import numpy as np
import pandas as pd
import json

from utils.constants import *

In [2]:
# Upload destination: target bucket and the key prefix shared by every object
# written for this experiment.
bucket = "ml-for-bem"
experiment_name = "idf/batch/test1000"

# S3 client used for all uploads in this notebook.
client = boto3.client("s3")

In [3]:
# Raw string literal: "\D" and "\S" are not valid escape sequences, so the
# plain literal only worked by accident and emits a DeprecationWarning /
# SyntaxWarning on current Python versions. The path value is unchanged.
local_dir = Path(r"D:\DATA\ShadingAutozoner")

# Names (not full paths) of every entry in the local batch directory.
dir_contents = os.listdir(local_dir)

In [4]:
# Select the three per-building artifacts by file-name suffix. `endswith` is
# used instead of a substring test so that stray files such as
# "..._Autozoner.idf.bak" are not picked up.
# Each list is sorted because os.listdir returns entries in arbitrary order and
# the lists are consumed by position further down; with a shared
# "<batch>_<idx>" prefix, sorting keeps entry i of every list on the same
# building.
idf_names = sorted(x for x in dir_contents if x.endswith("_Autozoner.idf"))
# Building index is the second "_"-separated token of the IDF file name.
bldg_idxs = [x.split("_")[1] for x in idf_names]
geom_names = sorted(x for x in dir_contents if x.endswith("_geometry.json"))
templ_names = sorted(x for x in dir_contents if x.endswith("_templates.json"))

# Sanity check: the three counts are expected to be equal.
print(len(idf_names))
print(len(geom_names))
print(len(templ_names))

1000
1000
1000


In [7]:
def get_params(batchname, bldg_idx_str, local_dir):
    """Build the feature table for one building from its JSON side-car files.

    Reads ``{batchname}_{bldg_idx_str}_templates.json`` and
    ``{batchname}_{bldg_idx_str}_geometry.json`` from ``local_dir``. Only the
    first entry of the geometry file is used.

    Args:
        batchname: File-name prefix of the batch.
        bldg_idx_str: Building index as it appears in the file names.
        local_dir: Directory holding the JSON files; must support the ``/``
            path-join operator (e.g. ``pathlib.Path``).

    Returns:
        A ``pandas.DataFrame`` built from the geometry entry's ``perims``
        records, joined with its ``cores`` records on ``floor`` and with its
        ``shading`` records on the last three characters of each perimeter
        name. It also carries the template values, ``building_id``,
        ``idf_path`` and the ``ground_2_footprint`` / ``roof_2_footprint``
        flags.
    """
    file_stem = f"{batchname}_{bldg_idx_str}"

    with open(local_dir / f"{file_stem}_templates.json", "r") as template_file:
        config = json.load(template_file)
    with open(local_dir / f"{file_stem}_geometry.json", "r") as geometry_file:
        geom_dict = json.load(geometry_file)

    # The geometry file is keyed by building id; take its first entry.
    idx, row = list(geom_dict.items())[0]
    n_floors = row["n_floors"]

    # One row per perimeter record, with the record key exposed as "name".
    perim_df = pd.DataFrame.from_dict(row["perims"]).T.sort_index()
    perim_df = perim_df.reset_index().rename(columns={"index": "name"})
    core_df = pd.DataFrame.from_dict(row["cores"]).T
    features = pd.merge(perim_df, core_df, how="left", on="floor")

    features["building_id"] = idx
    features["idf_path"] = f"{file_stem}_Autozoner.idf"

    # Broadcast every template setting to all rows, one column per key.
    features[list(config.keys())] = list(config.values())

    # Replace the template mass values with the result of get_tmass_idx.
    for mass_key in ("FacadeMass", "RoofMass"):
        features[mass_key] = get_tmass_idx(config[mass_key])

    # Attach the 12 shading values; the join key is the last three characters
    # of the perimeter name, matched against the shading record keys.
    shading_df = pd.DataFrame.from_dict(row["shading"]).T
    shading_df.columns = [f"shading_{i}" for i in range(12)]
    features["vert"] = [name[-3:] for name in features["name"]]
    features = features.merge(
        shading_df, left_on="vert", right_index=True
    ).drop(columns=["vert"])

    # Flag rows on the bottom floor (ground contact) and top floor (roof).
    for flag_col in ("ground_2_footprint", "roof_2_footprint"):
        features.loc[:, flag_col] = 0

    on_ground = ["perim_floor_0" in name for name in features["name"]]
    features.loc[on_ground, "ground_2_footprint"] = 1

    on_roof = [f"perim_floor_{n_floors-1}" in name for name in features["name"]]
    features.loc[on_roof, "roof_2_footprint"] = 1

    # TODO: adjust weighting by core area, perimeter area and edge length
    # (not implemented yet; no weight columns are produced here).
    return features

In [10]:
# Upload every building's IDF, geometry and template files, then build its
# parameter table, store it locally as HDF5 and upload that as well.
batchname = "test"
for i, (idf, bldg_idx) in enumerate(zip(idf_names, bldg_idxs)):
    # Derive the companion file names from the building index (the same naming
    # get_params reads from) instead of pairing separate lists by position, so
    # all four objects uploaded in one iteration belong to the same building.
    file_stem = f"{batchname}_{bldg_idx}"
    geom_name = f"{file_stem}_geometry.json"
    templ_name = f"{file_stem}_templates.json"
    param_name = f"{file_stem}.h5"

    # upload_file documents Filename as str, so convert the Path explicitly.
    client.upload_file(str(local_dir / idf), bucket, f"{experiment_name}/idf/{idf}")
    client.upload_file(str(local_dir / geom_name), bucket, f"{experiment_name}/geometry/{geom_name}")
    client.upload_file(str(local_dir / templ_name), bucket, f"{experiment_name}/template/{templ_name}")

    df = get_params(batchname, bldg_idx, local_dir)
    # NOTE: to_hdf emits a PerformanceWarning here because several columns are
    # object dtype and get pickled by PyTables.
    df.to_hdf(local_dir / param_name, key="df")
    client.upload_file(str(local_dir / param_name), bucket, f"{experiment_name}/parameters/{param_name}")

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block2_values] [items->Index(['name', 'orientation', 'facade_normal', 'distance_to_bldg_cent',
       'facade_length', 'floor', 'perim_area', 'building_id', 'idf_path'],
      dtype='object')]

  df.to_hdf(local_dir / param_name, key="df")
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block2_values] [items->Index(['name', 'orientation', 'facade_normal', 'distance_to_bldg_cent',
       'facade_length', 'floor', 'perim_area', 'building_id', 'idf_path'],
      dtype='object')]

  df.to_hdf(local_dir / param_name, key="df")
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block2_values] [items->Index(['name', 'orientation', 'facade_normal', 'distance_to_bldg_cent',
       '