# Energy Usage Prediction - Model Building

In [1]:
import pandas as pd
import numpy as np

## Load dataset

In [2]:
train_path = 'data/train.csv'
test_path = 'data/test.csv'

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

In [3]:
# replace 0s in year_built with np.nan
train_df.year_built = train_df.year_built.replace(0, np.nan)
test_df.year_built = test_df.year_built.replace(0, np.nan)

In [4]:
# check for duplicates
print(train_df.duplicated().sum())
print(test_df.duplicated().sum())

0
0


## Create X and y

In [5]:
# features to remove: the features which are more than 50% nan in EDA + id and target
remove = [feature for feature in train_df.columns if 'wind' in feature]
remove += ['days_with_fog', 'site_eui', 'id']
remove

['direction_max_wind_speed',
 'direction_peak_wind_speed',
 'max_wind_speed',
 'days_with_fog',
 'site_eui',
 'id']

In [6]:
X = train_df.drop(remove, axis=1)
y = train_df.site_eui

In [7]:
from sklearn import model_selection

X_train, X_val, y_train, y_val = model_selection.train_test_split(
    X, y, test_size=.1, random_state=25
)

In [8]:
X_test = test_df.drop(['id'], axis=1)

## Data preprocessing

In [9]:
from sklearn import pipeline
from sklearn import impute
from sklearn import preprocessing
from sklearn import compose

In [44]:
num_pipe = pipeline.Pipeline([
    ('median_imputer', impute.SimpleImputer(strategy="median")),
    ('standard_scaler', preprocessing.StandardScaler())
    ])

In [29]:
cat_pipe = pipeline.Pipeline([
    ('ohe', preprocessing.OneHotEncoder(min_frequency=.05, handle_unknown='ignore'))
])

In [45]:
# combine num_pipe and cat_pipe
preproc = compose.ColumnTransformer([
    ('num_pipe', num_pipe, X_train.select_dtypes(exclude=['object']).columns),
    ('cat_pipe', cat_pipe, X_train.select_dtypes(include=['object']).columns)
])

In [46]:
preproc

In [32]:
X_train_preproc = pd.DataFrame(preproc.fit_transform(X_train), columns=preproc.get_feature_names_out())
X_val_preproc = pd.DataFrame(preproc.transform(X_val), columns=preproc.get_feature_names_out())

2023/06/26 15:54:50 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '077a13ecc00d4da6b61351353df39339', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow
2023/06/26 15:54:56 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'c84aaf8513a84b69867be75256bdc555', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [33]:
X_train_preproc.head(5)

Unnamed: 0,num_pipe__Year_Factor,num_pipe__floor_area,num_pipe__year_built,num_pipe__energy_star_rating,num_pipe__ELEVATION,num_pipe__january_min_temp,num_pipe__january_avg_temp,num_pipe__january_max_temp,num_pipe__february_min_temp,num_pipe__february_avg_temp,...,cat_pipe__State_Factor_State_2,cat_pipe__State_Factor_State_4,cat_pipe__State_Factor_State_6,cat_pipe__State_Factor_infrequent_sklearn,cat_pipe__building_class_Commercial,cat_pipe__building_class_Residential,cat_pipe__facility_type_Education_Other_classroom,cat_pipe__facility_type_Multifamily_Uncategorized,cat_pipe__facility_type_Office_Uncategorized,cat_pipe__facility_type_infrequent_sklearn
0,0.8,0.008808,0.983133,0.32,0.012792,0.691176,0.641555,0.428571,0.737705,0.676309,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,0.4,0.01324,0.785542,0.96,0.008027,0.602941,0.629895,0.469388,0.622951,0.668044,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.174496,0.93494,0.92,0.017039,0.691176,0.605082,0.285714,0.770492,0.663793,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,1.0,0.010073,0.771084,0.2,0.016262,0.441176,0.438266,0.346939,0.196721,0.471621,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
4,0.2,0.011642,0.739759,0.37,0.004557,0.676471,0.707025,0.55102,0.754098,0.740596,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [34]:
X_test_preproc = pd.DataFrame(preproc.transform(X_test), columns=preproc.get_feature_names_out())



## Model building

In [35]:
import tensorflow as tf

In [47]:
input_dim = X_train_preproc.shape[1]
print(f"number of features: {input_dim}")

number of features: 67


In [37]:
# rmse loss function
def rmse_loss(y_true, y_pred):
    return tf.math.sqrt(tf.math.reduce_mean(((y_pred - y_true) ** 2), axis=1))

In [38]:
def instantiate_model(input_dim):
    '''instantiates model'''
    
    # simple model 
    # dropout added
    inputs = tf.keras.Input(shape=(input_dim,))

    x = tf.keras.layers.Dense(256, activation='relu')(inputs)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)

    outputs = tf.keras.layers.Dense(1, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    print(model.summary())
    
    return model

In [39]:
def compile_model(model):
    '''compiles given model'''
    
    opt_adam = tf.keras.optimizers.Adam(
        learning_rate = .001, 
    )
    
    model.compile(
        loss=rmse_loss,
        optimizer=opt_adam,
        metrics=['mse']
    )
    
    return model

In [40]:
def model_main(input_dim):
    '''instantiates and compiles model'''
    
    model = instantiate_model(input_dim)
    model = compile_model(model)
    
    return model

## Model training (with tensorboard)

In [52]:
# # Load the TensorBoard notebook extension.
# %load_ext tensorboard

In [53]:
# from datetime import datetime
# from packaging import version

# print("TensorFlow version: ", tf.__version__)
# assert version.parse(tf.__version__).release[0] >= 2, \
#     "This notebook requires TensorFlow 2.0 or above."

In [54]:
# # Clear any logs from previous runs
# !rm -rf ./logs/

In [40]:
# logdir = 'logs/' + datetime.now().strftime("%Y%m%d-%H%M%S")
# tb_cb = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [41]:
# # instantiates and compiles model
# model = model_main(input_dim)

# # callbacks
# es = tf.keras.callbacks.EarlyStopping(
#     monitor='val_loss', 
#     patience=10,
#     restore_best_weights=True
# )

# logdir = 'logs/' + datetime.now().strftime("%Y%m%d-%H%M%S")
# tb_cb = tf.keras.callbacks.TensorBoard(log_dir=logdir)
# file_writer = tf.summary.create_file_writer(logdir + "/metrics")
# file_writer.set_as_default()

# # training
# history = model.fit(
#     X_train_preproc, y_train,
#     verbose=1,
#     callbacks=[es, tb_cb],
#     validation_data=(X_val_preproc, y_val),
#     epochs=1000,
#     batch_size=32,
# )

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 71)]              0         
                                                                 
 dense_10 (Dense)            (None, 256)               18432     
                                                                 
 dense_11 (Dense)            (None, 128)               32896     
                                                                 
 dense_12 (Dense)            (None, 64)                8256      
                                                                 
 dense_13 (Dense)            (None, 32)                2080      
                                                                 
 dense_14 (Dense)            (None, 1)                 33        
                                                                 
Total params: 61,697
Trainable params: 61,697
Non-trainable

In [37]:
# %tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11389), started 0:00:43 ago. (Use '!kill 11389' to kill it.)

## Model training (with mlflow)

In [41]:
import mlflow

In [82]:
mlflow.autolog()

# instantiates and compiles model
model = model_main(input_dim)

# callbacks
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    patience=5,
    restore_best_weights=True
)

# training
history = model.fit(
    X_train_preproc, y_train,
    verbose=1,
    callbacks=[es],
    validation_data=(X_val_preproc, y_val),
    epochs=1000,
    batch_size=64,
)

2023/06/26 16:54:35 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 16:54:36 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_381 (InputLayer)      [(None, 67)]              0         
                                                                 
 dense_1521 (Dense)          (None, 256)               17408     
                                                                 
 dense_1522 (Dense)          (None, 128)               32896     
                                                                 
 dense_1523 (Dense)          (None, 64)                8256      
                                                                 
 dense_1524 (Dense)          (None, 32)                2080      
                                                                 
 dense_1525 (Dense)          (None, 1)                 33        
                                                                 
Total params: 60,673
Trainable params: 60,673
Non-trainable

2023/06/26 16:54:36 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1804541f1dca4c4985e3c9ac8d5260fb', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




INFO:tensorflow:Assets written to: /tmp/tmpfqbjcfcp/model/data/model/assets




## Experimenting with model architecture

In [91]:
import itertools

In [100]:
def build_models(num_layers, input_shape, node_start = 5, node_end = 8):
    node_options = [2**x for x in range(node_start, node_end+1)]
    possibilities = [node_options] * num_layers
    node_permutations = list(itertools.product(*possibilities))
    
    models = []
    for permutation in node_permutations:
        # number of nodes has to get smaller or remain the same in the later layers
        check = 0
        for i in len(permutation)-1:
            if permutation[i+1] > permutation[i]:
                check += 1
        if check > 0:
            continue
            
        # if ther permutation satisfies the above condition
        else:
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.InputLayer(input_shape=input_shape))
            model_name = ''
            
            for node in permutation:
                model.add(tf.keras.layers.Dense(node, activation='relu'))
                model_name += f'dense{node}_'
            
            model.add(tf.keras.layers.Dense(1, activation='linear'))
            model._name = model_name[:-1]
            models.append(model)
    
    return models

In [101]:
def train_models(models, X_train, y_train, X_val, y_val, epochs=100, patience=2, verbose=1):
    
    def compile_model(model):
        '''compiles given model'''
        opt_adam = tf.keras.optimizers.Adam(
            learning_rate = .001, 
        )
        model.compile(
            loss=rmse_loss,
            optimizer=opt_adam,
            metrics=['mse']
        )
        return model
    
    def train_model(model):
        es = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', 
            patience=patience,
            restore_best_weights=True
        )
        # training
        history = model.fit(
            X_train, y_train,
            verbose=verbose,
            callbacks=[es],
            validation_data=(X_val, y_val),
            epochs=epochs,
        )
        return model, history
        
    records = []
    for i, model in enumerate(models):
        print(f"model #{i+1}: {model.name}")
        mlflow.autolog()
        model_ = compile_model(model)
        model_, history = train_model(model_)
        min_val_loss = min(history.history['val_loss'])
        best_index = history.history['val_loss'].index(min_val_loss)
        train_loss = history.history['loss'][best_index]
        
        record = {'model_name': model.name, 'model': model_, 'train_history': history, 'min_val_loss': min_val_loss, 'train_loss': train_loss}
        records.append(record)
    
    return records 
        

In [102]:
models = build_models(3, (input_dim,))

In [104]:
len(models)

20

In [105]:
records = train_models(models, X_train_preproc, y_train, X_val_preproc, y_val)

2023/06/26 17:12:45 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:12:46 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #1: dense32_dense32_dense32


2023/06/26 17:12:46 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'a366385abba24c948e5142686befe2a1', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100




INFO:tensorflow:Assets written to: /tmp/tmphbpkc11f/model/data/model/assets


2023/06/26 17:14:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #2: dense64_dense32_dense32


2023/06/26 17:14:34 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:14:34 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd5afbbd499be4b95917f0fe655aef21b', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100




INFO:tensorflow:Assets written to: /tmp/tmpt8o7f6mj/model/data/model/assets


2023/06/26 17:17:23 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:17:23 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #3: dense64_dense64_dense32


2023/06/26 17:17:23 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '4ab70247dd454ae89fc78e7a7ed3427f', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100




INFO:tensorflow:Assets written to: /tmp/tmpeh77xvvm/model/data/model/assets


2023/06/26 17:18:44 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #4: dense64_dense64_dense64


2023/06/26 17:18:44 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:18:45 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'f55cc6ad265f46e3ba6822d8b4f250c0', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100




INFO:tensorflow:Assets written to: /tmp/tmpxa_p8n10/model/data/model/assets


2023/06/26 17:19:49 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:19:50 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #5: dense128_dense32_dense32


2023/06/26 17:19:50 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '7f46defc01c945a0a652322be6bc6aee', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100




INFO:tensorflow:Assets written to: /tmp/tmppzi_l8ru/model/data/model/assets


2023/06/26 17:21:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #6: dense128_dense64_dense32


2023/06/26 17:21:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:21:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '7c0950ff04874fef80dfaab5b9df6e41', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100




INFO:tensorflow:Assets written to: /tmp/tmp0d06v0zn/model/data/model/assets


2023/06/26 17:26:08 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:26:08 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:26:08 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '37c9576c735f4a79b3168bc9ce89a8dd', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


model #7: dense128_dense64_dense64




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100




INFO:tensorflow:Assets written to: /tmp/tmpuz0mo7b9/model/data/model/assets


2023/06/26 17:27:49 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:27:49 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #8: dense128_dense128_dense32


2023/06/26 17:27:49 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1aa4891af90e42a2b5f39402aac34235', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100




INFO:tensorflow:Assets written to: /tmp/tmpfkpitsmh/model/data/model/assets


2023/06/26 17:30:37 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:30:37 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:30:37 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '5c3ac26d39154d2fbceb27a347790ba2', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


model #9: dense128_dense128_dense64




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100




INFO:tensorflow:Assets written to: /tmp/tmpcpov04_7/model/data/model/assets


2023/06/26 17:32:03 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:32:04 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #10: dense128_dense128_dense128


2023/06/26 17:32:04 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '0bfee3d4f9104fb7bd29e06718fd6ff3', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100




INFO:tensorflow:Assets written to: /tmp/tmpe9sd17wt/model/data/model/assets


2023/06/26 17:33:34 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:33:34 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #11: dense256_dense32_dense32


2023/06/26 17:33:34 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '3c8828745f3d46de91912ce205b44c20', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100




INFO:tensorflow:Assets written to: /tmp/tmpvphbuaop/model/data/model/assets


2023/06/26 17:37:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #12: dense256_dense64_dense32


2023/06/26 17:37:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:37:32 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '121b2294d5254fcc95f383c2725dab46', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100




INFO:tensorflow:Assets written to: /tmp/tmpcnlb3s7b/model/data/model/assets


2023/06/26 17:40:04 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #13: dense256_dense64_dense64


2023/06/26 17:40:04 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:40:04 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '2356e800ad1149fdb845ab66b9123ada', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100




INFO:tensorflow:Assets written to: /tmp/tmplst4kar6/model/data/model/assets


2023/06/26 17:41:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:41:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #14: dense256_dense128_dense32


2023/06/26 17:41:34 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '4ebe57df2bd545fbadc6e4a7082ee2ba', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100




INFO:tensorflow:Assets written to: /tmp/tmpp3bbs97e/model/data/model/assets


2023/06/26 17:43:01 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:43:01 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #15: dense256_dense128_dense64


2023/06/26 17:43:01 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '3a69e309546c4fc781b5b79ce20797eb', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100




INFO:tensorflow:Assets written to: /tmp/tmp_j7a83l0/model/data/model/assets


2023/06/26 17:45:48 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #16: dense256_dense128_dense128


2023/06/26 17:45:56 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:45:56 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9cf9953b6ffa4a8d8f74ab3fb5418469', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100




INFO:tensorflow:Assets written to: /tmp/tmpplmjw9ii/model/data/model/assets


2023/06/26 17:47:15 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:47:15 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #17: dense256_dense256_dense32


2023/06/26 17:47:15 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '95437d0b8bff42af8eb2ccc535abb030', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100




INFO:tensorflow:Assets written to: /tmp/tmp3_5uvq8h/model/data/model/assets


2023/06/26 17:54:18 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


model #18: dense256_dense256_dense64


2023/06/26 17:54:19 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/06/26 17:54:19 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '890f54a2c3d445bcbf7fcabddc970194', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100




INFO:tensorflow:Assets written to: /tmp/tmp1_gqmjkg/model/data/model/assets


2023/06/26 17:57:18 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 17:57:18 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #19: dense256_dense256_dense128


2023/06/26 17:57:18 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '24763413ea5545e990081fa00a4cb910', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100




INFO:tensorflow:Assets written to: /tmp/tmpbghx5xdt/model/data/model/assets


2023/06/26 18:00:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2023/06/26 18:00:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


model #20: dense256_dense256_dense256


2023/06/26 18:00:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '859ccfa32bab4655bcbc8485d7e00a9f', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100




INFO:tensorflow:Assets written to: /tmp/tmp_5_l1ft0/model/data/model/assets




In [110]:
exp_3_layers = pd.DataFrame(records).drop(['model', 'train_history'], axis=1)
exp_3_layers.to_csv('model_architecture_3_layers.csv')

In [114]:
exp_3_layers.sort_values('min_val_loss')

Unnamed: 0,model_name,min_val_loss,train_loss
5,dense128_dense64_dense32,24.889011,25.121851
16,dense256_dense256_dense32,24.959993,25.155613
7,dense128_dense128_dense32,24.992544,25.152374
14,dense256_dense128_dense64,25.030584,25.333105
10,dense256_dense32_dense32,25.040279,25.276236
18,dense256_dense256_dense128,25.163895,25.525047
1,dense64_dense32_dense32,25.242655,25.464148
0,dense32_dense32_dense32,25.261032,25.445284
11,dense256_dense64_dense32,25.262478,25.429745
6,dense128_dense64_dense64,25.320202,25.544189


## Prediction

In [32]:
pred = model.predict(X_test_preproc)



In [36]:
from datetime import datetime

exp_name = 'amazing-finch-125'

pred_df = pd.concat((test_df[['id']], pd.DataFrame(pred)), axis=1).rename(columns={0: 'site_eui'})
pred_df.to_csv(f'pred/{datetime.now().strftime("%Y%m%d-%H%M%S")}_{exp_name}.csv', index=False)