## Purpose 

This script creates a model for predicting water temperature in Shadow Mountain Reservoir using the t2023 data. This pass is a middle ground between the aggressive feature reduction and all features. For this model, we're using the `leaky_basic_5` setting in `settings.py`, which reduces the number of hidden layers.

### Import Modules

In [1]:
#high level modules
import os
import imp
import pandas as pd

# ml/ai modules
import tensorflow as tf

  import imp


### Custom Modules

In [4]:
# import custom modules
# NOTE: `imp` is deprecated and was removed in Python 3.12; `importlib` is its
# documented replacement if this notebook is moved to a newer interpreter.
this_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/modeling/temperature/arNN_fewer2/"

# load each project file from this_dir under the module name that the
# `from ... import ...` statements below refer to (same order as before)
for module_name, file_name in (
    ("settings", "settings.py"),
    ("architecture", "architecture.py"),
    ("universals", "universal_functions.py"),
):
    imp.load_source(module_name, os.path.join(this_dir, file_name))

from settings import settings
from architecture import build_model, compile_model
from universals import save_to_pickle, twotemp_labels_features

# point to data directory
data_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/data/NN_train_val_test/SMR_autoNN_reduce_2/"

### Import train/val sets

Import and format training and validation arrays for use in model training

In [6]:
# list the data directory and keep only the file names containing 't2023'
all_files = pd.Series(os.listdir(data_dir))
t2023 = all_files[all_files.str.contains('t2023')]

# os.listdir returns names in arbitrary order, so each subset is sorted by
# file name before the positional lookups that follow
t2023_val = t2023[t2023.str.contains('validation')].sort_values()
t2023_train = t2023[t2023.str.contains('training')].sort_values()

def load_data(file):
    """Read one comma-separated file from ``data_dir`` into a DataFrame.

    Args:
        file: File name, joined onto the module-level ``data_dir``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    csv_path = os.path.join(data_dir, file)
    return pd.read_csv(csv_path, sep=',')

# Load the nine train/validation pairs. The sorted file lists are zero-indexed,
# so pair k is read from position k - 1 (this assumes the sorted file names
# follow fold order -- confirm against the files in data_dir).
val1 = load_data(t2023_val.values[0])
train1 = load_data(t2023_train.values[0])

val2 = load_data(t2023_val.values[1])
train2 = load_data(t2023_train.values[1])

val3 = load_data(t2023_val.values[2])
train3 = load_data(t2023_train.values[2])

val4 = load_data(t2023_val.values[3])
train4 = load_data(t2023_train.values[3])

# fold 5 must read position 4; it previously read position 5, which made it a
# duplicate of fold 6 and left the file at position 4 unused
val5 = load_data(t2023_val.values[4])
train5 = load_data(t2023_train.values[4])

val6 = load_data(t2023_val.values[5])
train6 = load_data(t2023_train.values[5])

val7 = load_data(t2023_val.values[6])
train7 = load_data(t2023_train.values[6])

val8 = load_data(t2023_val.values[7])
train8 = load_data(t2023_train.values[7])

val9 = load_data(t2023_val.values[8])
train9 = load_data(t2023_train.values[8])


Using the function twotemp_labels_features, we can create ML-ready features and labels for the training and validation sets.

In [9]:
# Split every train/validation pair into features and labels with
# twotemp_labels_features. The four results are unpacked as training features,
# training labels, validation features, validation labels (order taken from the
# variable names -- confirm against universal_functions.py).
(features1, labels_1,
 val_features1, val_labels_1) = twotemp_labels_features(train1, val1)
(features2, labels_2,
 val_features2, val_labels_2) = twotemp_labels_features(train2, val2)
(features3, labels_3,
 val_features3, val_labels_3) = twotemp_labels_features(train3, val3)
(features4, labels_4,
 val_features4, val_labels_4) = twotemp_labels_features(train4, val4)
(features5, labels_5,
 val_features5, val_labels_5) = twotemp_labels_features(train5, val5)
(features6, labels_6,
 val_features6, val_labels_6) = twotemp_labels_features(train6, val6)
(features7, labels_7,
 val_features7, val_labels_7) = twotemp_labels_features(train7, val7)
(features8, labels_8,
 val_features8, val_labels_8) = twotemp_labels_features(train8, val8)
(features9, labels_9,
 val_features9, val_labels_9) = twotemp_labels_features(train9, val9)

### Compile and train models

Here, we'll use intentionally overfit settings to create an overfit model. This particular instance uses 2 layers containing 20 nodes each. We've increased the batch size to 64 from the previous iteration, and kept the patience at 200 and the dropout at 0.1.

In [10]:
tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(settings["leaky_basic_5"]["random_seed"])

# define the early stopping callback (shared by every fold, as before)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=settings["leaky_basic_5"]["patience"],
    restore_best_weights=True,
    mode="auto",
)


def _train_fold(features, labels, val_features, val_labels):
    """Build, compile and fit one model with the ``leaky_basic_5`` settings.

    Replaces nine copy-pasted build/compile/fit stanzas so that every fold is
    guaranteed to be trained with the same calls and arguments.

    Args:
        features: Training features passed to ``build_model`` and ``fit``.
        labels: Training labels passed to ``build_model`` and ``fit``.
        val_features: Validation features used for ``val_loss``.
        val_labels: Validation labels used for ``val_loss``.

    Returns:
        A ``(model, history)`` tuple: the fitted model and the object returned
        by ``model.fit``.
    """
    fold_settings = settings["leaky_basic_5"]

    model = build_model(features, labels, fold_settings)
    model = compile_model(model, fold_settings)

    # train the model via model.fit
    history = model.fit(
        features,
        labels,
        epochs=fold_settings["max_epochs"],
        batch_size=fold_settings["batch_size"],
        shuffle=True,
        # Keras documents validation_data as a tuple (x_val, y_val)
        validation_data=(val_features, val_labels),
        callbacks=[early_stopping_callback],
        verbose=1,
    )
    return model, history


# Folds are trained in the original order (1 through 9) so the sequence of
# random draws after set_random_seed is unchanged.

## TS cross 1
model_1, history_1 = _train_fold(features1, labels_1, val_features1, val_labels_1)

## TS cross 2
model_2, history_2 = _train_fold(features2, labels_2, val_features2, val_labels_2)

## TS cross 3
model_3, history_3 = _train_fold(features3, labels_3, val_features3, val_labels_3)

## TS cross 4
model_4, history_4 = _train_fold(features4, labels_4, val_features4, val_labels_4)

## TS cross 5
model_5, history_5 = _train_fold(features5, labels_5, val_features5, val_labels_5)

## TS cross 6
model_6, history_6 = _train_fold(features6, labels_6, val_features6, val_labels_6)

## TS cross 7
model_7, history_7 = _train_fold(features7, labels_7, val_features7, val_labels_7)

## TS cross 8
model_8, history_8 = _train_fold(features8, labels_8, val_features8, val_labels_8)

## TS cross 9
model_9, history_9 = _train_fold(features9, labels_9, val_features9, val_labels_9)


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 52)]              0         
                                                                 
 dropout (Dropout)           (None, 52)                0         
                                                                 
 dense (Dense)               (None, 10)                530       
                                                                 
 dense_1 (Dense)             (None, 10)                110       
                                                                 
 dense_2 (Dense)             (None, 2)                 22        
                                                                 
Total params: 662
Trainable params: 662
Non-trainable params: 0
_________________________________________________________________
_______________________________________________________________

Finally, save the models and training history. To do this, you'll need to create the following directory path:

data/NN_train_val_test/SMR_autoNN_reduce_2/models/leaky_basic_5_t2023/

In [11]:
dump_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/data/NN_train_val_test/SMR_autoNN_reduce_2/models/leaky_basic_5_t2023/"

# create the output directory if it is missing, rather than requiring it to be
# made by hand before the trained models can be written
os.makedirs(dump_dir, exist_ok=True)

# save models to pickle
models = [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9]

# os.path.join avoids the doubled "/" that dump_dir's trailing slash plus an
# f-string "/" used to produce; files are numbered from 1 to match the folds
for i, model in enumerate(models, start=1):
    save_to_pickle(model, os.path.join(dump_dir, f"model_{i}.pkl"))

# save history to pickles
histories = [history_1, history_2, history_3, history_4, history_5, history_6, history_7, history_8, history_9]

for i, history in enumerate(histories, start=1):
    save_to_pickle(history, os.path.join(dump_dir, f"history_{i}.pkl"))


INFO:tensorflow:Assets written to: ram://d40cffdf-f5a3-4cbf-a029-599fa36349b3/assets


2024-08-26 14:55:11.388324: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ram://f7b1b0f9-9d55-42ab-b5f8-3c15b41fff42/assets
INFO:tensorflow:Assets written to: ram://28b23c97-be81-4d0a-b3e7-d21eb37aba0c/assets
INFO:tensorflow:Assets written to: ram://5b1ea1e3-6b1c-45a2-9d89-f73d5b03c906/assets
INFO:tensorflow:Assets written to: ram://a7d3d583-557f-4a9d-b959-82f3528a2e3c/assets
INFO:tensorflow:Assets written to: ram://519a5211-ea01-4248-bb34-3865d45555c6/assets
INFO:tensorflow:Assets written to: ram://ffb9bcf8-7492-471d-ba78-70c55de65db1/assets
INFO:tensorflow:Assets written to: ram://d85eb68d-9e85-40c6-b789-987b2670c3c2/assets
INFO:tensorflow:Assets written to: ram://93ee29e5-0bdc-4112-8d55-a1bfb8943f68/assets
INFO:tensorflow:Assets written to: ram://75fd8a3b-7b40-457f-adeb-b5177599e3ff/assets
INFO:tensorflow:Assets written to: ram://46f5510d-fbd4-4359-88be-f67dc7afc816/assets
INFO:tensorflow:Assets written to: ram://1efb6766-1926-4578-a99c-6dea2362fec3/assets
INFO:tensorflow:Assets written to: ram://e7df4206-3657-4d52-9bc7-