## Purpose 

This script creates a model for predicting water temperature in Shadow Mountain Reservoir using the t2022 data. This pass is a middle ground between the aggressive feature reduction and all features. For this model, we're using the `leaky_5_batch` setting in `settings.py`, which reduces the number of hidden layers.

### Import Modules

In [33]:
#high level modules
import os
import imp
import pandas as pd

# ml/ai modules
import tensorflow as tf

### Custom Modules

In [34]:
# Load the project's helper modules directly from their source files so that
# they can be imported by name below. Load order: settings, architecture,
# universals.
this_dir = "/Users/steeleb/Documents/GitHub/ATS-Data-Driven-Forecasting/NN-operational/arNN_summary/"
for module_name, source_file in (
    ("settings", "settings.py"),
    ("architecture", "architecture.py"),
    ("universals", "universal_functions.py"),
):
    imp.load_source(module_name, os.path.join(this_dir, source_file))

from settings import settings
from architecture import build_model, compile_model
from universals import save_to_pickle, twotemp_labels_features

# directory containing the training/validation files used in this notebook
data_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/data/NN_train_val_test/SMR_forecast/"

### Import train/val sets

Import and format training and validation arrays for use in model training

In [35]:
# List the data directory once, then narrow the file names step by step:
# "t2022" files -> the 2024-11-29 version -> validation / training subsets.
all_files = pd.Series(os.listdir(data_dir))
t2022 = all_files.loc[all_files.str.contains('t2022')]
t2022_ver = t2022.loc[t2022.str.contains('2024-11-29')]

# the directory listing comes back in no particular order, so each subset is
# sorted by file name before it is indexed by position later on
t2022_val = t2022_ver.loc[t2022_ver.str.contains('validation')].sort_values()
t2022_train = t2022_ver.loc[t2022_ver.str.contains('training')].sort_values()

# notebook display of both sorted file lists
t2022_val, t2022_train



(91     validation_summary_t2022_2014_summary_v2024-11...
 23     validation_summary_t2022_2015_summary_v2024-11...
 52     validation_summary_t2022_2016_summary_v2024-11...
 126    validation_summary_t2022_2017_summary_v2024-11...
 122    validation_summary_t2022_2018_summary_v2024-11...
 53     validation_summary_t2022_2019_summary_v2024-11...
 0      validation_summary_t2022_2020_summary_v2024-11...
 105    validation_summary_t2022_2021_summary_v2024-11...
 dtype: object,
 85     training_summary_t2022_2014_summary_v2024-11-2...
 28     training_summary_t2022_2015_summary_v2024-11-2...
 58     training_summary_t2022_2016_summary_v2024-11-2...
 119    training_summary_t2022_2017_summary_v2024-11-2...
 128    training_summary_t2022_2018_summary_v2024-11-2...
 50     training_summary_t2022_2019_summary_v2024-11-2...
 8      training_summary_t2022_2020_summary_v2024-11-2...
 103    training_summary_t2022_2021_summary_v2024-11-2...
 dtype: object)

In [36]:
def load_data(file):
    """Read one comma-separated file from ``data_dir`` into a DataFrame.

    Parameters
    ----------
    file
        File name, joined onto the module-level ``data_dir``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    csv_path = os.path.join(data_dir, file)
    frame = pd.read_csv(csv_path, sep=',')
    return frame

# Load the validation/training pair for each of the eight cross-validation
# folds. Fold k reads the k-th file of each sorted list, i.e. position k - 1,
# so every fold gets its own distinct pair of files.
val1 = load_data(t2022_val.values[0])
train1 = load_data(t2022_train.values[0])

val2 = load_data(t2022_val.values[1])
train2 = load_data(t2022_train.values[1])

val3 = load_data(t2022_val.values[2])
train3 = load_data(t2022_train.values[2])

val4 = load_data(t2022_val.values[3])
train4 = load_data(t2022_train.values[3])

val5 = load_data(t2022_val.values[4])
train5 = load_data(t2022_train.values[4])

val6 = load_data(t2022_val.values[5])
train6 = load_data(t2022_train.values[5])

val7 = load_data(t2022_val.values[6])
train7 = load_data(t2022_train.values[6])

val8 = load_data(t2022_val.values[7])
train8 = load_data(t2022_train.values[7])

Using the function twotemp_labels_features, we can create ML-ready features and labels for the training and validation sets.

In [37]:
# (training frame, validation frame) for each cross-validation fold, in order
fold_inputs = [
    (train1, val1),
    (train2, val2),
    (train3, val3),
    (train4, val4),
    (train5, val5),
    (train6, val6),
    (train7, val7),
    (train8, val8),
]

# twotemp_labels_features is called once per fold; each result unpacks into
# training features, training labels, validation features, validation labels
fold_arrays = [
    twotemp_labels_features(train_df, val_df) for train_df, val_df in fold_inputs
]

features1, labels_1, val_features1, val_labels_1 = fold_arrays[0]
features2, labels_2, val_features2, val_labels_2 = fold_arrays[1]
features3, labels_3, val_features3, val_labels_3 = fold_arrays[2]
features4, labels_4, val_features4, val_labels_4 = fold_arrays[3]
features5, labels_5, val_features5, val_labels_5 = fold_arrays[4]
features6, labels_6, val_features6, val_labels_6 = fold_arrays[5]
features7, labels_7, val_features7, val_labels_7 = fold_arrays[6]
features8, labels_8, val_features8, val_labels_8 = fold_arrays[7]

In [38]:
# sanity check: display the shapes of the fold-1 arrays
tuple(array.shape for array in (features1, labels_1, val_features1, val_labels_1))

((544, 90), (544, 2), (85, 90), (85, 2))

### Compile and train models

Here, we'll use intentionally overfit settings to create an overfit model. This particular instance uses 2 layers containing 20 nodes each. We've increased the batch size to 64 from the previous iteration, and kept the patience at 200 and the dropout at 0.1.

In [39]:
# Start from a clean Keras state and seed the random number generators once,
# before any model is built.
tf.keras.backend.clear_session()

# every fold is built, compiled, and trained with the same settings entry
fold_settings = settings["leaky_5_batch"]
tf.keras.utils.set_random_seed(fold_settings["random_seed"])

# define the early stopping callback: stop once val_loss has not improved for
# `patience` epochs and roll the model back to its best weights
# NOTE(review): one callback instance is shared by all eight fits; this relies
# on Keras resetting the callback's state when each fit starts - confirm for
# the installed version.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=fold_settings["patience"],
    restore_best_weights=True,
    mode="auto",
)


def _train_fold(features, labels, val_features, val_labels):
    """Build, compile, and fit the model for one cross-validation fold.

    Parameters
    ----------
    features, labels
        Training features and labels for the fold.
    val_features, val_labels
        Validation features and labels, evaluated during training and
        monitored by the early stopping callback.

    Returns
    -------
    tuple
        ``(model, history)`` where ``model`` is the fitted model and
        ``history`` is the object returned by ``model.fit``.
    """
    model = build_model(features, labels, fold_settings)
    model = compile_model(model, fold_settings)

    # train the model via model.fit
    history = model.fit(
        features,
        labels,
        epochs=fold_settings["max_epochs"],
        batch_size=fold_settings["batch_size"],
        shuffle=True,
        # passed as a tuple, the form the Keras documentation specifies
        validation_data=(val_features, val_labels),
        callbacks=[early_stopping_callback],
        verbose=1,
    )
    return model, history


# Train the folds one after another, in order, so the sequence of
# build/compile/fit calls following the seed stays fixed.
## TS cross 1
model_1, history_1 = _train_fold(features1, labels_1, val_features1, val_labels_1)
## TS cross 2
model_2, history_2 = _train_fold(features2, labels_2, val_features2, val_labels_2)
## TS cross 3
model_3, history_3 = _train_fold(features3, labels_3, val_features3, val_labels_3)
## TS cross 4
model_4, history_4 = _train_fold(features4, labels_4, val_features4, val_labels_4)
## TS cross 5
model_5, history_5 = _train_fold(features5, labels_5, val_features5, val_labels_5)
## TS cross 6
model_6, history_6 = _train_fold(features6, labels_6, val_features6, val_labels_6)
## TS cross 7
model_7, history_7 = _train_fold(features7, labels_7, val_features7, val_labels_7)
## TS cross 8
model_8, history_8 = _train_fold(features8, labels_8, val_features8, val_labels_8)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 90)]              0         
                                                                 
 dropout (Dropout)           (None, 90)                0         
                                                                 
 dense (Dense)               (None, 10)                910       
                                                                 
 dense_1 (Dense)             (None, 10)                110       
                                                                 
 dense_2 (Dense)             (None, 2)                 22        
                                                                 
Total params: 1,042
Trainable params: 1,042
Non-trainable params: 0
_________________________________________________________________
___________________________________________________________

Finally, save the models and training history. They are written to the following directory, which the cell below creates if it does not already exist:

data/NN_train_val_test/SMR_forecast/models/leaky_5_batch/

In [41]:
dump_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/data/NN_train_val_test/SMR_forecast/models/leaky_5_batch/"
# create the output directory (and any missing parents); no error if it is
# already there
os.makedirs(dump_dir, exist_ok=True)

# save models to pickle, numbered 1-8 to match the fold they were trained on
models = [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8]

for i, model in enumerate(models, start=1):
    save_to_pickle(model, os.path.join(dump_dir, f"model_{i}.pkl"))

# save history to pickles, using the same fold numbering
histories = [history_1, history_2, history_3, history_4, history_5, history_6, history_7, history_8]

for i, history in enumerate(histories, start=1):
    save_to_pickle(history, os.path.join(dump_dir, f"history_{i}.pkl"))


INFO:tensorflow:Assets written to: ram://d3f0b2d6-25df-4207-8188-efb578368180/assets
INFO:tensorflow:Assets written to: ram://419dfc5b-565d-46f8-a714-23f79f567813/assets
INFO:tensorflow:Assets written to: ram://9ec6c763-e84a-47df-b4ac-e486b6632107/assets
INFO:tensorflow:Assets written to: ram://e3997306-9a60-4a3d-bac5-f5d604060305/assets
INFO:tensorflow:Assets written to: ram://dbc41403-e6c6-4d6b-9f48-c9963c1649a0/assets
INFO:tensorflow:Assets written to: ram://07dc3ec9-5cf5-460a-9208-3907e621ac25/assets
INFO:tensorflow:Assets written to: ram://1492f2c7-c071-4311-8a3d-aeab7b0ee899/assets
INFO:tensorflow:Assets written to: ram://37736661-ecb3-488f-b3de-c8ca5efc4344/assets
INFO:tensorflow:Assets written to: ram://7955c75c-c439-4ca7-b07f-67059bb1e4cb/assets
INFO:tensorflow:Assets written to: ram://0050e27b-1b2d-4ce6-8bc5-486d457656fe/assets
INFO:tensorflow:Assets written to: ram://2cef1c51-b97d-49e4-9636-5f95c43e5cb8/assets
INFO:tensorflow:Assets written to: ram://6e118364-dd96-462f-9751-