## Purpose 

This script creates a model for predicting water temperature in Shadow Mountain Reservoir using the t2022 data. This pass is a middle ground between the aggressive feature reduction and all features. For this model, we're using the `leaky_basic_5` setting in `settings.py`, which reduces the number of hidden layers.

### Import Modules

In [9]:
#high level modules
import os
import imp
import pandas as pd

# ml/ai modules
import tensorflow as tf

### Custom Modules

In [10]:
# import custom modules
import importlib.util
import sys

this_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/modeling/SMR_temp_forecast/arNN/"


def _load_source(module_name, file_path):
    """Execute the Python file at file_path and register it as module_name.

    Stands in for imp.load_source, which is deprecated and was removed in
    Python 3.12.

    Parameters:
        module_name: name to register the module under in sys.modules.
        file_path: path to the .py source file to execute.

    Returns:
        The loaded module object.

    Raises:
        ImportError: if no loader can be determined for file_path.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load module {module_name!r} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # register before executing so `from <module_name> import ...` resolves
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


_load_source("settings", os.path.join(this_dir, "settings.py"))
from settings import settings
_load_source("architecture", os.path.join(this_dir, "architecture.py"))
from architecture import build_model, compile_model
# universal_functions.py is deliberately registered under the name "universals"
_load_source("universals", os.path.join(this_dir, "universal_functions.py"))
from universals import save_to_pickle, twotemp_labels_features

# point to data directory
data_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/data/NN_train_val_test/SMR_forecast/met_3h_complete/"

### Import train/val sets

Import and format training and validation arrays for use in model training

In [11]:
# list every file in the data directory and keep only the t2022 files
all_files = pd.Series(os.listdir(data_dir))
is_t2022 = all_files.str.contains('t2022')
t2022 = all_files[is_t2022]

# os.listdir returns names in arbitrary order, so sort each subset by file
# name as soon as it is selected
t2022_val = t2022[t2022.str.contains('validation')].sort_values()
t2022_train = t2022[t2022.str.contains('training')].sort_values()

# display both series to check the ordering
t2022_val, t2022_train



(5     validation_t2022_2014_v2024-12-01.csv
 23    validation_t2022_2015_v2024-12-01.csv
 39    validation_t2022_2016_v2024-12-01.csv
 33    validation_t2022_2017_v2024-12-01.csv
 29    validation_t2022_2018_v2024-12-01.csv
 34    validation_t2022_2019_v2024-12-01.csv
 14    validation_t2022_2020_v2024-12-01.csv
 8     validation_t2022_2021_v2024-12-01.csv
 dtype: object,
 38    training_t2022_2014_v2024-12-01.csv
 32    training_t2022_2015_v2024-12-01.csv
 4     training_t2022_2016_v2024-12-01.csv
 22    training_t2022_2017_v2024-12-01.csv
 20    training_t2022_2018_v2024-12-01.csv
 3     training_t2022_2019_v2024-12-01.csv
 28    training_t2022_2020_v2024-12-01.csv
 43    training_t2022_2021_v2024-12-01.csv
 dtype: object)

In [12]:
def load_data(file):
    """Read the comma-separated file named `file` from data_dir into a DataFrame."""
    csv_path = os.path.join(data_dir, file)
    return pd.read_csv(csv_path, sep=',')

# Load one validation/training pair per cross-validation fold. Fold k reads
# the file at position k-1 of the sorted file lists, so every file is used
# exactly once.
val1 = load_data(t2022_val.values[0])
train1 = load_data(t2022_train.values[0])

val2 = load_data(t2022_val.values[1])
train2 = load_data(t2022_train.values[1])

val3 = load_data(t2022_val.values[2])
train3 = load_data(t2022_train.values[2])

val4 = load_data(t2022_val.values[3])
train4 = load_data(t2022_train.values[3])

# fold 5 must read position 4; reading position 5 here would duplicate
# fold 6 and leave the fifth file unused
val5 = load_data(t2022_val.values[4])
train5 = load_data(t2022_train.values[4])

val6 = load_data(t2022_val.values[5])
train6 = load_data(t2022_train.values[5])

val7 = load_data(t2022_val.values[6])
train7 = load_data(t2022_train.values[6])

val8 = load_data(t2022_val.values[7])
train8 = load_data(t2022_train.values[7])

Using the function twotemp_labels_features, we can create ML-ready features and labels for the training and validation sets.

In [13]:
# pair each training frame with its validation frame, fold by fold
train_frames = (train1, train2, train3, train4, train5, train6, train7, train8)
val_frames = (val1, val2, val3, val4, val5, val6, val7, val8)

# one call per fold, in fold order; each call returns
# (features, labels, val_features, val_labels)
fold_arrays = [
    twotemp_labels_features(train_df, val_df)
    for train_df, val_df in zip(train_frames, val_frames)
]

features1, labels_1, val_features1, val_labels_1 = fold_arrays[0]
features2, labels_2, val_features2, val_labels_2 = fold_arrays[1]
features3, labels_3, val_features3, val_labels_3 = fold_arrays[2]
features4, labels_4, val_features4, val_labels_4 = fold_arrays[3]
features5, labels_5, val_features5, val_labels_5 = fold_arrays[4]
features6, labels_6, val_features6, val_labels_6 = fold_arrays[5]
features7, labels_7, val_features7, val_labels_7 = fold_arrays[6]
features8, labels_8, val_features8, val_labels_8 = fold_arrays[7]

In [14]:
# display the shapes of the fold-1 arrays: training features/labels first,
# then validation features/labels
tuple(
    arr.shape
    for arr in (features1, labels_1, val_features1, val_labels_1)
)

((544, 174), (544, 2), (85, 174), (85, 2))

### Compile and train models

Here, we'll use intentionally overfit settings to create an overfit model. This particular instance uses 2 hidden layers containing 10 nodes each (see the model summary printed after the training cell). We've increased the batch size to 64 from the previous iteration, and kept the patience at 200 and the dropout at 0.1.

In [15]:
tf.keras.backend.clear_session()

# every hyperparameter for this run comes from a single entry in settings.py
run_settings = settings["leaky_basic_5"]
tf.keras.utils.set_random_seed(run_settings["random_seed"])

# define the early stopping callback
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=run_settings["patience"],
  restore_best_weights=True,
  mode="auto"
)


def _train_fold(features, labels, val_features, val_labels):
    """Build, compile and fit one model for a single cross-validation fold.

    Parameters:
        features, labels: training arrays passed to build_model and model.fit.
        val_features, val_labels: arrays passed to model.fit as validation data.

    Returns:
        (model, history): the fitted model and the object returned by model.fit.
    """
    model = build_model(features, labels, run_settings)
    model = compile_model(model, run_settings)

    # train the model via model.fit
    history = model.fit(
        features,
        labels,
        epochs=run_settings["max_epochs"],
        batch_size=run_settings["batch_size"],
        shuffle=True,
        # Keras documents validation_data as a tuple (x_val, y_val)
        validation_data=(val_features, val_labels),
        callbacks=[early_stopping_callback],
        verbose=1,
    )
    return model, history


# The folds are trained strictly in order, so the sequence of random draws
# after set_random_seed is the same on every run.

## TS cross 1
model_1, history_1 = _train_fold(features1, labels_1, val_features1, val_labels_1)

## TS cross 2
model_2, history_2 = _train_fold(features2, labels_2, val_features2, val_labels_2)

## TS cross 3
model_3, history_3 = _train_fold(features3, labels_3, val_features3, val_labels_3)

## TS cross 4
model_4, history_4 = _train_fold(features4, labels_4, val_features4, val_labels_4)

## TS cross 5
model_5, history_5 = _train_fold(features5, labels_5, val_features5, val_labels_5)

## TS cross 6
model_6, history_6 = _train_fold(features6, labels_6, val_features6, val_labels_6)

## TS cross 7
model_7, history_7 = _train_fold(features7, labels_7, val_features7, val_labels_7)

## TS cross 8
model_8, history_8 = _train_fold(features8, labels_8, val_features8, val_labels_8)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 174)]             0         
                                                                 
 dropout (Dropout)           (None, 174)               0         
                                                                 
 dense (Dense)               (None, 10)                1750      
                                                                 
 dense_1 (Dense)             (None, 10)                110       
                                                                 
 dense_2 (Dense)             (None, 2)                 22        
                                                                 
Total params: 1,882
Trainable params: 1,882
Non-trainable params: 0
_________________________________________________________________
___________________________________________________________

Finally, save the models and training history. The cell below writes them to the following directory, creating it first if it does not already exist:

data/NN_train_val_test/SMR_forecast/models/leaky_basic_5_complete/

In [16]:
dump_dir = "/Users/steeleb/Documents/GitHub/NASA-NW/data/NN_train_val_test/SMR_forecast/models/leaky_basic_5_complete/"
# create the output directory and any missing parents; no error if it exists
os.makedirs(dump_dir, exist_ok=True)

# save models to pickle
models = [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8]

# files are numbered from 1 to match the model_N / history_N variable names
for i, model in enumerate(models, start=1):
    save_to_pickle(model, os.path.join(dump_dir, f"model_{i}.pkl"))

# save history to pickles
histories = [history_1, history_2, history_3, history_4, history_5, history_6, history_7, history_8]

for i, history in enumerate(histories, start=1):
    save_to_pickle(history, os.path.join(dump_dir, f"history_{i}.pkl"))


INFO:tensorflow:Assets written to: ram://37b99850-6df7-4901-96c2-4fe68707a108/assets
INFO:tensorflow:Assets written to: ram://d5af9556-1f9d-4ed1-9fa4-a186af50ade7/assets
INFO:tensorflow:Assets written to: ram://7bd7dc9b-04fb-44a5-b758-ee41a2d5812d/assets
INFO:tensorflow:Assets written to: ram://30848577-4823-4801-b4ce-b782444cd7c7/assets
INFO:tensorflow:Assets written to: ram://794a6682-f865-444c-a2aa-08ba3d38d1b0/assets
INFO:tensorflow:Assets written to: ram://c804f0ca-b7ba-423b-9c7c-55626c687c9a/assets
INFO:tensorflow:Assets written to: ram://d0072349-1885-4f2f-8081-32da3ed3d376/assets
INFO:tensorflow:Assets written to: ram://cbf863b2-e030-43a7-90b5-56b4004afe98/assets
INFO:tensorflow:Assets written to: ram://176bf802-86a1-48d2-9be6-21f437065064/assets
INFO:tensorflow:Assets written to: ram://e2290f71-9bb0-4d39-a28b-f2f627815e2b/assets
INFO:tensorflow:Assets written to: ram://e0aa9605-fa76-44d8-8dc5-d15d07bdce8d/assets
INFO:tensorflow:Assets written to: ram://9248befa-7300-4f61-b3aa-