This notebook trains a set of very overfit neural network models (one per train/validation split) for predicting surface temperature.

In [45]:
#high level modules
import os
import sys
import imp
import numpy as np
import pandas as pd
import pickle

# ml/ai modules
import tensorflow as tf
# Let's import some different things we will use to build the neural network
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, Dropout, Softmax

# import custom modules
# NOTE(review): `imp` is deprecated and was removed in Python 3.12, so this
# cell needs an interpreter that still ships it (or a port to importlib).
# NOTE(review): absolute, user-specific checkout path -- the notebook only runs
# on a machine where this directory exists.
this_dir = "/Users/steeleb/Documents/GitHub/ATS-ML-Fall2023/"
# Each load_source call executes the given file as a module with the given
# name; the `from <name> import ...` line that follows then pulls objects out
# of it. The order matters: load first, import second.
imp.load_source("settings",os.path.join(this_dir,"NeuralNetworks/settings.py"))
from settings import settings
# preprocessing.py is loaded under the alias "tvt" (not "preprocessing"); it
# supplies the train/val frames for six splits plus four "_ts" splits.
imp.load_source("tvt", os.path.join(this_dir, "NeuralNetworks/preprocessing.py"))
from tvt import train1, val1, train2, val2, train3, val3, train4, val4, train5, val5, train6, val6
from tvt import train1_ts, val1_ts, train2_ts, val2_ts, train3_ts, val3_ts, train4_ts, val4_ts
# Model construction / compilation helpers used by the training cells.
imp.load_source("architecture", os.path.join(this_dir, "NeuralNetworks/architecture.py"))
from architecture import build_model, compile_model
# universal_functions.py is loaded under the alias "universals".
imp.load_source("universals", os.path.join(this_dir, "NeuralNetworks/universal_functions.py"))
from universals import save_to_pickle


Format training and validation arrays for use in model training

In [46]:

# Columns that are not model inputs: 'value' is the prediction target, and
# 'feature' and 'date' are dropped alongside it before training.
NON_INPUT_COLUMNS = ['value', 'feature', 'date']


def _split_frame(frame):
    """Split one train/val frame into numpy arrays.

    Returns a ``(features, labels)`` pair: ``features`` is the frame without
    the NON_INPUT_COLUMNS columns, ``labels`` is the frame's 'value' column.
    """
    inputs = np.array(frame.drop(NON_INPUT_COLUMNS, axis = 1))
    targets = np.array(frame['value'])
    return inputs, targets


# Saving feature names for later use (column order of the training inputs)
feature_list = list(train1.drop(NON_INPUT_COLUMNS, axis = 1).columns)

# training features / labels for the six splits
features1, labels_1 = _split_frame(train1)
features2, labels_2 = _split_frame(train2)
features3, labels_3 = _split_frame(train3)
features4, labels_4 = _split_frame(train4)
features5, labels_5 = _split_frame(train5)
features6, labels_6 = _split_frame(train6)

# validation features / labels for the six splits
val_features1, val_labels_1 = _split_frame(val1)
val_features2, val_labels_2 = _split_frame(val2)
val_features3, val_labels_3 = _split_frame(val3)
val_features4, val_labels_4 = _split_frame(val4)
val_features5, val_labels_5 = _split_frame(val5)
val_features6, val_labels_6 = _split_frame(val6)

# training features / labels for the four timeseries splits
features1_ts, labels_1_ts = _split_frame(train1_ts)
features2_ts, labels_2_ts = _split_frame(train2_ts)
features3_ts, labels_3_ts = _split_frame(train3_ts)
features4_ts, labels_4_ts = _split_frame(train4_ts)

# validation features / labels for the four timeseries splits
val_features1_ts, val_labels_1_ts = _split_frame(val1_ts)
val_features2_ts, val_labels_2_ts = _split_frame(val2_ts)
val_features3_ts, val_labels_3_ts = _split_frame(val3_ts)
val_features4_ts, val_labels_4_ts = _split_frame(val4_ts)


Compile and train models

In [47]:
# Re-execute settings.py so edits made since the first import take effect.
# `from settings import settings` bound the original dict object in this
# namespace, and reloading the module does not update that binding, so the
# name is rebound here to the reloaded module's `settings` attribute.
settings = imp.reload(sys.modules['settings']).settings
overfit_settings = settings["super_overfit"]

tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(overfit_settings["random_seed"])

# define the early stopping callback
# NOTE(review): one callback instance is shared by all six fits below; this
# relies on Keras resetting the callback's internal state at the start of each
# fit -- confirm for the installed TensorFlow version.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=overfit_settings["patience"],
  restore_best_weights=True,
  mode="auto"
)


def fit_fold(features, labels, val_features, val_labels, fold_settings, callback):
    """Build, compile and fit one model on a single train/validation split.

    Parameters
    ----------
    features, labels : training inputs and targets, passed to ``build_model``
        and to ``model.fit``.
    val_features, val_labels : validation inputs and targets, monitored by
        ``callback`` through "val_loss".
    fold_settings : dict handed to ``build_model`` / ``compile_model``; its
        "max_epochs" and "batch_size" entries are read here.
    callback : Keras callback passed to ``model.fit``.

    Returns
    -------
    (model, history) : the fitted model and the object returned by ``fit``.
    """
    model = build_model(features, labels, fold_settings)
    model = compile_model(model, fold_settings)
    # train the model via model.fit
    history = model.fit(
        features,
        labels,
        epochs=fold_settings["max_epochs"],
        batch_size=fold_settings["batch_size"],
        shuffle=True,
        # Keras documents validation_data as a tuple (x_val, y_val)
        validation_data=(val_features, val_labels),
        callbacks=[callback],
        verbose=1,
    )
    return model, history


# Folds are fitted strictly in order 1..6 (build -> compile -> fit per fold),
# so the random stream seeded above is consumed in the same sequence each run.

## LOO 1
model_1, history_1 = fit_fold(
    features1, labels_1, val_features1, val_labels_1,
    overfit_settings, early_stopping_callback)

## LOO 2
model_2, history_2 = fit_fold(
    features2, labels_2, val_features2, val_labels_2,
    overfit_settings, early_stopping_callback)

## LOO 3
model_3, history_3 = fit_fold(
    features3, labels_3, val_features3, val_labels_3,
    overfit_settings, early_stopping_callback)

## LOO 4
model_4, history_4 = fit_fold(
    features4, labels_4, val_features4, val_labels_4,
    overfit_settings, early_stopping_callback)

## LOO 5
model_5, history_5 = fit_fold(
    features5, labels_5, val_features5, val_labels_5,
    overfit_settings, early_stopping_callback)

## LOO 6
model_6, history_6 = fit_fold(
    features6, labels_6, val_features6, val_labels_6,
    overfit_settings, early_stopping_callback)


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32)]              0         
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 30)                990       
                                                                 
 dense_1 (Dense)             (None, 30)                930       
                                                                 
 dense_2 (Dense)             (None, 30)                930       
                                                                 
 dense_3 (Dense)             (None, 30)                930       
                                                                 
 dense_4 (Dense)             (None, 30)                930   

2023-11-09 15:07:05.070804: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




Save the trained models and their training histories to pickle files.

In [None]:
# Output directory for the pickled models and histories.
# NOTE(review): absolute, user-specific path -- adjust before running elsewhere.
dump_dir = "/Users/steeleb/OneDrive - Colostate/NASA-Northern/data/NN_train_val_test/models/super_overfit/"

# save models to pickle, numbered 1..6 in training order
models = [model_1, model_2, model_3, model_4, model_5, model_6]

for i, model in enumerate(models, start=1):
    save_to_pickle(model, f"{dump_dir}/model_{i}.pkl")

# save history to pickles, using the same 1..6 numbering as the models
histories = [history_1, history_2, history_3, history_4, history_5, history_6]

for i, history in enumerate(histories, start=1):
    save_to_pickle(history, f"{dump_dir}/history_{i}.pkl")


Repeat the same compile-and-train procedure for the time-series train/validation sets.

In [None]:

ts_settings = settings["super_overfit"]

tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(ts_settings["random_seed"])

# define the early stopping callback
# NOTE(review): one callback instance is shared by all four fits below; this
# relies on Keras resetting the callback's internal state at the start of each
# fit -- confirm for the installed TensorFlow version.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=ts_settings["patience"],
  restore_best_weights=True,
  mode="auto"
)


def fit_ts_fold(features, labels, val_features, val_labels, fold_settings, callback):
    """Build, compile and fit one model on a single timeseries train/val split.

    Parameters
    ----------
    features, labels : training inputs and targets, passed to ``build_model``
        and to ``model.fit``.
    val_features, val_labels : validation inputs and targets, monitored by
        ``callback`` through "val_loss".
    fold_settings : dict handed to ``build_model`` / ``compile_model``; its
        "max_epochs" and "batch_size" entries are read here.
    callback : Keras callback passed to ``model.fit``.

    Returns
    -------
    (model, history) : the fitted model and the object returned by ``fit``.
    """
    model = build_model(features, labels, fold_settings)
    model = compile_model(model, fold_settings)
    # train the model via model.fit
    history = model.fit(
        features,
        labels,
        epochs=fold_settings["max_epochs"],
        batch_size=fold_settings["batch_size"],
        shuffle=True,
        # Keras documents validation_data as a tuple (x_val, y_val)
        validation_data=(val_features, val_labels),
        callbacks=[callback],
        verbose=1,
    )
    return model, history


# Folds are fitted strictly in order 1..4 (build -> compile -> fit per fold),
# so the random stream seeded above is consumed in the same sequence each run.

## LOO 1
model_1_ts, history_1_ts = fit_ts_fold(
    features1_ts, labels_1_ts, val_features1_ts, val_labels_1_ts,
    ts_settings, early_stopping_callback)

## LOO 2
model_2_ts, history_2_ts = fit_ts_fold(
    features2_ts, labels_2_ts, val_features2_ts, val_labels_2_ts,
    ts_settings, early_stopping_callback)

## LOO 3
model_3_ts, history_3_ts = fit_ts_fold(
    features3_ts, labels_3_ts, val_features3_ts, val_labels_3_ts,
    ts_settings, early_stopping_callback)

## LOO 4
model_4_ts, history_4_ts = fit_ts_fold(
    features4_ts, labels_4_ts, val_features4_ts, val_labels_4_ts,
    ts_settings, early_stopping_callback)

Save the time-series models and their training histories to pickle files.

In [None]:
# save models to pickle
# `dump_dir` comes from the earlier cell that saves the non-timeseries models.
ts_models = [model_1_ts, model_2_ts, model_3_ts, model_4_ts]

# enumerate(start=1) numbers exactly the four timeseries models; the previous
# zip(..., range(1,7)) bound was left over from the six-model cell and only
# worked because zip stops at the shorter input.
for i, model in enumerate(ts_models, start=1):
    save_to_pickle(model, f"{dump_dir}/ts_model_{i}.pkl")

# save history to pickles, using the same 1..4 numbering as the models
ts_histories = [history_1_ts, history_2_ts, history_3_ts, history_4_ts]

for i, history in enumerate(ts_histories, start=1):
    save_to_pickle(history, f"{dump_dir}/ts_history_{i}.pkl")
