This script trains a set of deliberately overfit neural-network models for predicting surface temperature.

In [None]:
#high level modules
import os
import sys
import imp
import numpy as np
import pandas as pd
import pickle

# ml/ai modules
import tensorflow as tf
# Let's import some different things we will use to build the neural network
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, Dropout, Softmax

# import custom modules
# NOTE(review): the `imp` module has been deprecated since Python 3.4 and was
# removed in Python 3.12; these loads will need porting to importlib before
# this notebook can run on a newer interpreter.
# Absolute, machine-specific path that the project files below are loaded from.
this_dir = "/Users/steeleb/Documents/GitHub/ATS-ML-Fall2023/"
# Each imp.load_source() call executes the given file and registers the
# resulting module in sys.modules under the name passed as first argument;
# that registration is what lets the `from <name> import ...` line that
# follows it resolve.
imp.load_source("settings",os.path.join(this_dir,"NeuralNetworks/settings.py"))
from settings import settings
# preprocessing.py is registered under the alias "tvt". It supplies six
# train/val pairs plus four *_ts pairs (used by the timeseries cell below).
imp.load_source("tvt", os.path.join(this_dir, "preprocessing.py"))
from tvt import train1, val1, train2, val2, train3, val3, train4, val4, train5, val5, train6, val6
from tvt import train1_ts, val1_ts, train2_ts, val2_ts, train3_ts, val3_ts, train4_ts, val4_ts
# Model construction and compilation helpers.
imp.load_source("architecture", os.path.join(this_dir, "NeuralNetworks/architecture.py"))
from architecture import build_model, compile_model
# Shared helpers for pickling results and for turning a train/val pair into
# feature and label arrays.
imp.load_source("universals", os.path.join(this_dir, "universal_functions.py"))
from universals import save_to_pickle, get_features_labels


Format training and validation arrays for use in model training

In [None]:
# Turn every LOO train/val pair into its four arrays, in fold order 1..6.
loo_arrays = [
    get_features_labels(train_set, val_set)
    for train_set, val_set in (
        (train1, val1),
        (train2, val2),
        (train3, val3),
        (train4, val4),
        (train5, val5),
        (train6, val6),
    )
]
(
    (features1, labels_1, val_features1, val_labels_1),
    (features2, labels_2, val_features2, val_labels_2),
    (features3, labels_3, val_features3, val_labels_3),
    (features4, labels_4, val_features4, val_labels_4),
    (features5, labels_5, val_features5, val_labels_5),
    (features6, labels_6, val_features6, val_labels_6),
) = loo_arrays

# Same treatment for the four timeseries train/val pairs.
ts_arrays = [
    get_features_labels(train_set, val_set)
    for train_set, val_set in (
        (train1_ts, val1_ts),
        (train2_ts, val2_ts),
        (train3_ts, val3_ts),
        (train4_ts, val4_ts),
    )
]
(
    (ts_features1, ts_labels_1, ts_val_features1, ts_val_labels_1),
    (ts_features2, ts_labels_2, ts_val_features2, ts_val_labels_2),
    (ts_features3, ts_labels_3, ts_val_features3, ts_val_labels_3),
    (ts_features4, ts_labels_4, ts_val_features4, ts_val_labels_4),
) = ts_arrays


Compile and train models

In [None]:
# Re-execute settings.py so edits made to it since the first import are used.
# reload() does not update names that were bound with
# `from settings import settings`, so rebind `settings` to the object created
# by the re-executed module; otherwise the stale dictionary would still be used.
settings = imp.reload(sys.modules['settings']).settings

# Every LOO model in this cell shares the "super_overfit" hyperparameter set.
overfit_settings = settings["super_overfit"]

tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(overfit_settings["random_seed"])

# define the early stopping callback
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=overfit_settings["patience"],
  restore_best_weights=True,
  mode="auto"
)

# One (train features, train labels, val features, val labels) tuple per LOO
# fold, listed in fold order so models are built and trained in the same
# sequence (1 -> 6) after the seed is set.
loo_folds = [
  (features1, labels_1, val_features1, val_labels_1),
  (features2, labels_2, val_features2, val_labels_2),
  (features3, labels_3, val_features3, val_labels_3),
  (features4, labels_4, val_features4, val_labels_4),
  (features5, labels_5, val_features5, val_labels_5),
  (features6, labels_6, val_features6, val_labels_6),
]

loo_models = []
loo_histories = []
for x_train, y_train, x_val, y_val in loo_folds:
    loo_model = build_model(x_train, y_train, overfit_settings)
    loo_model = compile_model(loo_model, overfit_settings)

    # train the model via model.fit
    loo_history = loo_model.fit(
      x_train,
      y_train,
      epochs=overfit_settings["max_epochs"],
      batch_size=overfit_settings["batch_size"],
      shuffle=True,
      # Keras documents validation_data as a tuple (x_val, y_val).
      validation_data=(x_val, y_val),
      callbacks=[early_stopping_callback],
      verbose=1,
    )

    loo_models.append(loo_model)
    loo_histories.append(loo_history)

# Keep the per-fold names that the saving cell below relies on.
model_1, model_2, model_3, model_4, model_5, model_6 = loo_models
history_1, history_2, history_3, history_4, history_5, history_6 = loo_histories


Save the trained models and their training histories

In [None]:
# Directory that receives every pickle written by this notebook.
dump_dir = "/Users/steeleb/OneDrive - Colostate/NASA-Northern/data/NN_train_val_test/models/super_overfit/"

# save models to pickle, numbering the files from 1 to match the fold names
models = [model_1, model_2, model_3, model_4, model_5, model_6]
for i, model in enumerate(models, start=1):
    save_to_pickle(model, f"{dump_dir}/model_{i}.pkl")

# save history to pickles, using the same 1-based numbering
histories = [history_1, history_2, history_3, history_4, history_5, history_6]
for i, history in enumerate(histories, start=1):
    save_to_pickle(history, f"{dump_dir}/history_{i}.pkl")


Then repeat the same build/compile/fit procedure for the time-series train/validation sets

In [None]:
# Hyperparameters for the timeseries models. This cell previously read
# settings["basic"], but its results are pickled into the super_overfit model
# directory alongside the LOO models, so it now uses the same "super_overfit"
# set as the LOO cell. Change this single line if "basic" was intended.
ts_settings = settings["super_overfit"]

tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(ts_settings["random_seed"])

# define the early stopping callback
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
  monitor="val_loss",
  patience=ts_settings["patience"],
  restore_best_weights=True,
  mode="auto"
)

# One (train features, train labels, val features, val labels) tuple per
# timeseries split, listed in order so models are built and trained in the
# same sequence (1 -> 4) after the seed is set.
ts_folds = [
  (ts_features1, ts_labels_1, ts_val_features1, ts_val_labels_1),
  (ts_features2, ts_labels_2, ts_val_features2, ts_val_labels_2),
  (ts_features3, ts_labels_3, ts_val_features3, ts_val_labels_3),
  (ts_features4, ts_labels_4, ts_val_features4, ts_val_labels_4),
]

ts_fold_models = []
ts_fold_histories = []
for x_train, y_train, x_val, y_val in ts_folds:
    ts_model = build_model(x_train, y_train, ts_settings)
    ts_model = compile_model(ts_model, ts_settings)

    # train the model via model.fit
    ts_history = ts_model.fit(
      x_train,
      y_train,
      epochs=ts_settings["max_epochs"],
      batch_size=ts_settings["batch_size"],
      shuffle=True,
      # Keras documents validation_data as a tuple (x_val, y_val).
      validation_data=(x_val, y_val),
      callbacks=[early_stopping_callback],
      verbose=1,
    )

    ts_fold_models.append(ts_model)
    ts_fold_histories.append(ts_history)

# Keep the per-split names that the saving cell below relies on.
model_1_ts, model_2_ts, model_3_ts, model_4_ts = ts_fold_models
history_1_ts, history_2_ts, history_3_ts, history_4_ts = ts_fold_histories

Save the time-series models and training histories to pickle files

In [None]:
# save models to pickle
# enumerate(start=1) numbers the files 1..len(list); the previous
# zip(..., range(1,7)) carried a bound copied from the six-model case and only
# worked because zip stops at the shorter input.
ts_models = [model_1_ts, model_2_ts, model_3_ts, model_4_ts]

for i, model in enumerate(ts_models, start=1):
    save_to_pickle(model, f"{dump_dir}/ts_model_{i}.pkl")

# save history to pickles
ts_histories = [history_1_ts, history_2_ts, history_3_ts, history_4_ts]

for i, history in enumerate(ts_histories, start=1):
    save_to_pickle(history, f"{dump_dir}/ts_history_{i}.pkl")
