## Imports

In [8]:
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.python.keras.callbacks import EarlyStopping

# training.py

Contains the model-definition code taken from `training.py`.

In [9]:
# Network architecture: the flat 832-value input is reshaped to 8x8x13, passed
# through two conv -> batch-norm -> max-pool stages, then a dense head that
# ends in a single linear output (regression).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Reshape((8, 8, 13), input_shape=(832,)))

# The two convolutional stages differ only in their number of filters.
for conv_filters in (32, 64):
    model.add(tf.keras.layers.Conv2D(conv_filters, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))

# Dense regression head.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='linear'))

# Compile: Adam with a small learning rate (a value to experiment with),
# mean-squared-error loss, and mean-absolute-error reported as a metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='MeanSquaredError',
    metrics=['MeanAbsoluteError'],
)

# Data Loader

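A batch loader that reads the dataset from disk one file at a time, so we can train on a dataset that is too big to hold in memory at once.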

In [10]:
class DataSequence(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches from a list of ``.npy`` files.

    Every file is read with ``np.load`` and treated as a 2-D table: all columns
    except the last are features, the last column is the target. The files are
    viewed as one long concatenated dataset, but only one file is kept in
    memory at a time (``file_cache``). A file's rows are shuffled each time it
    is loaded.

    Batches may be requested in any order and across any number of epochs.
    Requesting them in index order (``model.fit(..., shuffle=False)``) is the
    efficient pattern, because each file is then loaded once per epoch;
    random-order access reloads (and reshuffles) a file whenever the requested
    batch lives in a different file than the cached one.

    Args:
        files: Sequence of paths to ``.npy`` files.
        batch_size: Number of rows per batch (positive integer). The final
            batch of the dataset may be smaller.

    Raises:
        ValueError: If ``files`` is empty or ``batch_size`` is not positive.
    """

    def __init__(self, files, batch_size):
        super().__init__()
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        self.files = list(files)
        if not self.files:
            raise ValueError("files must contain at least one path")
        self.batch_size = batch_size

        # Row count of every file; cumulative_lengths[i] is the global index of
        # the first row of file i, and cumulative_lengths[-1] is the total.
        file_lengths = [len(np.load(f)) for f in self.files]
        self.total_length = sum(file_lengths)
        self.cumulative_lengths = np.cumsum([0] + file_lengths)

        # Start with the first file cached.
        self.file_num = 0
        self.file_cache = self._load_file(self.file_num)

    def _load_file(self, file_num):
        """Load file ``file_num`` as an int16 DataFrame with its rows shuffled."""
        return pd.DataFrame(np.load(self.files[file_num]), dtype=np.int16).sample(frac=1)

    def _ensure_cached(self, file_num):
        """Make ``file_cache`` hold file ``file_num``, loading it if necessary."""
        if file_num != self.file_num:
            # Drop the old frame first so two files are never resident together.
            self.file_cache = None
            self.file_cache = self._load_file(file_num)
            self.file_num = file_num

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int((self.total_length + self.batch_size - 1) // self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as a tuple ``(x, y)``.

        ``x`` holds the feature columns as int8 and ``y`` the last column as
        int16. A batch that crosses a file boundary is stitched together from
        the end of one file and the start of the next.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range for {len(self)} batches")

        # Global row range of this batch, clamped to the end of the dataset.
        global_start = idx * self.batch_size
        global_end = min(global_start + self.batch_size, self.total_length)

        pieces = []
        position = global_start
        while position < global_end:
            # Index of the file that contains global row `position`;
            # side="right" steps over files that contain zero rows.
            file_num = int(np.searchsorted(self.cumulative_lengths, position, side="right")) - 1
            self._ensure_cached(file_num)

            file_lower = int(self.cumulative_lengths[file_num])
            file_upper = int(self.cumulative_lengths[file_num + 1])

            # Take rows up to the end of the batch or of this file, whichever is first.
            take_until = min(global_end, file_upper)
            pieces.append(self.file_cache.iloc[position - file_lower:take_until - file_lower])
            position = take_until

        data = pieces[0] if len(pieces) == 1 else pd.concat(pieces)

        x = data.iloc[:, :-1].to_numpy().astype(np.int8)
        y = data.iloc[:, -1].to_numpy().astype(np.int16)

        return (x, y)

### Fitting Model

**TODO:** The processed dataset needs to be regenerated (rerun the preprocessing) so that the evaluations are correct.

In [23]:
# Paths of the training chunks: chunk files are numbered from 0 and this run uses the first two.
files = [f"../data/createdData/npyFiles/preprocessedChunk{num}.npy" for num in range(2)]

# Batch generator that feeds the training chunks to the model.
data = DataSequence(files, batch_size=1028)

# Validation set: a separate chunk, loaded fully into memory and row-shuffled.
validation = pd.DataFrame(
    np.load("../data/createdData/npyFiles/preprocessedChunk12.npy")
).sample(frac=1)

# All columns but the last are features (int8); the last column is the target (int16).
X_validate = validation.iloc[:, :-1].to_numpy().astype(np.int8)
y_validate = validation.iloc[:, -1].to_numpy().astype(np.int16)

# The DataFrame is no longer needed once the arrays exist; release it.
del validation

In [None]:
# Use the public tf.keras callback: the model is a tf.keras model, and callbacks
# from the private `tensorflow.python.keras` namespace are not guaranteed to be
# compatible with it.
# Monitors validation loss with a patience of 5 epochs. With epochs=3 that
# patience can never run out, so this fit always runs all 3 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# shuffle=False: batches are requested in index order, so DataSequence walks
# through its files sequentially instead of jumping between them.
history = model.fit(
    data,
    epochs=3,
    validation_data=(X_validate, y_validate),
    callbacks=[early_stopping],
    shuffle=False,
)

In [None]:
# Persist the model after the initial 3-epoch fit.
# NOTE(review): the directory name says "12mil" and "64batch", but the cells
# above build DataSequence from two chunk files with batch_size=1028 — confirm
# the name still describes this run.
model.save("../saved_models/corrected_12mil_3epoch_64batch_0.0001learnRate")



INFO:tensorflow:Assets written to: ../saved_models/corrected_12mil_3epoch_64batch_0.0001learnRate\assets


INFO:tensorflow:Assets written to: ../saved_models/corrected_12mil_3epoch_64batch_0.0001learnRate\assets


# Continue Training

For overnight training 5/3/23

In [None]:
# Resume from the checkpoint written after the initial 3-epoch fit.
model = tf.keras.models.load_model("../saved_models/corrected_12mil_3epoch_64batch_0.0001learnRate")

# Use the public tf.keras callback: callbacks from the private
# `tensorflow.python.keras` namespace are not guaranteed to be compatible with
# tf.keras models.
# Stops when validation loss has not improved for 5 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# shuffle=False, as in the initial fit: batches are requested in index order so
# DataSequence walks through its files sequentially.
history = model.fit(
    data,
    epochs=22,
    validation_data=(X_validate, y_validate),
    callbacks=[early_stopping],
    shuffle=False,
)

# Saved as "25epoch": presumably the 3 initial epochs plus up to 22 more.
model.save("../saved_models/corrected_12mil_25epoch_64batch_0.0001learnRate")

Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22




INFO:tensorflow:Assets written to: ../saved_models/corrected_12mil_25epoch_64batch_0.0001learnRate\assets


INFO:tensorflow:Assets written to: ../saved_models/corrected_12mil_25epoch_64batch_0.0001learnRate\assets
