In [None]:
# Colab only: mount Google Drive at /content/drive so the dataset and
# checkpoint paths used later in this notebook resolve. Skip when Drive is
# already mounted.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import re

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Abort early unless TensorFlow reports the first GPU; otherwise print its name.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print(f'Found GPU at: {device_name}')
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [None]:
# Load the per-house dataset from Drive and preview the first rows.
# (pandas is imported in the notebook's import cell.)
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably a saved
# row index from an earlier to_csv call -- confirm whether it is meant to be
# kept as a column before rebuilding the windows.
df = pd.read_csv('/content/drive/MyDrive/datasets/allHousesFilled.csv')
df.head()

Unnamed: 0,HouseNum,TimeSin,TimeCos,DayNumSin,DayNumCos,MonthSin,MonthCos,RealTemp,ApparTemp,Humid,...,Type_END OF TERRACE,Type_FLAT,Type_SEMI-DETACHED,Type_MID-TERRACE,NumRooms,NumOccupants,Total,AlwaysOn,Intermit,HVAC
0,1.0,-0.5,-0.8660254,0.974928,-0.222521,-0.866025,0.5,0.792136,0.392199,0.59,...,0.0,0.0,0.0,0.0,0.4,0.2,5.489313,0.041201,0.001036,0.957763
1,1.0,-0.707107,-0.7071068,0.974928,-0.222521,-0.866025,0.5,0.607125,0.255182,0.65,...,0.0,0.0,0.0,0.0,0.4,0.2,5.301124,0.095364,0.001536,0.9031
2,1.0,-0.866025,-0.5,0.974928,-0.222521,-0.866025,0.5,0.311106,0.011597,0.7,...,0.0,0.0,0.0,0.0,0.4,0.2,5.24037,0.152939,0.001684,0.845377
3,1.0,-0.965926,-0.258819,0.974928,-0.222521,-0.866025,0.5,0.144596,-0.186317,0.66,...,0.0,0.0,0.0,0.0,0.4,0.2,4.759992,0.097266,0.003417,0.899317
4,1.0,-1.0,-1.83697e-16,0.974928,-0.222521,-0.866025,0.5,0.015088,-0.38423,0.62,...,0.0,0.0,0.0,0.0,0.4,0.2,4.791903,0.296886,0.10086,0.602254


In [None]:
# Metadata for the windowing and network
# Number of target columns: sets the size of the softmax output layer.
num_device_categories = 3
# Columns per timestep in each saved window, targets included; the model's
# input width is num_input_parameters - num_device_categories (46 - 3 = 43).
num_input_parameters = 46
# Window length (rows per window) and the LSTM's sequence length.
num_recur_hours = 24

In [None]:
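# # Only uncomment and run this cell to rebuild the cached windows
# # (CreatedWindows.npy); otherwise skip it and load the saved array below.
# # Windowing helper: returns every run of `window_size` consecutive rows
# # from a group, advancing one row at a time, or None if the group is too short.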
# def create_sequential_windows(group, window_size):
#     # Check to see if there is enough data
#     if group.shape[0] < window_size:
#         return None

#     windows = []
#     start_index = 0
#     while start_index + window_size <= group.shape[0]:
#         window = group.iloc[start_index:start_index + window_size].copy()
#         windows.append(window)
#         start_index += 1

#     return windows

# # Apply the windowing function
# sequential_windows = df.groupby('HouseNum').apply(lambda x: create_sequential_windows(x.drop(columns=['HouseNum']), num_recur_hours))
# all_windows = [window for sublist in sequential_windows if sublist is not None for window in sublist]

# # Shuffle the windows
# all_windows = shuffle(all_windows)
# if all_windows:
#     windows_array = np.array([window.values for window in all_windows])

#     # Print the shape for confirmation
#     print("Shape of the array:", windows_array.shape)

#     # Convert to a TensorFlow tensor
#     windows_tensor = tf.convert_to_tensor(windows_array, dtype=tf.float32)

#     # Print the shape of the tensor
#     print("Shape of the tensor:", windows_tensor.shape)
# else:
#     print("No windows were created.")

# # Save the windowed data
# np.save('/content/drive/MyDrive/datasets/CreatedWindows.npy', windows_array)

In [None]:
# Load the pre-built windows saved by the (commented-out) windowing cell.
# Per the shapes printed further down, the array is
# (num_windows, num_recur_hours, num_input_parameters) = (247313, 24, 46).
windows_array = np.load('/content/drive/MyDrive/datasets/CreatedWindows.npy')

In [None]:
# Checkpoint filename template; Keras substitutes the epoch number into
# {epoch:04d} when it writes each file.
checkpoint_path = "/content/drive/MyDrive/datasets/training_1/cp-{epoch:04d}.ckpt"

# Callback that writes the model weights (weights only) at the end of every epoch.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)


In [None]:
# Stacked LSTM: 100 units returning the full sequence, then 50 units returning
# only the final state, each followed by 20% dropout, ending in a softmax over
# the device categories.
feature_count = num_input_parameters - num_device_categories

model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(num_recur_hours, feature_count)))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(num_device_categories, activation='softmax'))

# Train with Adam on categorical cross-entropy and report accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 24, 100)           57600     
                                                                 
 dropout (Dropout)           (None, 24, 100)           0         
                                                                 
 lstm_1 (LSTM)               (None, 50)                30200     
                                                                 
 dropout_1 (Dropout)         (None, 50)                0         
                                                                 
 dense (Dense)               (None, 3)                 153       
                                                                 
Total params: 87953 (343.57 KB)
Trainable params: 87953 (343.57 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Split each window into model inputs and targets, then hold out 20% for testing.
# (train_test_split is imported in the notebook's import cell.)

# Targets: the last num_device_categories columns at the final timestep of
# every window. Indexing the timestep with -1 (not -1:) drops that axis
# directly, so no per-window squeeze loop is needed.
selected_features_tensor = windows_array[:, -1, -num_device_categories:]
# Inputs: every timestep, with the target columns removed.
training_features_tensor = windows_array[:, :, :-num_device_categories]

print("Number of samples in selected_features_tensor:", len(selected_features_tensor))
print("Number of samples in training_features_tensor:", training_features_tensor.shape[0])

# NOTE(review): the commented-out windowing cell builds overlapping windows
# (stride 1) and shuffles them before saving, so this random split presumably
# places near-duplicate windows in both train and test -- confirm whether a
# split by house or by time is needed for an honest test score.
X_train, X_test, y_train, y_test = train_test_split(
    training_features_tensor,
    selected_features_tensor,
    test_size=0.2,
    random_state=42,
)

print(X_train.shape)
print(y_train.shape)



Number of samples in selected_features_tensor: 247313
Number of samples in training_features_tensor: 247313
(197850, 24, 43)
(197850, 3)


In [31]:
# Resume from the most recent checkpoint in checkpoint_dir, if any, and work
# out which epoch training should continue from.
# (re is imported in the notebook's import cell.)
checkpoint_dir =  '/content/drive/MyDrive/datasets/training_1/'
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint:
    # Restore the weights written by the ModelCheckpoint callback, which is
    # configured with save_weights_only=True.
    # NOTE(review): confirm whether optimizer state is restored as well.
    model.load_weights(latest_checkpoint)
    print("Model restored from checkpoint: {}".format(latest_checkpoint))
    # Recover the epoch number from the 'cp-NNNN.ckpt' filename. The dot is
    # escaped so it only matches a literal '.', and 4-or-more digits are
    # accepted because {epoch:04d} pads to a minimum width, not a fixed one.
    epoch_match = re.search(r'cp-(\d{4,})\.ckpt', latest_checkpoint)
    if epoch_match is None:
        # Fail loudly instead of crashing on None.group() or silently
        # restarting from epoch 0 and overwriting existing checkpoints.
        raise ValueError(
            "Cannot parse epoch number from checkpoint path: {}".format(latest_checkpoint))
    last_epoch = int(epoch_match.group(1))
else:
    print("No checkpoint found. Initializing model from scratch.")
    last_epoch = 0

print('Starting from epoch ' + str(last_epoch))


Model restored from checkpoint: /content/drive/MyDrive/datasets/training_1/cp-0014.ckpt
Starting from epoch 14


In [32]:
# Continue training up to epoch 40, starting after the last restored epoch.
# The returned History object is stored as `history` rather than `LSTM`, so
# the imported Keras LSTM layer class is not shadowed (rebinding that name
# makes the model-definition cell fail when it is re-run).
# validation_split=0.2 holds out part of X_train/y_train for per-epoch validation.
history = model.fit(
    X_train,
    y_train,
    epochs=40,
    initial_epoch=last_epoch,
    batch_size=32,
    verbose=1,
    callbacks=[cp_callback],
    validation_split=0.2,
)

Epoch 15/40
Epoch 15: saving model to /content/drive/MyDrive/datasets/training_1/cp-0015.ckpt
Epoch 16/40
Epoch 16: saving model to /content/drive/MyDrive/datasets/training_1/cp-0016.ckpt
Epoch 17/40
Epoch 17: saving model to /content/drive/MyDrive/datasets/training_1/cp-0017.ckpt
Epoch 18/40
Epoch 18: saving model to /content/drive/MyDrive/datasets/training_1/cp-0018.ckpt
Epoch 19/40
Epoch 19: saving model to /content/drive/MyDrive/datasets/training_1/cp-0019.ckpt
Epoch 20/40
Epoch 20: saving model to /content/drive/MyDrive/datasets/training_1/cp-0020.ckpt
Epoch 21/40
Epoch 21: saving model to /content/drive/MyDrive/datasets/training_1/cp-0021.ckpt
Epoch 22/40
Epoch 22: saving model to /content/drive/MyDrive/datasets/training_1/cp-0022.ckpt
Epoch 23/40
Epoch 23: saving model to /content/drive/MyDrive/datasets/training_1/cp-0023.ckpt
Epoch 24/40
Epoch 24: saving model to /content/drive/MyDrive/datasets/training_1/cp-0024.ckpt
Epoch 25/40
Epoch 25: saving model to /content/drive/MyDrive

In [33]:
# Persist the full trained model to Drive (save_format='tf'), alongside the
# per-epoch weight checkpoints in training_1/.
model.save('/content/drive/MyDrive/datasets/training_1/finalModel', save_format='tf')