In [1]:
# imports
import os
import csv
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras as keras
from keras import layers
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, GRU, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.backend import square, mean

In [2]:
# put everything into a dict

# make list of file names
# NOTE(review): 'detriot_sim_2_incomplete_combined.csv' is spelled differently from the
# other detroit files; it is left untouched because it has to match the name on disk -- confirm.
file_names = ['charlotte_sim_1_cooltoofast_combined.csv', 'charlotte_sim_1_lowpeaktemp_combined.csv',
              'charlotte_sim_1_successful_combined.csv', 'charlotte_sim_2_combined.csv',
              'charlotte_sim_2_v2_combined.csv', 'charlotte_sim_3_combined.csv',
              'denver_sim_1_combined.csv', 'detroit_sim_1_combined.csv',
              'detriot_sim_2_incomplete_combined.csv', 'detroit_sim_3_combined.csv',
              'jacksonville_sim_1_combined.csv', 'jacksonville_sim_2_combined.csv',
              'jacksonville_sim_3_combined.csv', 'lasvegas_sim_1_combined.csv',
              'lasvegas_sim_2_combined.csv']

# declare the dict
file_dict = dict()
file_len = len(file_names)

# loop through file names, open them, convert each one to a list of rows (row 0 is the
# header), and add it to the dict with the file's position in file_names as the key
for i, file_name in enumerate(file_names):
    # the csv module asks for newline="" so that newlines inside quoted fields are
    # handed to the reader untouched
    with open(file_name, "r", newline="") as f:
        reader = csv.reader(f)
        data = list(reader)
    file_dict[i] = data

# check the conversion: header plus the first two data rows only, not the whole file
print(file_dict[0][:3])


[['Sensor Time', 'Motor Time', 'S4_Humidity', 'S4_Temperature', 'S6_Humidity', 'S6_Temperature', 'S12_Humidity', 'S12_Temperature', 'S18_Humidity', 'S18_Temperature', 'S19_Humidity', 'S19_Temperature', 'S24_Humidity', 'S24_Temperature', 'S25_Humidity', 'S25_Temperature', 'S26_Humidity', 'S26_Temperature', 'lamp', 'heater', 'fan', 'ac', 'Bedroom 2 to Bathroom', 'Bedroom 1 to Living Room', 'Living Room to Kitchen', 'Living Room to Bathroom', 'Bedroom 1 to Bathroom', 'Living Room to Outside', 'Bedroom 1 to Outside', 'Bedroom 1 Left Window', 'Living Room Left Window', 'Bedroom 2 Right Window', 'Kitchen Right Window', 'Bedroom 2 Back Window', 'Living Room Front Window', 'Kitchen Front Window', 'Bedroom 1 Back Window'], ['9:53:57', '9:58:07', '53.0', '70.0', '54.5', '69.4', '53.6', '69.8', '53.9', '69.6', '53.5', '70.0', '54.8', '69.6', '54.0', '69.6', '53.4', '70.2', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'], ['9:54:17', '9:58:07', '53.0'

In [3]:
# fix random seed so this can be reproduced
seed_val = 7
def reset_random_seeds(seed=None):
    """Seed TensorFlow, NumPy and Python's ``random`` module with the same value.

    Parameters
    ----------
    seed : int, optional
        Seed applied to all three generators. When omitted, the notebook-wide
        ``seed_val`` is used, which matches the original zero-argument behaviour.
    """
    if seed is None:
        # looked up at call time so a later change to seed_val is picked up
        seed = seed_val
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

reset_random_seeds()

In [4]:
# setting some specifications for model based on our data

# NOTE(review): 35 equals the 37 columns in the CSV header minus the two clock columns
# ('Sensor Time', 'Motor Time') -- confirm that this is the intended feature count
num_features = 35
# number of chunks drawn for one batch (get_batch loops this many times);
# 16 is a power of 2 by convention only -- nothing in this notebook requires it
batch_size = 16
# number of consecutive rows in each chunk
time_steps = 10
# number of rows the target columns are shifted by, so each input row is paired with the
# target values shift_steps rows later
shift_steps = 15
# fraction (not a percentage) of each file's rows, counted from the top, that get_batch
# is allowed to sample from for training
train_percent = 0.8


In [17]:
# A batch is built from `batch_size` chunks, where each chunk is `time_steps` consecutive
# rows taken from one randomly chosen file. The chunks are concatenated, so a batch is a
# flat list of batch_size * time_steps rows (16 * 10 = 160 with the settings used here).
# Chunks are drawn at random, so neighbouring chunks generally come from different files.

# the following function gets one batch from the data set
def get_batch(batch_size, time_steps):
    """Draw one training batch of randomly positioned chunks.

    Reads the notebook-level ``file_dict``, ``file_len`` and ``train_percent``.

    Parameters
    ----------
    batch_size : int
        Number of chunks to draw.
    time_steps : int
        Number of consecutive rows in each chunk.

    Returns
    -------
    list
        Flat list of ``batch_size * time_steps`` rows; every run of ``time_steps``
        rows is one contiguous chunk from a single file.

    Raises
    ------
    ValueError
        If the randomly chosen file has too few rows in its training portion to
        hold a chunk of ``time_steps`` rows.
    """
    x_batch = []
    for _ in range(batch_size):
        # choose a random file from dict (randint includes both end points)
        rand_file = random.randint(0, file_len - 1)
        file = file_dict[rand_file]

        # the first train_percent of the rows (header included in the count) is the
        # training portion of this file
        train_size = int(len(file) * train_percent)

        # subtract time_steps to get the largest starting index whose chunk still
        # ends inside the training portion
        train_size -= time_steps

        # without this check randint fails with an opaque "empty range" error
        if train_size < 1:
            raise ValueError(
                f"file {rand_file} has only {len(file)} rows, too few for a chunk "
                f"of {time_steps} time steps"
            )

        # get a random starting index within that limit (start at 1 so we dont get header info)
        idx = random.randint(1, train_size)

        # append the rows from the starting index to start + time_steps
        x_batch += file[idx:idx + time_steps]
    return x_batch

# draw one batch and dump its rows to disk so the sampling can be inspected by hand
x_batch = get_batch(batch_size, time_steps)
with open('batch_test.csv', "w", newline = "") as out_file:
    csv.writer(out_file).writerows(x_batch)

0
12
123
['10:30:58', '10:33:12', '51.8', '78.6', '59.2', '73.6', '61.4', '72.1', '61.9', '71.8', '59.6', '72.7', '58.0', '73.9', '57.4', '74.1', '59.8', '72.3', '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']

1
13
103
['10:31:22', '10:33:12', '52.0', '78.4', '59.2', '73.6', '61.5', '72.3', '61.9', '71.8', '59.7', '72.7', '57.9', '73.9', '57.2', '74.3', '59.8', '72.3', '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']

2
11
206
['10:31:45', '10:33:12', '51.9', '78.4', '59.1', '73.8', '61.5', '72.3', '61.9', '71.8', '59.6', '72.7', '57.9', '73.9', '57.2', '74.3', '59.8', '72.3', '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']

3
3
52
['10:32:00', '10:33:12', '52.0', '78.4', '59.0', '73.8', '61.5', '72.3', '61.9', '72.0', '59.7', '72.9', '57.9', '73.9', '57.1', '74.3', '59.8', '72.3', '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '

In [None]:
# get random data from different files and batch them into one file
# we have a little over 5,000 data points which gives us approx. 4,000 for training and 1,000 for testing
# one batch is 16 chunks of 10 data points (160 data points), so 4,000 training data points is about 25 batches


In [None]:

# Build the numeric dataset and the shifted targets.
# NOTE(review): this cell originally read from an undefined `df` (`df.val`) and contained
# the invalid expression `data[][target_names]`. It is reconstructed here from the first
# loaded file only -- confirm which file(s) should actually feed the model.
header, *rows = file_dict[0]
df = pd.DataFrame(rows, columns=header)
# the two clock columns hold strings such as '9:53:57' and cannot be cast to float
df = df.drop(columns=['Sensor Time', 'Motor Time'])
# make sure all the data is of the same type
df = df.astype('float32')
dataset = df.values
# shift the y data: row i of df_targets holds the temperatures from row i + shift_steps,
# so the last shift_steps rows are NaN
target_names = ['S4_Temperature','S6_Temperature', 'S12_Temperature', 'S18_Temperature', 'S19_Temperature', 'S24_Temperature', 'S25_Temperature', 'S26_Temperature']
df_targets = df[target_names].shift(-shift_steps)