In [1]:
# imports
import os
import csv
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras as keras
from keras import layers
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, GRU, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.backend import square, mean

In [24]:
# put everything into a dict

# make list of file names
file_names = ['charlotte_sim_1_cooltoofast_combined_interpolated.csv', 'charlotte_sim_1_lowpeaktemp_combined_interpolated.csv',
              'charlotte_sim_1_lowtemps_combined_interpolated.csv','charlotte_sim_1_successful_combined_interpolated.csv', 
              'charlotte_sim_2_successful_combined_interpolated.csv',
              'charlotte_sim_2_v2_combined_interpolated.csv', 'charlotte_sim_3_combined_interpolated.csv',
              'denver_sim_1_combined_interpolated.csv', 'denver_sim_2_combined_interpolated.csv','denver_sim_3_combined_interpolated.csv',
              'detroit_sim_1_combined_interpolated.csv', 'detroit_sim_2_incomplete_combined_interpolated.csv', 
              'detroit_sim_2_successful_combined_interpolated.csv','detroit_sim_3_combined_interpolated.csv',
              'jacksonville_sim_1_combined_interpolated.csv', 'jacksonville_sim_2_combined_interpolated.csv',
              'jacksonville_sim_3_combined_interpolated.csv', 'lasvegas_sim_1_combined_interpolated.csv',
              'lasvegas_sim_2_combined_interpolated.csv', 'lasvegas_sim_3_combined_interpolated.csv']

# declare the dict 
file_sensor_dict = dict()
file_motor_dict = dict()
file_len = len(file_names)

# loop through file names, open then, convert to list, and add to dict as a numpy array with i as the key
for i in range(file_len):
    with open(file_names[i], "r") as f:
        reader = pd.read_csv(f)
        # exclude heater due to errors and sensor/motor times
        reader.drop(['heater','Sensor Time', 'Motor Time'], inplace = True, axis = 1)
        sensor_data = [list(x[0:16]) for x in reader.values]
        motor_data = [list(x[16::]) for x in reader.values]
        file_sensor_dict[i] = sensor_data
        file_motor_dict[i] = motor_data

# check the conversion
print(file_sensor_dict[0])
print(file_motor_dict[0])


[[53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2], [53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2], [53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2], [53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2], [53.0, 70.2, 54.4, 69.4, 53.6, 69.8, 53.8, 69.6, 53.4, 70.0, 54.8, 69.6, 53.9, 69.6, 53.4, 70.2], [52.6, 70.5, 54.4, 69.6, 53.6, 69.8, 53.8, 69.8, 53.4, 70.0, 54.8, 69.6, 53.9, 69.6, 53.4, 70.2], [52.1, 70.9, 54.5, 69.6, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 53.8, 69.6, 53.3, 70.2], [52.1, 70.9, 54.5, 69.6, 53.7, 70.0, 53.9, 69.6, 53.4, 70.0, 54.8, 69.6, 53.9, 69.6, 53.4, 70.2], [51.8, 71.2, 54.5, 69.6, 53.6, 70.0, 53.9, 69.6, 53.4, 70.0, 54.7, 69.6, 53.8, 69.8, 53.4, 70.2], [51.9, 71.4, 54.5, 69.6, 53.6, 70.0, 53.9, 69.6, 53.5, 70.0, 54.7, 69.6, 53.8, 69.8, 53.5, 70.2], [51.7, 71.6, 54.5, 

In [3]:
# fix random seed so this can be reproduced
seed_val = 7
def reset_random_seeds():
    tf.random.set_seed(seed_val)
    np.random.seed(seed_val)
    random.seed(seed_val)
    
reset_random_seeds()

In [4]:
# setting some specifications for model based on our data
num_features = 36 # i think this is number of variables but im not 100% sure
batch_size = 64 # the number of chunks of data being fed into the ml algorithm at a time (always a power of 2)
time_steps = 10 # the size of each chunk of data being fed into the ml algorithm
shift_steps = 15 # the number of cells to shift the y values so the predictions line up with the current data
train_percent = 0.8 # the percentage of the input files to set aside for training

In [22]:
# ** converting all training data into batches and putting it into a file **

def train_batches(batch_size, time_steps, shift_steps):
    # for each file
    train_set = [[None for x in range(2)] for i in range(8192)]
    ind = 0
    for i in range(file_len - 5):
        # get num rows 
        current_sensor = file_sensor_dict[i]
        current_motor = file_motor_dict[i]
        current_len = len(current_sensor)
        # start at i and i + 15 (j)
        for j in range(0, current_len - shift_steps + 1):
            print(ind)
            current_batch = [None] * time_steps
            for k in range(0,time_steps):
                current_batch[k] = current_input_row = list(np.append(current_sensor[j + k:j + k + 1], current_motor[j + k:j + k + 1]))
            train_set[ind][0] = current_batch
            train_set[ind][1] = current_sensor[j + shift_steps - 1: j + shift_steps]
            #print(current_sensor[j: j + time_steps].append(current_motor[j:j + time_steps]))
            print(train_set[ind][0])
            ind += 1
                
    return train_set

train_set = train_batches(batch_size, time_steps, shift_steps)


with open('train_batches.csv', "w", newline = "") as x:
    writeX = csv.writer(x)
    writeX.writerows(train_set) 

0
[[53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [53.0, 70.0, 54.5, 69.4, 53.6, 69.8, 53.9, 69.6, 53.5, 70.0, 54.8, 69.6, 54.0, 69.6, 53.4, 70.2, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [53.0, 70.2, 54.4, 69.4, 53.6, 69.8, 53.8, 69.6, 53.4, 70.0, 54.8, 69.6, 53.9, 69.6, 53.4, 70.2, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [52.6, 70.5, 54.4, 69.6, 53.6, 69.8, 53.8, 69.8, 53.4, 70

[[47.3, 77.0, 53.7, 73.6, 55.3, 72.7, 55.7, 72.5, 53.8, 73.0, 52.5, 74.1, 50.6, 74.5, 55.3, 72.9, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [47.4, 77.0, 53.6, 73.6, 55.3, 72.7, 55.7, 72.5, 53.8, 73.0, 52.5, 74.1, 50.6, 74.5, 55.3, 72.9, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [47.5, 77.0, 53.6, 73.6, 55.2, 72.7, 55.7, 72.5, 53.8, 73.0, 52.5, 74.1, 50.5, 74.7, 55.3, 72.9, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [47.5, 77.0, 53.6, 73.6, 55.2, 72.7, 55.7, 72.5, 53.8, 73.0, 52.5, 74.1, 50.5, 74.7, 55.3, 72.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [47.5, 77.0, 53.6, 73.6, 55.2, 72.7, 55.7, 72.5, 53.8, 73.0, 52.5, 74.1, 50.5, 74.7, 55.3, 72.9, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [47.5, 77.0, 53.6, 73.6, 55.2, 72.7, 55.7, 72.5, 53.8, 73.0

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



PermissionError: [Errno 13] Permission denied: 'train_batches.csv'

In [25]:
# ** converting all testing data into batches and putting it into a file **

def test_batches(batch_size, time_steps, shift_steps):
    # for each file
    test_set = [[None for x in range(2)] for i in range(8192)]
    ind = 0
    for i in range(file_len - 5, file_len):
        # get num rows 
        current_sensor = file_sensor_dict[i]
        current_motor = file_motor_dict[i]
        current_len = len(current_sensor)
        # start at i and i + 15 (j)
        for j in range(0, current_len - shift_steps + 1):
            print(ind)
            current_batch = [None] * time_steps
            for k in range(0,time_steps):
                current_batch[k] = current_input_row = list(np.append(current_sensor[j + k:j + k + 1], current_motor[j + k:j + k + 1]))
            test_set[ind][0] = current_batch
            test_set[ind][1] = current_sensor[j + shift_steps - 1: j + shift_steps]
            #print(current_sensor[j: j + time_steps].append(current_motor[j:j + time_steps]))
            print(test_set[ind][0])
            ind += 1
                
    return test_set

test_set = test_batches(batch_size, time_steps, shift_steps)


with open('test_batches.csv', "w", newline = "") as x:
    writeX = csv.writer(x)
    writeX.writerows(test_set)

0
[[50.7, 70.7, 53.4, 69.8, 51.1, 71.1, 51.2, 70.9, 53.4, 70.5, 53.0, 70.5, 51.2, 70.7, 51.5, 71.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [50.7, 70.7, 53.4, 69.8, 51.0, 71.1, 51.2, 70.9, 53.3, 70.3, 53.1, 70.5, 51.2, 70.7, 51.4, 71.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [50.7, 70.7, 53.4, 69.8, 51.0, 71.1, 51.2, 70.9, 53.3, 70.3, 53.1, 70.5, 51.2, 70.7, 51.4, 71.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [50.7, 70.7, 53.4, 69.8, 51.0, 71.1, 51.2, 70.9, 53.3, 70.3, 53.1, 70.5, 51.2, 70.7, 51.4, 71.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [50.7, 70.7, 53.4, 69.8, 51.0, 71.1, 51.2, 70.9, 53.3, 70.3, 53.1, 70.5, 51.2, 70.7, 51.4, 71.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [50.7, 70.7, 53.4, 69.8, 51.0, 71.1, 51.2, 70.9, 53.3, 70

602
[[52.1, 71.6, 51.9, 72.1, 52.1, 71.8, 53.0, 71.6, 53.6, 72.5, 52.3, 72.3, 50.4, 72.3, 53.4, 72.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], [52.1, 71.6, 51.9, 72.1, 52.1, 71.8, 52.5, 71.6, 53.6, 72.3, 52.4, 72.3, 50.5, 72.3, 53.5, 72.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], [52.2, 71.6, 52.1, 72.1, 52.4, 71.8, 52.3, 71.4, 53.7, 72.3, 52.4, 72.3, 50.5, 72.3, 53.5, 72.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], [52.0, 71.6, 52.1, 72.1, 52.5, 71.8, 52.4, 71.6, 53.7, 72.3, 52.4, 72.3, 50.5, 72.1, 53.5, 72.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], [52.0, 71.6, 52.1, 72.1, 52.5, 71.8, 52.4, 71.6, 53.7, 72.3, 52.3, 72.3, 50.5, 72.1, 53.5, 72.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0], [51.9, 71.6, 52.1, 72.1, 52.4, 71.8, 52.4, 71.5, 53.6, 

1008
[[49.1, 77.9, 54.8, 74.3, 56.1, 72.7, 57.1, 72.3, 55.9, 73.2, 54.7, 74.5, 53.0, 74.7, 55.9, 72.7, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.1, 77.9, 54.8, 74.3, 56.2, 72.7, 57.1, 72.3, 55.9, 73.2, 54.7, 74.5, 53.0, 74.7, 55.9, 72.7, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.0, 77.9, 54.7, 74.3, 56.2, 72.7, 57.1, 72.3, 55.9, 73.4, 54.7, 74.5, 52.9, 74.8, 55.9, 72.9, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.0, 77.9, 54.7, 74.3, 56.2, 72.7, 57.1, 72.3, 55.9, 73.4, 54.7, 74.5, 52.9, 74.8, 55.9, 72.9, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.0, 77.9, 54.7, 74.3, 56.2, 72.7, 57.1, 72.3, 55.9, 73.2, 54.7, 74.7, 52.9, 74.8, 55.9, 72.9, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.1, 78.1, 54.7, 74.3, 56.2, 72.7, 57.2, 72.5, 55.9,

[[49.3, 76.1, 49.7, 76.1, 53.7, 73.8, 55.4, 73.2, 54.4, 74.8, 51.3, 74.8, 48.7, 75.7, 55.5, 73.9, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.3, 76.1, 49.7, 76.1, 53.7, 73.8, 55.4, 73.2, 54.4, 74.8, 51.3, 74.8, 48.7, 75.7, 55.5, 73.9, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.4, 75.9, 49.2, 76.1, 53.9, 73.8, 55.4, 73.2, 54.6, 74.8, 51.8, 74.8, 49.1, 75.7, 55.3, 73.9, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.4, 75.9, 49.2, 76.1, 53.9, 73.8, 55.4, 73.2, 54.6, 74.8, 51.8, 74.8, 49.1, 75.7, 55.3, 73.9, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.4, 75.9, 49.2, 76.1, 53.9, 73.8, 55.4, 73.2, 54.6, 74.8, 51.8, 74.8, 49.1, 75.7, 55.3, 73.9, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [49.3, 75.7, 49.2, 76.1, 54.0, 73.8, 55.5, 73.2, 54.5, 74.8

In [None]:
# get random data from different files and batch them into one file
# we have a little over 5,000 data points which gives us approx. 4,000 for training and 1,000 for testing
# 4,000 data points is 26 batches of size 16 (meaning one group of 160 data points)


In [None]:
model = Sequential()






# turn all data into batches
# shuffle all the batches
# split into training and testing