In [46]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	n_vars = 1 if type(data) is list else data.shape[1]
	df = pd.DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = pd.concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg

# features = ['SWTP Total Influent Flow', 'SWTP Plant 1 Influent Flow', 'SWTP Plant 2 Influent Flow',
#             'Wilsons Gauge Height (ft)', 'James Gauge Height (ft)', 
#             'Fire 120 Hour Rainfall Aggregate', 'Bingham 120 Hour Rainfall Aggregate', 'Field 120 Hour Rainfall Aggregate', 
#             'Springfield Plateau Aquifer Depth to Water Level (ft)', 'Ozark Aquifer Depth to Water Level (ft)']
features = ['SWTP Total Influent Flow', 'Fire Rainfall (in)', 'Bingham Rainfall (in)', 'SWTP Plant 1 Gravity Flow']

df = pd.read_csv("Train and Test Data.csv", usecols=features)
data = df.values
n_hours_out = 24
transformedDf = series_to_supervised(data, n_hours_out, 1)

# columns we want
cols = np.array(transformedDf.columns)
# desiredCols = cols[:10*24]	# all previous t-24 to t-1 data to predict t
desiredCols = cols[:10]		# t-24 to predict t	
desiredCols = np.append(desiredCols, cols[-10])
removeCols = np.array([x for x in cols if x not in desiredCols])
transformedDf.drop(columns=removeCols, inplace=True)

scaler = MinMaxScaler()
data = transformedDf.values
data = scaler.fit_transform(data)
bias = np.ones((data.shape[0], 1))		# adding in a bias
data = np.hstack((bias, data))

y = np.array([[x] for x in data[:, -1]])
X = np.delete(data, -1, axis=1)

In [50]:
numTrain = int(y.shape[0] * 0.75)
X_train, X_test = X[:numTrain], X[numTrain:]
y_train, y_test = y[:numTrain], y[numTrain:]
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(26607, 11)
(26607, 1)
(8870, 11)
(8870, 1)
