In [1]:
#things to be fixed in the model: clear sequence if gap greater than 2 days
#look forward for one day

import numpy as np
import pandas as pd
from sklearn import preprocessing
from collections import deque
import random
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint

VALIDATION_HEADS = 12
SEQ_LEN = 10
EPOCHS = 10  
BATCH_SIZE = 10 
NAME = f"{SEQ_LEN}-SEQ-{BATCH_SIZE}-BATCH-{int(time.time())}"



def prepare(df):
	df.drop(['Event','Before PM','After PM','end_date','Nozzle-A','Nozzle-B','Nozzle-C','Nozzle-D','Nozzle-E','Nozzle-F','Nozzle-G','Nozzle-H','Nozzle-I','Nozzle-J','Nozzle-K','Nozzle-L','Nozzle-M','Nozzle-N','Nozzle-O','Nozzle-P','Nozzle-Q','Nozzle-R','Nozzle-S','Nozzle-T','Nozzle-U','Nozzle-V','Nozzle-W','Nozzle-X','Nozzle-Y','Nozzle-Z'],axis=1,inplace=True)
	df.start_dt = pd.to_datetime(df.start_dt)
	df.sort_values(['start_dt'],inplace=True)
	df.iloc[:,4:-1] = preprocessing.RobustScaler().fit_transform(df.iloc[:,4:-1])
	

	return df

def preprocess(df,heads, validation=False):

	sequential_data = []
	prev_days = deque(maxlen=SEQ_LEN)

	for head in heads:
		data = df[df['head_id']==head]
		data.index = data.start_dt
		data.sort_index(axis=1, inplace=True)
		data.drop(['start_dt','end_dt','head_id','module_position'],axis=1,inplace=True) 
		for i in data.values:
			prev_days.append(i[:-1])
			if len(prev_days) == SEQ_LEN:
				sequential_data.append([np.array(prev_days), i[-1]])
		prev_days.clear()

	random.shuffle(sequential_data)
    print(sequential_data.shape)

	if validation==False:
		positives = []
		negatives = []

		for seq, target in sequential_data:  
			if target == 0:  
				negatives.append([seq, target])  
			elif target == 1:  
				positives.append([seq, target])  

		random.shuffle(positives)  
		random.shuffle(negatives)  

		lower = min(len(positives), len(negatives)) 

		positives = positives[:lower]  
		negatives = negatives[:lower]  
	
		sequential_data = positives+negatives
	
		random.shuffle(sequential_data)
        print(sequential_data.shape)
	
	X = []
	y = []

	for seq, target in sequential_data:  
		X.append(seq)  
		y.append(target) 

	return np.array(X), y


df = pd.read_csv("final_model_sep_27.csv")
df = prepare(df)

heads_train = df.head_id.unique()[VALIDATION_HEADS:]
heads_validation = df.head_id.unique()

train_x, train_y = preprocess(df,heads_train)
validation_x, validation_y = preprocess(df,heads_validation, validation=True)



model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))


opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
model.save("models/{}".format(NAME))

heads_test = df.head_id.unique()[2]
test_x, test_y = preprocess(df, heads_test)
model.predict(test_x)
















  from ._conv import register_converters as _register_converters


(3870, 418)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Train on 72 samples, validate on 2970 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.6468585929485282
Test accuracy: 0.5734006736013625


ValueError: Error when checking input: expected cu_dnnlstm_input to have 3 dimensions, but got array with shape (0, 1)