In [2]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from collections import deque
import random
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint

FUTURE_LENGTH = 1
VALIDATION_HEADS = 12
SEQ_LEN = 10
EPOCHS = 10 
BATCH_SIZE = 10 
NAME = f"{SEQ_LEN}-SEQ-{BATCH_SIZE}-BATCH-{int(time.time())}"

  from ._conv import register_converters as _register_converters


In [3]:
def prepare(df):
	df.drop(['Event','Before PM','After PM','end_date','Nozzle-A','Nozzle-B','Nozzle-C','Nozzle-D','Nozzle-E','Nozzle-F','Nozzle-G','Nozzle-H','Nozzle-I','Nozzle-J','Nozzle-K','Nozzle-L','Nozzle-M','Nozzle-N','Nozzle-O','Nozzle-P','Nozzle-Q','Nozzle-R','Nozzle-S','Nozzle-T','Nozzle-U','Nozzle-V','Nozzle-W','Nozzle-X','Nozzle-Y','Nozzle-Z'],axis=1,inplace=True)
	df.start_dt = pd.to_datetime(df.start_dt)
	df.sort_values(['start_dt'],inplace=True)
	df.iloc[:,4:-1] = preprocessing.RobustScaler().fit_transform(df.iloc[:,4:-1])
	df['target'] = df.parent_event.shift(-FUTURE_LENGTH)
	df.dropna(inplace=True)
	df.head

	return df

def preprocess(df,heads, validation=False):

	sequential_data = []
	prev_days = deque(maxlen=SEQ_LEN)

	for head in heads:
		data = df[df['head_id']==head]
		data.sort_values(['start_dt'], inplace=True)
		data.drop(['end_dt','head_id','module_position','parent_event'],axis=1,inplace=True) 
		day = data.iloc[0,0].day        
		for i in data.values:
			if((i[0].day<=day+2)|((i[0].day==1)&(day>=30))):
				prev_days.append(i[1:-1])
				if len(prev_days) == SEQ_LEN:
					sequential_data.append([np.array(prev_days), i[-1]])
			else:
				prev_days.clear()
			day = i[0].day
		prev_days.clear()

	random.shuffle(sequential_data)
	print(pd.DataFrame(sequential_data).shape)

	if validation==False:
		positives = []
		negatives = []

		for seq, target in sequential_data:  
			if target == 0:  
				negatives.append([seq, target])  
			elif target == 1:  
				positives.append([seq, target])  

		random.shuffle(positives)  
		random.shuffle(negatives)  

		lower = min(len(positives), len(negatives)) 

		positives = positives[:lower]  
		negatives = negatives[:2*lower]  
	
		sequential_data = positives+negatives
	
		random.shuffle(sequential_data)
		print(pd.DataFrame(sequential_data).shape)
	
	X = []
	y = []

	for seq, target in sequential_data:  
		X.append(seq)  
		y.append(target) 

	return np.array(X), y

In [1]:
df = pd.read_csv("final_model_sep_27.csv")
df = prepare(df)

heads_train = df.head_id.unique()#[VALIDATION_HEADS:]
heads_validation = df.head_id.unique()#[:VALIDATION_HEADS]

train_x, train_y = preprocess(df,heads_train)
validation_x, validation_y = preprocess(df,heads_validation, validation=True)



model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))


opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
model.save("models/{}".format(NAME))


heads = df.head_id.unique()[:1]
test_X, test_Y = preprocess(df, heads, validation=True)
model.predict(test_X[:1])














  from ._conv import register_converters as _register_converters
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


(2924, 2)
(123, 2)
(2924, 2)
Train on 123 samples, validate on 2924 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.3746243488201528
Test accuracy: 0.8847469220246238
(34, 2)


array([[0.6622421 , 0.33775792]], dtype=float32)

In [26]:
heads_test = df.head_id.unique()[:1]
test_x, test_y = preprocess(df, heads_test, validation=True)
#test_x = test_x[:1]
model.predict(test_x[:1])

(34, 2)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


array([[0.5527676 , 0.44723243]], dtype=float32)

In [27]:
test_y[:1]

[0.0]

In [31]:
model.evaluate(test_x[:100],test_y[:100])



[0.6073197336757884, 0.8529411764705882]

In [24]:
model.metrics_names

['loss', 'acc']

In [4]:
import pandas as pd
df = pd.read_csv("final_model_sep_27.csv")

In [6]:
df.columns

Index(['module_position', 'head_id', 'start_dt', 'end_dt', 'Event', 'duration',
       '80000705_errorline', '8000070F_errorline', '80000751_errorline',
       '80000F01_errorline',
       ...
       'Nozzle-U', 'Nozzle-V', 'Nozzle-W', 'Nozzle-X', 'Nozzle-Y', 'Nozzle-Z',
       'Before PM', 'After PM', 'end_date', 'parent_event'],
      dtype='object', length=448)

In [20]:
df.loc[:,['target','parent_event']]

Unnamed: 0,target,parent_event
1934,0.0,0
2575,0.0,0
216,0.0,0
2535,0.0,0
2510,0.0,0
2449,0.0,0
2406,0.0,0
2363,0.0,0
2320,0.0,0
258,0.0,0


In [17]:
df['target']

KeyError: 'target'

In [19]:
df

Unnamed: 0,module_position,head_id,start_dt,end_dt,duration,80000705_errorline,8000070F_errorline,80000751_errorline,80000F01_errorline,8000390B_errorline,...,HolderSkip,Pickup count,Parts usage,Reject parts,No pickup,Error parts,Dislodged parts,Rescan count,parent_event,target
1934,D-2__1,HR0A1 001015,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.236169,0.236548,0.0,-0.052632,0.250,2.000,0.0,0,0.0
2575,E-1__3,HZ0C1 000879,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.243563,0.243399,0.0,0.484211,1.625,0.000,0.0,0,0.0
216,A1_16__6,HR0A1 001013,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.266520,0.266668,0.0,0.200000,0.075,-0.125,0.0,0,0.0
2535,E-1__2,HZ0C1 000876,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.181548,0.181887,0.0,-0.063158,0.425,1.500,0.0,0,0.0
2510,E-1__1,HE2C2 001985,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.401422,0.400552,24.0,1.315789,1.325,0.750,0.0,0,0.0
2449,D-3__5,HS0A4 003208,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,1.0,3.0,...,0.0,-0.275346,-0.275157,0.0,-0.326316,-0.175,1.500,0.0,0,0.0
2406,D-3__4,HS0A4 003207,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,-0.253024,-0.252890,0.0,-0.252632,-0.200,0.125,0.0,0,0.0
2363,D-3__3,GP0A2 004259,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.241045,-0.240824,0.0,-0.326316,-0.175,-0.125,0.0,0,0.0
2320,D-3__2,HL0A1 004308,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.223996,-0.223782,0.0,-0.305263,-0.250,0.000,0.0,0,0.0
258,A2_16__1,HR0A1 003616,2018-08-14 12:26:19,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.176123,0.176188,0.0,0.200000,0.075,2.500,0.0,0,0.0


In [25]:
dff = df[df['head_id']=='HR0A1 001015']

In [26]:
dff.drop(['end_dt','head_id','module_position','parent_event'],axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [27]:
dff

Unnamed: 0,start_dt,duration,80000705_errorline,8000070F_errorline,80000751_errorline,80000F01_errorline,8000390B_errorline,80810000_errorline,8000060B_errorline,80002C01_errorline,...,ModuleConfig,HolderSkip,Pickup count,Parts usage,Reject parts,No pickup,Error parts,Dislodged parts,Rescan count,target
1934,2018-08-14 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.236169,0.236548,0.0,-0.052632,0.25,2.0,0.0,0.0
1935,2018-08-15 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.176091,-0.175897,0.0,-0.242105,-0.25,0.75,0.0,0.0
1936,2018-08-16 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.27921,-0.279024,0.0,-0.326316,-0.25,0.0,0.0,0.0
1937,2018-08-17 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.451854,1.453047,0.0,0.242105,0.85,2.625,0.0,0.0
1938,2018-08-18 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.167997,0.168281,0.0,-0.021053,-0.05,1.875,0.0,0.0
1939,2018-08-19 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.170247,0.170597,0.0,-0.084211,0.125,0.375,0.0,0.0
1940,2018-08-20 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.631198,0.632187,0.0,-0.294737,0.05,0.25,0.0,0.0
1941,2018-08-21 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.305288,-0.305267,0.0,-0.189474,-0.1,-0.125,0.0,0.0
1942,2018-08-22 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.336974,-0.336842,0.0,-0.326316,-0.25,-0.125,0.0,0.0
1943,2018-08-23 12:26:19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.090577,1.089758,0.0,1.884211,0.825,0.5,0.0,0.0


In [34]:
for i in dff.values:
    print(i[0].day)

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


In [40]:
dff.iloc[0,0]

Timestamp('2018-08-14 12:26:19')

In [46]:
heads_test = ['HR0A1 001015']#df.head_id.unique()[:1]
test_x, test_y = preprocess(df, heads_test,validation=True)
model.predict(test_x[:1])

(34, 2)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


array([[0.9958032 , 0.00419679]], dtype=float32)

In [6]:
heads = df.head_id.unique()[:1]
test_X, test_Y = preprocess(df, heads, validation=True)
model.predict(test_X[-1:])

(34, 2)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


array([[0.412039  , 0.58796096]], dtype=float32)

In [12]:
from tensorflow.keras.models import load_model
MODEL_NAME = 'models/10-SEQ-10-BATCH-1538502212.h5'

df = pd.read_csv("final_model_sep_27.csv")
df = prepare(df)
heads = df.head_id.unique()[:1]
test_X, test_Y = preprocess(df, heads, validation=True)



(34, 2)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [15]:
model = load_model(MODEL_NAME)
model.predict(test_X)

array([[9.9966407e-01, 3.3593507e-04],
       [9.9999404e-01, 5.9257309e-06],
       [9.9957079e-01, 4.2921072e-04],
       [9.9933392e-01, 6.6609005e-04],
       [9.9999392e-01, 6.0940647e-06],
       [9.9990666e-01, 9.3341281e-05],
       [9.9998438e-01, 1.5630783e-05],
       [9.9994063e-01, 5.9314389e-05],
       [9.9921811e-01, 7.8183901e-04],
       [9.9957854e-01, 4.2142480e-04],
       [9.3690765e-01, 6.3092381e-02],
       [9.9911362e-01, 8.8640180e-04],
       [9.9999440e-01, 5.5927590e-06],
       [9.9996102e-01, 3.9016631e-05],
       [9.9935549e-01, 6.4446795e-04],
       [9.9999321e-01, 6.8130448e-06],
       [9.9974424e-01, 2.5570928e-04],
       [9.9995792e-01, 4.2111096e-05],
       [9.9762434e-01, 2.3756225e-03],
       [9.9950576e-01, 4.9426343e-04],
       [9.9999595e-01, 4.0038030e-06],
       [9.9997056e-01, 2.9468045e-05],
       [9.9999368e-01, 6.3120578e-06],
       [9.9901760e-01, 9.8243053e-04],
       [9.9999678e-01, 3.2100911e-06],
       [6.7747939e-01, 3.

In [14]:
model.predict(test_X[-1:])

array([[0.90707403, 0.09292601]], dtype=float32)