In [1]:
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import RepeatedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,BatchNormalization,Flatten,Conv1D,MaxPooling1D,Dropout
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import zipfile
from numpy import array
from numpy import hstack

In [3]:
blocks = [1030, 2030, 3030]
#blocks = [1030, 2030, 3030, 4030, 5030, 6030, 7030]

dfss = []
for block in blocks:
    print(block)
    dfss.append(pd.read_csv('combined_df_'+str(block)+'.csv'))

1030
2030
3030


In [7]:
df = pd.concat(dfss)

In [8]:
df.shape

(1685106, 71)

In [9]:
def get_dataset():
    X = df[['Leading angle','Side tilt angle','Tool Tip Point X',
           'Tool Tip Point Y', 'Tool Tip Point Z', 'Tool Orientation X',
           'Tool Orientation Y', 'Tool Orientation Z',]].copy(deep=True).to_numpy()
    y = df[['MachineX', 'MachineY', 'MachineZ', 'MachineA', 'MachineC']].copy(deep=True).to_numpy()
    
    return X,y

In [None]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in):
    X, y = list(), list()
   
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, 0:8], sequences[end_ix-1, 8:14]  # columns to see which are I/P and O/P features
        
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

# define input sequence
X,y = get_dataset()
print(X.shape,y.shape)
#dataset = np.concatenate((X[0:10000,:],y[0:10000,:]),axis=1)  
dataset = np.concatenate((X,y),axis=1)  
# choose a number of time steps
n_steps_in = 400
# convert into input/output
X, y = split_sequences(dataset, n_steps_in)
print(X.shape,y.shape)

num = 50
size = np.floor(n_steps_in/num)

sub_sampled = np.arange(0,n_steps_in,size, dtype=int)

X_sampled = np.empty((X.shape[0],sub_sampled.shape[0],X.shape[2]))
for i in np.arange(X.shape[0]):
    X_sampled[i] = X[i][sub_sampled][:]

print(X.shape,y.shape)

X_train, X_test, y_train, y_test = train_test_split(X_sampled, y, test_size=0.33, random_state=42)
print(X_train.shape,y_train.shape)

n_ip_features = X_train.shape[2]              
n_op_features = y_train.shape[1]
#print(n_op_features)
# define model

def build_model(hp):
    model = Sequential()
    model.add(Conv1D(filters=hp.Choice('num_filters',values = [600, 1000,1500], default=600,), kernel_size=5, activation='relu', input_shape=(num, n_ip_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=hp.Float('dropout_1',min_value=0.0,max_value=0.5,default=0.25,step=0.05,)))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=600, kernel_size=10, activation='relu'))
    model.add(Dropout(rate=hp.Float('dropout_1',min_value=0.0,max_value=0.5,default=0.25,step=0.05,)))
    model.add(Flatten())
    #model.add(Dense( units=hp.Int("units", min_value=32, max_value=512, step=32), activation='relu'))
    
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(Dense(units=hp.Int("units_" + str(i), min_value=20, max_value=500, step=20,default=160), activation=hp.Choice('dense_activation',values=['relu', 'tanh', 'sigmoid'],default='relu')))
        
    model.add(Dropout(rate=hp.Float('dropout_1',min_value=0.0,max_value=0.5,default=0.25,step=0.05,)))
    model.add(Dense(n_op_features))
    model.compile(optimizer= tf.keras.optimizers.Adam(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)), loss=tf.keras.losses.MeanSquaredError() )  
    #model.compile(optimizer='adam', loss='mse')  
    
    return model

early_stopping = EarlyStopping(monitor='val_loss', patience=100, mode='min')
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True, verbose=1)

tuner = kt.RandomSearch(build_model,objective='val_loss',max_trials=10,executions_per_trial=1,directory='random_search', project_name='RegCNN')
tuner.search(X_train, y_train, validation_data=(X_test, y_test),callbacks=[early_stopping,model_checkpoint],epochs=2000,batch_size = 10)
#tuner.search(X_train, y_train, validation_data=(X_test, y_test),epochs=2000)
                  
tuner.search_space_summary()
                  
tuner.results_summary()
models = tuner.get_best_models(num_models=1)
best_model = models[0]
loss = best_model.evaluate(X_test, y_test)  

history = best_model.fit(X_train, y_train, epochs=2000,validation_data=(X_test,y_test), verbose=0,callbacks=[early_stopping,model_checkpoint], batch_size = 10)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')

plt.legend()
plt.show()

model.summary()
print(loss)

#plot_model(model)


(1685106, 8) (1685106, 5)
(1684707, 400, 8) (1684707, 5)
(1684707, 400, 8) (1684707, 5)
