In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import os
from scipy import signal
import lightgbm as lgb
from sklearn.preprocessing import MinMaxScaler
# import shap

In [2]:
train = pd.read_csv("Dataset/train.csv")
train.head()

Unnamed: 0,No,material,feedrate,clamp_pressure,tool_condition,machining_finalized,passed_visual_inspection
0,1,wax,6,4.0,unworn,yes,yes
1,2,wax,20,4.0,unworn,yes,yes
2,3,wax,6,3.0,unworn,yes,yes
3,4,wax,6,2.5,unworn,no,
4,5,wax,20,3.0,unworn,no,


In [3]:
le1=LabelEncoder()
le1.fit(train['material'])
train['Encoded_material']=le1.transform(train['material'])

le2=LabelEncoder()
le2.fit(train['tool_condition'])
train['Encoded_tool_condition']=le2.transform(train['tool_condition'])

le3=LabelEncoder()
le3.fit(train['machining_finalized'])
train['Encoded_machining_finalized']=le3.transform(train['machining_finalized'])

le4=LabelEncoder()
le4.fit(train['feedrate'])
train['Encoded_feedrate']=le4.transform(train['feedrate'])

le5=LabelEncoder()
le5.fit(train['clamp_pressure'])
train['Encoded_clamp_pressure']=le5.transform(train['clamp_pressure'])

In [4]:
frames = []
for i in range(1,19):
    ex_num = '0' + str(i) if i < 10 else str(i)
    frame = pd.read_csv("Dataset/experiment_{}.csv".format(ex_num))
    ex_result_row = train[train['No'] == i]
    frame.replace({'Machining_Process': {'Starting':'Prep','end':'End'}}, inplace=True)
    le6=LabelEncoder()
    le6.fit(frame['Machining_Process'])
    frame['Machining_Process']=le6.transform(frame['Machining_Process'])
    frame['Feedrate'] = ex_result_row.iloc[0]['Encoded_feedrate']
    frame['Clamp_pressure'] = ex_result_row.iloc[0]['Encoded_clamp_pressure']
    frame['tool_condition'] = ex_result_row.iloc[0]['Encoded_tool_condition']
    frames.append(frame)

In [5]:
frames[0]

Unnamed: 0,X1_ActualPosition,X1_ActualVelocity,X1_ActualAcceleration,X1_CommandPosition,X1_CommandVelocity,X1_CommandAcceleration,X1_CurrentFeedback,X1_DCBusVoltage,X1_OutputCurrent,X1_OutputVoltage,...,S1_OutputVoltage,S1_OutputPower,S1_SystemInertia,M1_CURRENT_PROGRAM_NUMBER,M1_sequence_number,M1_CURRENT_FEEDRATE,Machining_Process,Feedrate,Clamp_pressure,tool_condition
0,198.0,0.000,0.00,198.0,0.0,0.000000,0.18,0.0207,329.0,2.77,...,0.0,6.960000e-07,12.0,1.0,0.0,50.0,7,1,2,0
1,198.0,-10.800,-350.00,198.0,-13.6,-358.000000,-10.90,0.1860,328.0,23.30,...,0.0,-5.270000e-07,12.0,1.0,4.0,50.0,7,1,2,0
2,196.0,-17.800,-6.25,196.0,-17.9,-0.000095,-8.59,0.1400,328.0,30.60,...,0.0,9.100000e-07,12.0,1.0,7.0,50.0,7,1,2,0
3,194.0,-18.000,0.00,194.0,-17.9,-0.000095,-6.11,0.1300,327.0,30.30,...,0.0,1.070000e-06,12.0,1.0,7.0,50.0,7,1,2,0
4,193.0,-17.900,-18.80,192.0,-17.9,0.000095,-5.70,0.1140,328.0,30.50,...,0.0,1.960000e-06,12.0,1.0,7.0,50.0,7,1,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050,141.0,0.175,87.50,141.0,0.0,0.000000,-2.84,0.0522,325.0,7.39,...,118.0,1.790000e-01,12.0,1.0,132.0,50.0,0,1,2,0
1051,141.0,-0.150,-87.50,141.0,0.0,0.000000,-1.85,0.0522,325.0,2.69,...,117.0,1.380000e-01,12.0,1.0,132.0,50.0,0,1,2,0
1052,141.0,-0.175,-87.50,141.0,0.0,0.000000,-2.20,0.0527,325.0,2.22,...,120.0,1.920000e-01,12.0,1.0,132.0,50.0,0,1,2,0
1053,141.0,0.150,87.50,141.0,0.0,0.000000,-4.02,0.0502,326.0,5.80,...,120.0,1.780000e-01,12.0,1.0,132.0,50.0,0,1,2,0


In [6]:
for frame in frames:
    for ax in ['X','Y','Z','S']:
        frame[f'{ax}1_Position_Diff'] = abs(frame[f'{ax}1_CommandPosition']-frame[f'{ax}1_ActualPosition'])
        frame[f'{ax}1_Velocity_Diff'] = abs(frame[f'{ax}1_CommandVelocity']-frame[f'{ax}1_ActualVelocity'])
        frame[f'{ax}1_Acceleration_Diff'] = abs(frame[f'{ax}1_CommandAcceleration']-frame[f'{ax}1_ActualAcceleration'])

In [7]:
for col in ['ActualPosition','ActualVelocity','ActualAcceleration','CurrentFeedback','DCBusVoltage','OutputCurrent','OutputVoltage','OutputPower']:
    dt = 0.1
    for frame in frames:
        for ax in ['X','Y','Z','S']:
            try:
                f = frame.reset_index()[f'{ax}1_{col}']
            except:
                continue
                
            N = len(f)
            t = np.arange(0, N*dt, dt)
            freq = np.linspace(0, 1.0/dt, N)
            F = np.fft.fft(f)
            F_abs = np.abs(F) / (N/2) 
            F_abs[0] = F_abs[0] / 2
            maximal_idx = signal.argrelmax(F_abs, order=1)[0]

            high_amp = np.max(F_abs[maximal_idx]) if len(maximal_idx) > 0 else 0
            high_freq = freq[maximal_idx][np.argmax(F_abs[maximal_idx])] if len(maximal_idx) > 0 else 0

            frame[f'{ax}1_{col}_High_Amp'] = high_amp
            frame[f'{ax}1_{col}_High_Freq'] = high_freq
            frame[f'{ax}1_{col}_High_Amp_Freq'] = high_amp * high_freq

In [8]:
frames[0]

Unnamed: 0,X1_ActualPosition,X1_ActualVelocity,X1_ActualAcceleration,X1_CommandPosition,X1_CommandVelocity,X1_CommandAcceleration,X1_CurrentFeedback,X1_DCBusVoltage,X1_OutputCurrent,X1_OutputVoltage,...,S1_OutputVoltage_High_Amp_Freq,X1_OutputPower_High_Amp,X1_OutputPower_High_Freq,X1_OutputPower_High_Amp_Freq,Y1_OutputPower_High_Amp,Y1_OutputPower_High_Freq,Y1_OutputPower_High_Amp_Freq,S1_OutputPower_High_Amp,S1_OutputPower_High_Freq,S1_OutputPower_High_Amp_Freq
0,198.0,0.000,0.00,198.0,0.0,0.000000,0.18,0.0207,329.0,2.77,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
1,198.0,-10.800,-350.00,198.0,-13.6,-358.000000,-10.90,0.1860,328.0,23.30,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
2,196.0,-17.800,-6.25,196.0,-17.9,-0.000095,-8.59,0.1400,328.0,30.60,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
3,194.0,-18.000,0.00,194.0,-17.9,-0.000095,-6.11,0.1300,327.0,30.30,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
4,193.0,-17.900,-18.80,192.0,-17.9,0.000095,-5.70,0.1140,328.0,30.50,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050,141.0,0.175,87.50,141.0,0.0,0.000000,-2.84,0.0522,325.0,7.39,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
1051,141.0,-0.150,-87.50,141.0,0.0,0.000000,-1.85,0.0522,325.0,2.69,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
1052,141.0,-0.175,-87.50,141.0,0.0,0.000000,-2.20,0.0527,325.0,2.22,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793
1053,141.0,0.150,87.50,141.0,0.0,0.000000,-4.02,0.0502,326.0,5.80,...,0.222398,0.000492,0.199241,0.000098,0.000725,9.867173,0.00715,0.011293,3.358634,0.03793


In [9]:
feature_df = pd.concat(frames, ignore_index = True)
y_series = feature_df['tool_condition']
x_df = feature_df.drop(['tool_condition'], axis=1) 
X_train, X_valid, Y_train, Y_valid = train_test_split(x_df, y_series, test_size=0.2, random_state=0, stratify=y_series)

lgb_train = lgb.Dataset(X_train, Y_train)
lgb_valid = lgb.Dataset(X_valid, Y_valid, reference=lgb_train)

In [10]:
params = {
    'task' : 'train',
    'boosting' : 'gbdt',
    'objective': 'binary',
    'metric': 'l2',
    'num_leaves': 200,
    'feature_fraction': 1.0,
    'bagging_fraction': 1.0,
    'bagging_freq': 0,
    'min_child_samples': 5
}
gbm_tool_wear = lgb.train(params,
            lgb_train,
            num_boost_round=100,
            valid_sets=lgb_valid,
            early_stopping_rounds=100)

[LightGBM] [Info] Number of positive: 10646, number of negative: 9582
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10479
[LightGBM] [Info] Number of data points in the train set: 20228, number of used features: 138
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.526300 -> initscore=0.105298
[LightGBM] [Info] Start training from score 0.105298
[1]	valid_0's l2: 0.202065
Training until validation scores don't improve for 100 rounds
[2]	valid_0's l2: 0.164086
[3]	valid_0's l2: 0.133432
[4]	valid_0's l2: 0.108622
[5]	valid_0's l2: 0.0885
[6]	valid_0's l2: 0.0721553
[7]	valid_0's l2: 0.0588624
[8]	valid_0's l2: 0.0480409
[9]	valid_0's l2: 0.0392243
[10]	valid_0's l2: 0.0320365
[11]	valid_0's l2: 0.0261733
[12]	valid_0's l2: 0.0213885
[13]	valid_0's l2: 0.0174821
[14]	valid_0's l2: 0.0142918
[15]	valid_0's l2: 0.0116857
[16]	valid_0's l2: 0.00955611
[17]	valid_0's l2: 0.00781562
[18]	valid_0's l2: 0.00639284
[19]	valid_0's l2: 0.00522958
[20]	valid

[80]	valid_0's l2: 2.61066e-08
[81]	valid_0's l2: 2.13742e-08
[82]	valid_0's l2: 1.74997e-08
[83]	valid_0's l2: 1.43275e-08
[84]	valid_0's l2: 1.17304e-08
[85]	valid_0's l2: 9.604e-09
[86]	valid_0's l2: 7.86309e-09
[87]	valid_0's l2: 6.43774e-09
[88]	valid_0's l2: 5.27078e-09
[89]	valid_0's l2: 4.31534e-09
[90]	valid_0's l2: 3.5331e-09
[91]	valid_0's l2: 2.89266e-09
[92]	valid_0's l2: 2.36831e-09
[93]	valid_0's l2: 1.939e-09
[94]	valid_0's l2: 1.58752e-09
[95]	valid_0's l2: 1.29975e-09
[96]	valid_0's l2: 1.06415e-09
[97]	valid_0's l2: 8.7125e-10
[98]	valid_0's l2: 7.13319e-10
[99]	valid_0's l2: 5.84016e-10
[100]	valid_0's l2: 4.78152e-10
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 4.78152e-10


In [12]:
pred = lgb.predict(valid)

AttributeError: module 'lightgbm' has no attribute 'predict'

In [41]:
feature_imp = pd.DataFrame()
feature_imp['feature'] = gbm_tool_wear.feature_name()
feature_imp['importance'] = gbm_tool_wear.feature_importance()
feature_imp

Unnamed: 0,feature,importance
0,X1_ActualPosition,244
1,X1_ActualVelocity,177
2,X1_ActualAcceleration,97
3,X1_CommandPosition,4
4,X1_CommandVelocity,16
...,...,...
150,Y1_OutputPower_High_Freq,0
151,Y1_OutputPower_High_Amp_Freq,0
152,S1_OutputPower_High_Amp,0
153,S1_OutputPower_High_Freq,0


In [45]:
mask = feature_imp['importance'] != 0
features = feature_imp.loc[mask]

In [48]:
features = features['feature'].tolist()

In [49]:
len(features)

32

In [50]:
features.append('tool_condition')

In [55]:
for frame in frames:
    frame.drop(columns=[col for col in frame if col not in features], inplace = True)

In [157]:
frames[0].columns

Index(['X1_ActualPosition', 'X1_ActualVelocity', 'X1_ActualAcceleration',
       'X1_CommandPosition', 'X1_CommandVelocity', 'X1_CommandAcceleration',
       'X1_CurrentFeedback', 'X1_DCBusVoltage', 'X1_OutputVoltage',
       'X1_OutputPower', 'Y1_ActualPosition', 'Y1_ActualVelocity',
       'Y1_ActualAcceleration', 'Y1_CommandPosition', 'Y1_CommandVelocity',
       'Y1_CommandAcceleration', 'Y1_CurrentFeedback', 'Y1_DCBusVoltage',
       'Y1_OutputVoltage', 'Y1_OutputPower', 'Z1_ActualPosition',
       'Z1_CommandPosition', 'S1_ActualVelocity', 'S1_ActualAcceleration',
       'S1_CommandVelocity', 'S1_CurrentFeedback', 'S1_DCBusVoltage',
       'S1_OutputPower', 'tool_condition', 'X1_Acceleration_Diff',
       'Y1_Acceleration_Diff', 'Y1_ActualAcceleration_High_Amp',
       'X1_OutputPower_High_Freq'],
      dtype='object')

In [87]:
# frames1 = []
# frames2 = []
# for frame in frames[:15]:
#     frames1.append(frame)
# for frame in frames[15:]:
#     frames2.append(frame)

In [89]:
# feature_df = pd.concat(frames1, ignore_index = True)
# y_train = feature_df['tool_condition']
# X_train = feature_df.drop(['tool_condition'], axis=1) 
# feature_df = pd.concat(frames2, ignore_index = True)
# y_test = feature_df['tool_condition']
# X_test = feature_df.drop(['tool_condition'], axis=1) 

In [61]:
feature_df = pd.concat(frames1, ignore_index = True)
y_series = feature_df['tool_condition']
x_df = feature_df.drop(['tool_condition'], axis=1) 
X_train, X_test, y_train, y_test = train_test_split(x_df, y_series, test_size=0.3, random_state=0, stratify=y_series)

In [62]:
from sklearn.linear_model import  LogisticRegression
from sklearn import svm
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.linear_model import SGDClassifier

In [90]:
LR = LogisticRegression(C=1e20)
LR.fit(X_train, y_train)
print(LR.score(X_test,y_test))

0.8105894105894106


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [91]:
SVM_tool_condition = svm.SVC()
SVM_tool_condition.fit(X_train, y_train)
print("\n========== Support vector machine score of tool_condition  ==========")
print(SVM_tool_condition.score(X_test,y_test))


0.8579420579420579


In [92]:
mlp_tool_condition = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1, max_iter = 100000)
mlp_tool_condition.fit(X_train, y_train)
print("\n========== MLPClassifier score of tool_condition  ==========")
print(mlp_tool_condition.score(X_test,y_test))


0.4909090909090909


In [93]:
nrc_tool_condition= NearestCentroid()
nrc_tool_condition.fit(X_train, y_train)
print("\n========== NearestCentroid score of tool_condition  ==========")
print(nrc_tool_condition.score(X_test,y_test))


0.45814185814185815


In [94]:
SGD_tool_condition= SGDClassifier(loss="hinge", penalty="l2", max_iter=10000)
SGD_tool_condition.fit(X_train, y_train)
print("\n========== SGDClassifier score of tool_condition  ==========")
print(SGD_tool_condition.score(X_test,y_test))


0.8721278721278721


In [95]:
tree_tool_condition = tree.DecisionTreeClassifier()
tree_tool_condition.fit(X_train, y_train)
print("\n========== DecisionTreeClassifier score of tool_condition  ==========")
print(tree_tool_condition.score(X_test, y_test))


1.0


In [154]:
from sklearn.model_selection import KFold
from statistics import mean

max_scores = []
for i in range(5):
    kf = KFold(n_splits = 3, shuffle = True)
    scores = []

    for i in range(3):
        result = next(kf.split(frames), None)
        frames = np.array(frames)
        train = frames[result[0]]
        test = frames[result[1]]
        feature_df = pd.concat(train, ignore_index = True)
        y_train = feature_df['tool_condition']
        X_train = feature_df.drop(['tool_condition'], axis=1) 
        feature_df1 = pd.concat(test, ignore_index = True)
        y_test = feature_df1['tool_condition']
        X_test = feature_df1.drop(['tool_condition'], axis=1)
        LR = LogisticRegression(C=1e20, max_iter = 1000)
        LR.fit(X_train, y_train)
        scores.append(LR.score(X_test,y_test))

    max_scores.append(mean(scores))
print(max(max_scores))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.789932131926813


In [155]:
max_scores = []
for i in range(5):
    kf = KFold(n_splits = 3, shuffle = True)
    scores = []

    for i in range(3):
        result = next(kf.split(frames), None)
        frames = np.array(frames)
        train = frames[result[0]]
        test = frames[result[1]]
        feature_df = pd.concat(train, ignore_index = True)
        y_train = feature_df['tool_condition']
        X_train = feature_df.drop(['tool_condition'], axis=1) 
        feature_df1 = pd.concat(test, ignore_index = True)
        y_test = feature_df1['tool_condition']
        X_test = feature_df1.drop(['tool_condition'], axis=1)
        SVM_tool_condition = svm.SVC()
        SVM_tool_condition.fit(X_train, y_train)
        scores.append(SVM_tool_condition.score(X_test,y_test))

    max_scores.append(mean(scores))
print(max(max_scores))

0.5981014238754704


In [158]:
max_scores = []
for i in range(5):
    kf = KFold(n_splits = 3, shuffle = True)
    scores = []

    for i in range(3):
        result = next(kf.split(frames), None)
        frames = np.array(frames)
        train = frames[result[0]]
        test = frames[result[1]]
        feature_df = pd.concat(train, ignore_index = True)
        y_train = feature_df['tool_condition']
        X_train = feature_df.drop(['tool_condition'], axis=1) 
        feature_df1 = pd.concat(test, ignore_index = True)
        y_test = feature_df1['tool_condition']
        X_test = feature_df1.drop(['tool_condition'], axis=1)
        mlp_tool_condition = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(100, 100, 2), random_state=1, max_iter = 1000)
        mlp_tool_condition.fit(X_train, y_train)
        scores.append(mlp_tool_condition.score(X_test,y_test))

    max_scores.append(mean(scores))
print(max(max_scores))

0.44308219354512973


In [159]:
max_scores = []
for i in range(5):
    kf = KFold(n_splits = 3, shuffle = True)
    scores = []

    for i in range(3):
        result = next(kf.split(frames), None)
        frames = np.array(frames)
        train = frames[result[0]]
        test = frames[result[1]]
        feature_df = pd.concat(train, ignore_index = True)
        y_train = feature_df['tool_condition']
        X_train = feature_df.drop(['tool_condition'], axis=1) 
        feature_df1 = pd.concat(test, ignore_index = True)
        y_test = feature_df1['tool_condition']
        X_test = feature_df1.drop(['tool_condition'], axis=1)
        SGD_tool_condition= SGDClassifier(loss="hinge", penalty="l2", max_iter=1000)
        SGD_tool_condition.fit(X_train, y_train)
        scores.append(SGD_tool_condition.score(X_test,y_test))

    max_scores.append(mean(scores))
print(max(max_scores))

0.7481837521478371


In [162]:
max_scores = []
for i in range(3):
    kf = KFold(n_splits = 3, shuffle = True)
    scores = []

    for i in range(3):
        result = next(kf.split(frames), None)
        frames = np.array(frames)
        train = frames[result[0]]
        test = frames[result[1]]
        feature_df = pd.concat(train, ignore_index = True)
        y_train = feature_df['tool_condition']
        X_train = feature_df.drop(['tool_condition'], axis=1) 
        feature_df1 = pd.concat(test, ignore_index = True)
        y_test = feature_df1['tool_condition']
        X_test = feature_df1.drop(['tool_condition'], axis=1)
        tree_tool_condition = tree.DecisionTreeClassifier()
        tree_tool_condition.fit(X_train, y_train)
        scores.append(tree_tool_condition.score(X_test,y_test))

    max_scores.append(mean(scores))
print(max(max_scores))

0.9447699100016981


In [165]:
from sklearn.ensemble import RandomForestClassifier

max_scores = []
for i in range(3):
    kf = KFold(n_splits = 3, shuffle = True)
    scores = []

    for i in range(3):
        result = next(kf.split(frames), None)
        frames = np.array(frames)
        train = frames[result[0]]
        test = frames[result[1]]
        feature_df = pd.concat(train, ignore_index = True)
        y_train = feature_df['tool_condition']
        X_train = feature_df.drop(['tool_condition'], axis=1) 
        feature_df1 = pd.concat(test, ignore_index = True)
        y_test = feature_df1['tool_condition']
        X_test = feature_df1.drop(['tool_condition'], axis=1)
        tree_tool_condition = RandomForestClassifier(max_depth=10, random_state=0)
        tree_tool_condition.fit(X_train, y_train)
        scores.append(tree_tool_condition.score(X_test,y_test))

    max_scores.append(mean(scores))
print(max(max_scores))

0.9018377429107776
