In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np

# load dataset and show the first run
case1run1 = pd.read_csv('./mill_data/case1run1.csv')
case1run2 = pd.read_csv('./mill_data/case1run2.csv')
columns=['case', 'run', 'VB', 'time', 'doc','feed','material','smcAC', 'smcDC', 'vib_table', 'vib_spindle', 'AE_table', 'AE_spindle']
case1run1.columns = columns
case1run2.columns = columns

In [4]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing, metrics
from ipywidgets import widgets, interactive
import gc
import joblib
import glob
import os
import warnings
from datetime import datetime, timedelta
from typing import Union
from tqdm.notebook import tqdm_notebook as tqdm
from itertools import cycle
import datetime as dt
from torch.autograd import Variable
import random
from matplotlib.pyplot import figure
import torch
import torch.nn as nn
from copy import deepcopy
from torch.utils.data import Dataset
from sklearn.metrics import mean_squared_error



warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
from scipy.signal import hilbert, chirp

from resnet_lstm_cbam import cbam_ResNet_lstm

In [9]:
def sliding_windows(data):
    x = []
    for i in range(int((len(data)-512)/100)):
        _x = data[i*100:i*100+512]
        x.append(_x)
    x=np.array(x)
    return x

In [10]:
def labeling(data,lab):
    x = []
    for i in range(int((len(data)-512)/100)):
        _x = data[i*100:i*100+512]
        x.append(lab)

    return np.array(x)

In [12]:
path1 = './mill_data/train/'
path2 = './mill_data/test/'

file_list1 = os.listdir(path1)
file_list2 = os.listdir(path2)

In [15]:
fs = 100000
def hibert_transform(data):
    analytic_signal = hilbert(data)
    amplitude_envelope = np.abs(analytic_signal)
    instantaneous_phase = np.unwrap(np.angle(analytic_signal))
    instantaneous_frequency = (np.diff(instantaneous_phase) /(2.0*np.pi) * fs)
    return amplitude_envelope,instantaneous_phase

In [16]:
def to_dataset(data,label):
    smcAC=np.expand_dims(sliding_windows(data['smcAC'][2000:-2000]),axis=2)
    evelop,ins_freq=hibert_transform(data['smcAC'][2000:-2000])
    evelop_d=np.expand_dims(sliding_windows(evelop),axis=2)
    ins_freq_d=np.expand_dims(sliding_windows(ins_freq),axis=2)

    # smcDC=np.expand_dims(sliding_windows(data['smcDC'][3000:-3000],4),axis=2)
    vib_table=np.expand_dims(sliding_windows(data['vib_table'][2000:-2000]),axis=2)
    vib_spindle=np.expand_dims(sliding_windows(data['vib_spindle'][2000:-2000]),axis=2)
    AE_table=np.expand_dims(sliding_windows(data['AE_table'][2000:-2000]),axis=2)
    AE_spindle=np.expand_dims(sliding_windows(data['AE_spindle'][2000:-2000]),axis=2)
    xdata = np.concatenate((smcAC,vib_table,vib_spindle,AE_table,AE_spindle,evelop_d,ins_freq_d),axis=2)
    ydata = labeling(data[2000:-2000], label)

    return xdata,ydata

In [17]:
def to_casedataset_tr(data_list):
    empty_x=np.zeros(((1, 512, 7)))
    empty_y=np.zeros(((1)))

    for i in data_list:
        pdd=pd.read_csv('./mill_data/train/'+i)
        pdd.columns = columns
        lab=pdd['VB'][0]
        if str(lab)!='nan':
            x_,y_=to_dataset(pdd,lab)
            empty_x=np.concatenate((empty_x,x_),axis=0)
            empty_y=np.concatenate((empty_y,y_),axis=0)
    empty_x=np.transpose(empty_x[1:],(0,2,1))

    return empty_x,empty_y[1:]

In [18]:
def to_casedataset_ts(data_list):
    empty_x=np.zeros(((1, 512, 7)))
    empty_y=np.zeros(((1)))

    for i in data_list:
        pdd=pd.read_csv('./mill_data/test/'+i)
        pdd.columns = columns
        lab=pdd['VB'][0]
        if str(lab)!='nan':
            x_,y_=to_dataset(pdd,lab)
            empty_x=np.concatenate((empty_x,x_),axis=0)
            empty_y=np.concatenate((empty_y,y_),axis=0)
    empty_x=np.transpose(empty_x[1:],(0,2,1))

    return empty_x,empty_y[1:]

In [19]:
case1list=[file for file in file_list1 if file.startswith('case')]
case9list=[file for file in file_list2 if file.startswith('case9')]

In [21]:
c1_x,c1_y=to_casedataset_tr(case1list)

In [22]:
c9_x,c9_y=to_casedataset_ts(case9list)

In [26]:
class SequenceDataset(Dataset):
    def __init__(self, x, y):
        self.x=x
        self.y=y
        
    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, i): 
        data=self.x[i]
        label=self.y[i]

        return data,label

In [27]:
c1_x = Variable(torch.Tensor(np.array(c1_x)))
c1_y = Variable(torch.Tensor(np.array(c1_y)))
c9_x = Variable(torch.Tensor(np.array(c9_x)))
c9_y = Variable(torch.Tensor(np.array(c9_y)))


print("train shape is:",c1_x.size())
print("train label shape is:",c1_y.size())
print("test shape is:",c9_x.size())
print("test label shape is:",c9_y.size())

train shape is: torch.Size([2068, 7, 512])
train label shape is: torch.Size([2068])
test shape is: torch.Size([396, 7, 512])
test label shape is: torch.Size([396])


In [28]:
c1_dataset=SequenceDataset(c1_x,c1_y)
c9_dataset=SequenceDataset(c9_x,c9_y)

In [29]:
torch.manual_seed(99)

train_loader = DataLoader(c1_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(c9_dataset, batch_size=1, shuffle=False)


In [65]:
learning_rate = 1e-3
# num_hidden_units = 4
epochs = 50
model = cbam_ResNet_lstm(BasicBlock, [2, 2, 2, 2])
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [66]:
from copy import deepcopy
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()

    for X, y in data_loader:
        output = model(X)
        loss = torch.sqrt(loss_function(output, y))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    return avg_loss

def test_model(data_loader, model, loss_function):

    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += torch.sqrt(loss_function(output, y)).item()

    avg_loss = total_loss / num_batches
    return avg_loss

In [67]:

print("Untrained test")
print(test_model(test_loader, model, loss_function))
best_fitness = -100000


Untrained test
0.21377789586631937


In [68]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10, threshold_mode='abs',min_lr=1e-8, verbose=True)


In [None]:
fig, axs = plt.subplots(1, 2)
for epoch in range(45):
    print("run:", epoch)
    train_loss = train_model(train_loader, model, loss_function, optimizer=optimizer)
    test_loss = test_model(test_loader, model, loss_function)
    print("train_loss:",train_loss)
    print("test_loss:",test_loss)
    axs[0].scatter(epoch, train_loss, color='g')
    axs[1].scatter(epoch, test_loss, color='r')
    fitness = -test_loss
    scheduler.step(test_loss)

    if fitness > best_fitness:
        best_fitness = fitness
        best_model = deepcopy(model)
axs[0].set_yscale('log')
axs[1].set_yscale('log')
plt.show()
plt.cla()
plt.clf()
plt.close()

run: 0
train_loss: 0.1416539456981879
test_loss: 0.20886810981866086
run: 1
train_loss: 0.14061608750086566
test_loss: 0.2086678066036918
run: 2
train_loss: 0.14056312132340212
test_loss: 0.2082288899656498
run: 3
train_loss: 0.14055488705635072
test_loss: 0.2087863405997103
run: 4
train_loss: 0.1403837536390011
test_loss: 0.2083234034403406
run: 5
train_loss: 0.1405871768410389
test_loss: 0.20812203441605423
run: 6
train_loss: 0.14063247052522806
test_loss: 0.20807502486489035
run: 7
train_loss: 0.14006182906719356
test_loss: 0.20844225374737171
run: 8
train_loss: 0.14066043484669466
test_loss: 0.2078573229638013
run: 9
train_loss: 0.1406086374933903
test_loss: 0.20868051277868677
run: 10
train_loss: 0.14062879360639133
test_loss: 0.2086037141506118
run: 11
train_loss: 0.14088000471775347
test_loss: 0.2083182077516209
Epoch 00012: reducing learning rate of group 0 to 5.0000e-04.
run: 12
train_loss: 0.14071000482027346
test_loss: 0.20839732606904676
run: 13
train_loss: 0.14078478721471

In [None]:
def test_model2(data_loader, model, loss_function):

    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    a=[]
    b=[]
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += torch.sqrt(loss_function(output, y)).item()
            a.append(y)
            b.append(output)

    avg_loss = total_loss / num_batches
    return avg_loss,a,b

In [None]:
test_loader = DataLoader(c9_dataset, batch_size=int(len(c9_dataset)/13), shuffle=False)
print('Predictions on test set')

In [None]:
c,v,n=test_model2(test_loader, model, loss_function)

In [None]:
q,w=[],[]
for i in range(len(v)):
    q.append(sum(v[i])/len(v[i]))
    w.append(sum(n[i])/len(n[i]))

In [None]:
plt.plot(q, 'bo-',color='r')
plt.plot(w, 'bo-',color='b')

In [None]:
train_loader = DataLoader(c1_dataset, batch_size=int(len(c1_dataset)/13), shuffle=False)
print('Predictions on train set')

In [None]:
c,v,n=test_model2(train_loader, model, loss_function)
q,w=[],[]
for i in range(len(v)):
    q.append(sum(v[i])/len(v[i]))
    w.append(sum(n[i])/len(n[i]))
    
plt.plot(q, 'bo-',color='r')
plt.plot(w, 'bo-',color='b')