In [1]:
%%time
# Data science libraries
import scipy.io
from scipy.io import savemat
from scipy import signal
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import random

# Pytorch
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor
from torch.utils.data import TensorDataset, DataLoader
from torch import optim
from torch.nn.modules.loss import CrossEntropyLoss

# Others
from IPython.core.debugger import set_trace
from pathlib import Path

from helper import get_df_all, download
from train_helper import get_dataloader, fit, validate 
import nn_model
from data_urls import URLS

Wall time: 5.42 s


In [2]:
# Filesystem layout, all relative to the notebook's working directory.
working_dir = Path('.')
DATA_PATH = Path("./Data")
save_model_path = working_dir.joinpath('Model')  # folder the trained weights are saved into
DE_path = DATA_PATH.joinpath('K001')             # folder handed to get_df_all

### 'force', 'phase_current_1', 'phase_current_2', 'speed', 'temp_2_bearing_module', 'torque','vibration_1'

In [3]:
%%time
# df_all = get_df_all(DE_path, data_cat='vibration_1',segment_length=512, normalize=False)
target = 'label'
df_all = get_df_all(DE_path, data_cat='vibration_1', normalize=True)
# Skip the first two columns (shown as 'label' and 'filename' in the sample output);
# every remaining column is treated as an input feature.
features = df_all.columns[2:]

Wall time: 165 ms


In [4]:
df_all.shape

(501, 514)

In [5]:
df_all.sample(5)

Unnamed: 0,label,filename,0,1,2,3,4,5,6,7,...,502,503,504,505,506,507,508,509,510,511
446,0,N09_M07_F10_K001_1,0.233175,0.00053,0.291336,-0.281967,0.116853,-0.423215,0.166705,-0.024396,...,0.058692,-1.644599,0.299645,-2.62503,0.166705,1.072357,-0.182262,-1.420263,-0.223805,0.831404
197,0,N09_M07_F10_K001_1,1.147136,-0.290275,-0.489685,-0.232114,0.756625,-0.024396,-0.107483,0.00053,...,4.179825,0.025457,-0.049322,-0.149027,-1.436881,-0.356745,-0.45645,-0.25704,-0.157336,-0.041013
352,0,N09_M07_F10_K001_1,0.108544,0.548907,-0.024396,-2.384077,-0.032705,2.110949,-0.007778,-0.788799,...,0.241484,0.116853,-0.913431,-0.298584,-0.365054,-0.140718,0.706773,0.141779,0.341189,-0.024396
459,0,N09_M07_F10_K001_1,-3.397743,0.299645,-0.315202,-0.223805,-7.867842,0.058692,-0.331819,0.125161,...,0.083618,0.507363,0.233175,-0.165644,0.19994,0.175014,0.058692,0.033765,-0.016087,-0.25704
135,0,N09_M07_F10_K001_1,0.13347,0.299645,0.274719,0.083618,0.100235,-0.190571,-0.024396,0.083618,...,-0.115792,-0.149027,-0.298584,-0.281967,-0.157336,-0.115792,0.208249,0.366115,0.357806,0.349497


In [6]:
# wkwkwk = df_all['vibration_1'][0]

In [7]:
# len(wkwkwk)

In [8]:
# Fs = 64e3  # sampling frequency
# dt = 1/Fs  # sampling interval
# t = np.arange(0, 4, dt)

In [9]:
# if len(wkwkwk) != len(t):
#     wkwkwk = wkwkwk[:len(t)]

In [10]:
# figure(figsize=(33, 7))
# plt.xlabel('Seconds', fontsize=27)
# plt.ylabel('Amplitude', fontsize=23)
# fig = plt.plot(t, wkwkwk)


In [11]:
#### HYPERPARAMETERS ####
bs = 64                 # mini-batch size handed to get_dataloader
lr = 0.001              # Adam learning rate
wd = 1e-5               # Adam weight_decay
# NOTE(review): beta1 = 0.99 differs from torch.optim.Adam's default of 0.9 — confirm this is intended.
betas = (0.99, 0.999)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
random_seed = 42

# Seed every RNG the notebook relies on. Previously `random_seed` was only passed to
# train_test_split, so weight initialisation and any shuffling driven by these RNGs
# changed from run to run.
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Load Data and Preprocessing

In [12]:
## Hold out 20% of the rows for validation; the fixed random_state keeps the shuffled split repeatable
X_train, X_valid, y_train, y_valid = train_test_split(
    df_all[features],
    df_all[target],
    test_size=0.20,
    shuffle=True,
    random_state=random_seed,
)

In [13]:
# rand_row_test = random.sample(range(X_valid.shape[0]), 960)
# rand_row_train = random.sample(range(X_train.shape[0]), 500)
# wkwkwk = {'trainData': np.array(X_train.iloc[rand_row_train]), 'testData': np.array(X_valid.iloc[rand_row_test])}
# savemat('alhamdulillah.mat', wkwkwk)

In [14]:
## Create DataLoader of train and validation set
# This cell rebinds X_*/y_* from pandas objects to tensors. Reading `.values` therefore only
# worked the first time the cell ran; np.asarray accepts both the pandas objects and the CPU
# tensors a previous run leaves behind, so the cell can be re-run safely.
X_train = torch.tensor(np.asarray(X_train), dtype=torch.float32)
X_valid = torch.tensor(np.asarray(X_valid), dtype=torch.float32)
y_train = torch.tensor(np.asarray(y_train), dtype=torch.long)
y_valid = torch.tensor(np.asarray(y_valid), dtype=torch.long)

# Pair features with labels, then let the project helper build the batched loaders.
train_ds = TensorDataset(X_train, y_train)
valid_ds = TensorDataset(X_valid, y_valid)
train_dl, valid_dl = get_dataloader(train_ds, valid_ds, bs)

# Training with Adam Optimizer

In [15]:
%%time
# Build the network (sized by the number of feature columns), its loss and its optimizer
model = nn_model.CNN_1D_2L(len(features))
model.to(device)
loss_func = CrossEntropyLoss()
opt = optim.Adam(
    model.parameters(),
    lr=lr,
    betas=betas,
    weight_decay=wd,
)

Wall time: 3.4 s


In [16]:
# model2 = nn_model.CNN_1D_2L(len(features))
# model2.load_state_dict(torch.load(save_model_path / 'model.pth'))
# model2.eval()
# model2.to(device)
# opt = optim.Adam(model2.parameters(), lr=lr, betas=betas, weight_decay=wd)

In [18]:
%%time
## Short training run
epochs = 7
model, metrics, (y_true, predictions) = fit(
    epochs, model, loss_func, opt, train_dl, valid_dl,
    train_metric=False,
)

EPOCH 	 Train Loss 	 Val Loss 	 Train Acc 	 Val Acc 	
0 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
1 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
2 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
3 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
4 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
5 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
6 	 0.00000 	 0.00000 	 0.00000 	1.00000 	
Wall time: 2.12 s


In [13]:
%%time
## Train CNN_1D_3L
# NOTE(review): the heading says CNN_1D_3L, but the only model built in the visible cells is
# nn_model.CNN_1D_2L — confirm which architecture this run is meant to train.
# This call reuses the existing `model` and `opt`, so when it runs after the 7-epoch cell it
# presumably continues training the same weights rather than starting from scratch.
epochs = 100
model, metrics, (y_true, predictions) = fit(epochs, model, loss_func, opt, train_dl, valid_dl, train_metric=False)

EPOCH 	 Train Loss 	 Val Loss 	 Train Acc 	 Val Acc 	
0 	 0.01948 	 1.24169 	 0.00000 	0.44607 	
1 	 0.01654 	 1.16235 	 0.00000 	0.46397 	
2 	 0.01526 	 1.00918 	 0.00000 	0.51673 	
3 	 0.01459 	 0.96704 	 0.00000 	0.57728 	
4 	 0.01408 	 1.00092 	 0.00000 	0.52062 	
5 	 0.01380 	 0.96123 	 0.00000 	0.55136 	
6 	 0.01355 	 0.91958 	 0.00000 	0.59370 	
7 	 0.01329 	 0.88770 	 0.00000 	0.60825 	
8 	 0.01322 	 0.93226 	 0.00000 	0.58911 	
9 	 0.01287 	 0.89141 	 0.00000 	0.61167 	
10 	 0.01273 	 0.91815 	 0.00000 	0.60062 	
11 	 0.01253 	 0.91020 	 0.00000 	0.59432 	
12 	 0.01253 	 0.91560 	 0.00000 	0.58934 	
13 	 0.01237 	 0.88387 	 0.00000 	0.62934 	
14 	 0.01250 	 0.87770 	 0.00000 	0.62903 	
15 	 0.01221 	 0.88144 	 0.00000 	0.61323 	
16 	 0.01203 	 0.85045 	 0.00000 	0.64311 	
17 	 0.01198 	 0.89114 	 0.00000 	0.60615 	
18 	 0.01189 	 0.87758 	 0.00000 	0.62397 	
19 	 0.01192 	 0.87857 	 0.00000 	0.61658 	
20 	 0.01176 	 0.90462 	 0.00000 	0.59424 	
21 	 0.01178 	 0.83239 	 0.00000

In [13]:
# Peek at the first entry of the predictions returned by fit
predictions[0]

1

# Save trained model

In [15]:
# torch.save does not create missing parent directories, so make sure the target
# folder exists before writing the weights.
save_model_path.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), save_model_path / 'model.pth')

In [18]:
%%time
# Evaluate on the validation loader and print the raw result tuple:
# (mean loss, accuracy, (y_true, predictions))
validation_result = validate(model, valid_dl, loss_func)
print(validation_result)

(0.1940675784208433, 0.9641555999529909, (array([0, 0, 0, ..., 0, 0, 0], dtype=int64), array([0, 0, 0, ..., 0, 0, 0], dtype=int64)))
Wall time: 1.29 s


In [None]:
def validate(model, dl, loss_func):
    """Evaluate `model` on every batch of `dl` and return aggregate metrics.

    The model is switched to eval mode and gradients are disabled for the duration
    of the pass; the model's previous train/eval mode is restored afterwards.

    Args:
        model: network to evaluate (uses `parameters()`, `eval()` and `train()`).
        dl: iterable yielding `(xb, yb)` tensor batches.
        loss_func: loss callable forwarded unchanged to `loss_batch`.

    Returns:
        `(mean_loss, accuracy, (y_true, predictions))` where `mean_loss` is the
        batch-size-weighted average of the per-batch losses, `accuracy` is the
        fraction of predictions equal to the labels, and `y_true` / `predictions`
        are NumPy arrays concatenated over all batches.

    NOTE(review): `loss_batch` is not imported in this notebook's visible cells
    (only `get_dataloader`, `fit` and `validate` come from `train_helper`), and this
    definition shadows the imported `validate` — confirm where `loss_batch` lives
    and import it before relying on this cell.
    """
    # Use the device the model actually lives on so the batches always match it;
    # fall back to the previous CUDA-if-available rule for parameter-less models.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    total_loss = 0.0
    total_size = 0
    predictions = []
    y_true = []

    was_training = model.training
    model.eval()  # evaluation must not run in training mode
    try:
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for xb, yb in dl:
                xb, yb = xb.to(device), yb.to(device)
                # presumably loss_batch returns (scalar loss, batch size, predicted labels) — TODO confirm
                loss, batch_size, pred = loss_batch(model, loss_func, xb, yb)
                total_loss += loss * batch_size  # weight by batch size so a short last batch is handled
                total_size += batch_size
                predictions.append(pred)
                y_true.append(yb.cpu().numpy())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    mean_loss = total_loss / total_size
    predictions = np.concatenate(predictions, axis=0)
    y_true = np.concatenate(y_true, axis=0)
    accuracy = np.mean((predictions == y_true))
    return mean_loss, accuracy, (y_true, predictions)

In [21]:
# Rebuild the train/validation loaders from the existing datasets (same call as the earlier DataLoader cell).
train_dl, valid_dl = get_dataloader(train_ds, valid_ds, bs)

<torch.utils.data.dataloader.DataLoader at 0x19f7fc77ec8>