In [1]:
import pandas as pd
import numpy as np

In [2]:
import matplotlib
import matplotlib.pyplot as plt
% matplotlib inline

In [3]:
# Load the feature matrix and the label vector, keeping only the first
# N_ROWS rows of each so the two stay aligned row-for-row.
N_ROWS = 150000
X, y = (np.load(path)[:N_ROWS] for path in ("X.npy", "y.npy"))

In [4]:
# Sanity check: (rows, columns) of the loaded feature matrix.
X.shape

(150000, 39)

In [5]:
# Class counts: w1 = number of samples labelled 1, w0 = number labelled 0.
w1 = int(np.count_nonzero(y == 1))
w0 = int(np.count_nonzero(y == 0))
w1, w0

(10000, 140000)

In [6]:
# One minus the ratio of label-1 count to label-0 count.
# NOTE(review): this is not the label-0 fraction w0 / (w0 + w1); confirm which
# quantity was intended here.
1-w1/w0

0.9285714285714286

In [7]:
# Per-sample weights derived from the class counts.
# n scales the label-0 count before the two classes are normalised against
# each other:
#   label 1 -> (w0 * n) / (w0 * n + w1)
#   label 0 ->  w1      / (w0 * n + w1)
# Any other label value keeps its own value cast to float.
n = float(1.3)
scaled_neg = w0 * n
total = scaled_neg + w1
w = np.where(
    y == 1,
    scaled_neg / total,
    np.where(y == 0, w1 / total, y.astype(float)),
)

In [8]:
# Feature names for the columns of X.
# The array is stored with dtype=object, which NumPy serialises via pickle;
# np.load refuses such files unless allow_pickle=True (the default is False
# since NumPy 1.16.3).
# SECURITY: unpickling can execute arbitrary code, so only load this file
# from a trusted source.
col_names = np.load("col_names.npy", allow_pickle=True)
len(col_names)

39

In [9]:
# Display the loaded names (presumably one per column of X; the lengths match).
col_names

array(['POR1_A3ES1_AMP', 'POR1_A3ES1_V2', 'POR1_A3ES1_V3',
       'POR1_A3ES1SCREW_RPM', 'Q11_AD1_RTD2', 'Q11_KB_PT2',
       'Q11_2WA1A_PV', 'Q11_2WA1A_SP', 'Q11_2WA1A_WEI', 'Q11_2WB1_PV',
       'Q11_2WB1_SP', 'Q11_2WB1_WEI', 'Q11_2WS1_PV', 'Q11_2WS1_SP_NEW',
       'Q11_2WS1_WEI', 'Q11_1C_PV', 'Q11_1C_SP', 'Q11_1D_PV', 'Q11_1D_SP',
       'Q11_1H_PV', 'Q11_1H_SP', 'Q11_2C_PV', 'Q11_2C_SP', 'Q11_2D_PV',
       'Q11_2D_SP', 'Q11_3C_PV', 'Q11_3C_SP', 'Q11_3D_PV', 'Q11_3D_SP',
       'Q11_4C_PV', 'Q11_4C_SP', 'Q11_5C_PV', 'Q11_5C_SP', 'Q11_6C_PV',
       'Q11_6C_SP', 'Q11_7C_PV', 'Q11_7C_SP', 'Q11_8C_PV', 'Q11_8C_SP'],
      dtype=object)

# Data Preprocessing

In [10]:
# Impute missing values column by column, in place on X:
#   - a column that is entirely NaN is filled with 0
#   - otherwise NaNs are replaced by the median of the column's non-NaN values
nan_mask = np.isnan(X)
for c in np.flatnonzero(nan_mask.any(axis=0)):
    missing = nan_mask[:, c]
    fill_value = 0 if missing.all() else np.median(X[~missing, c])
    X[missing, c] = fill_value

In [11]:
# Cut the row sequence into consecutive, non-overlapping windows of `steps`
# rows each, stacked along a new leading axis: (windows, steps, features).
# This relies on the row count being a multiple of `steps`; otherwise the
# last window would be shorter than the others.
steps = 500
window_starts = range(0, X.shape[0], steps)
X_ = np.array([X[start:start + steps, :] for start in window_starts])
X_.shape

(300, 500, 39)

In [12]:
# One integer label per window: the label of the window's first row
# (every `steps`-th entry of y).
y_ = y[::steps].astype(int)
y_.shape

(300,)

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Hold out 33% of the windows for evaluation. The fixed random_state makes the
# split reproducible; no stratification is requested.
X_train, X_test, y_train, y_test = train_test_split(
    X_,
    y_,
    test_size=0.33,
    random_state=42,
)

# 1-D CNN

In [15]:
from sklearn.metrics import confusion_matrix

In [16]:
import keras

Using TensorFlow backend.


In [17]:
from keras.models import Sequential
from keras.layers import Activation, Conv1D, AvgPool1D, BatchNormalization, LeakyReLU, Flatten, Dense, Dropout
from keras.optimizers import Adam, SGD
from keras import backend as K
#from keras.losses import binary_crossentropy, MSE

In [18]:
# Start from an empty Sequential model.
model = Sequential()

In [19]:
# Layer stack for the 1-D CNN, listed in the order the layers are applied:
# two Conv1D layers (8 filters / kernel 7, then 16 filters / kernel 3) with no
# activation specified on either, batch normalisation, LeakyReLU(0.2),
# average pooling, flatten, and a single-unit Dense layer followed by a
# sigmoid. No input shape is declared on the first layer.
layers = [Conv1D(8, kernel_size=7),  
          Conv1D(16, kernel_size=3),  
          BatchNormalization(), 
          LeakyReLU(0.2), 
          AvgPool1D(), 
          Flatten(), 
          Dense(1),
          Activation('sigmoid')
         ]

In [20]:
# Build the model from the layer list in a single step.
# Appending with model.add() in a loop is not idempotent: re-running the cell
# would add the same layer instances again to the already-populated model.
# Constructing a new Sequential from the list gives the same stack each time.
model = Sequential(layers)

In [21]:
def w_loss(alpha=0.95):
    """Build a class-weighted squared-error loss for binary (0/1) targets.

    Each sample's squared error is multiplied by ``alpha`` when its target is 1
    and by ``1 - alpha`` when its target is 0, and the weighted errors are
    averaged over all elements.

    Args:
        alpha: Weight applied to errors on target-1 samples; target-0 samples
            get ``1 - alpha``.

    Returns:
        A function ``loss(y_true, y_pred)`` usable as a Keras loss.
    """
    def loss(y_true, y_pred):
        # Labels may arrive as integers; cast them to the prediction dtype so
        # the arithmetic below does not mix integer and floating tensors.
        y_true = K.cast(y_true, K.dtype(y_pred))
        squared_error = (y_pred - y_true) ** 2
        # alpha where y_true == 1, (1 - alpha) where y_true == 0.
        sample_weight = y_true * alpha + (1 - y_true) * (1 - alpha)
        return K.mean(sample_weight * squared_error)
    return loss

In [22]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def train(opt=Adam, lr=3e-4, w_loss_alpha=0.95, epochs=2):
    """Compile and fit the global ``model``, then report test-set metrics.

    The module-level ``model`` is compiled and fitted in place on
    ``X_train`` / ``y_train``; nothing here rebuilds it, so repeated calls
    operate on the same model object. Predictions on ``X_test`` are
    thresholded at 0.5 and compared with ``y_test``.

    Args:
        opt: Optimizer class; it is instantiated as ``opt(lr)``.
        lr: Learning rate passed to the optimizer.
        w_loss_alpha: ``alpha`` forwarded to ``w_loss``.
        epochs: Number of training epochs.

    Returns:
        None. The confusion matrix and two per-predicted-class ratios are
        printed.
    """
    model.compile(opt(lr), w_loss(w_loss_alpha))
    model.fit(X_train, y_train,
              epochs=epochs,
              # Roughly ten batches per epoch, but never a batch size of 0.
              batch_size=max(1, X_train.shape[0] // 10)
             )

    # Threshold once; anything not strictly above 0.5 (NaN included) becomes 0.
    y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

    cm = confusion_matrix(y_test, y_pred, labels=[0,1])
    tn, fp, fn, tp = cm.ravel()
    print('confusion_matrix: \n', cm)
    print('tn, fp, fn, tp: ', tn, fp, fn, tp)
    # Share of correct predictions among samples predicted as 0 / as 1.
    # _safe_ratio yields NaN without a warning when a class is never predicted.
    print('predict label=0 判斷正確率: ', _safe_ratio(tn, fn+tn) )
    print('predict label=1 判斷正確率: ', _safe_ratio(tp, tp+fp) )

In [23]:
# First run: Adam, lr=3e-4, alpha=0.95 for the weighted loss, 2 epochs.
train(opt=Adam, lr=3e-4, w_loss_alpha=0.95, epochs=2)

Epoch 1/2
Epoch 2/2
confusion_matrix: 
 [[88  0]
 [11  0]]
tn, fp, fn, tp:  88 0 11 0
predict label=0 判斷正確率:  0.8888888888888888
predict label=1 判斷正確率:  nan




In [None]:
# Lower learning rate (3e-5), 5 epochs.
# NOTE(review): train() reuses the global model, so this presumably continues
# from the weights left by any earlier run rather than starting fresh — confirm.
train(opt=Adam, lr=3e-5, w_loss_alpha=0.95, epochs=5)

In [None]:
# Learning rate 5e-5, 5 epochs.
# NOTE(review): train() reuses the global model, so this presumably continues
# from the weights left by any earlier run rather than starting fresh — confirm.
train(opt=Adam, lr=5e-5, w_loss_alpha=0.95, epochs=5)