In [1]:
import os
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.contrib import predictor
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from sklearn.pipeline import Pipeline
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import MinMaxScaler
from numpy import array
from keras.models import model_from_json
import urllib.request as request
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, recall_score
from sklearn.ensemble import RandomForestClassifier

Using TensorFlow backend.


In [3]:

def create_placeholders():
    """Create the two graph inputs used for training.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is a float64 placeholder named "X" of
        shape ``(None, 11)`` and ``Y`` is a float64 placeholder named "Y" of
        shape ``(None, 2)``.
    """
    features = tf.placeholder(dtype=tf.float64, shape=(None, 11), name="X")
    targets = tf.placeholder(dtype=tf.float64, shape=(None, 2), name="Y")
    return features, targets


def initialize_parameters(hidden_nodes):
    """Create the weights and biases of the 11 -> hidden_nodes -> 2 network.

    Weight matrices use a Xavier initializer with a fixed seed of 1; bias
    vectors start at zero. All variables are float64.

    Args:
        hidden_nodes: Width of the hidden layer.

    Returns:
        dict: Variables keyed by "W1", "b1", "W2" and "b2".
    """
    n_inputs, n_outputs = 11, 2

    def _weight(name, shape):
        # A fresh seeded initializer per variable, as in the original layout.
        return tf.get_variable(name, shape, dtype=tf.float64,
                               initializer=tf.contrib.layers.xavier_initializer(seed=1))

    def _bias(name, size):
        return tf.get_variable(name, [size], dtype=tf.float64,
                               initializer=tf.zeros_initializer())

    # Creation order (W1, b1, W2, b2) is kept on purpose.
    first_weights = _weight("W1", [n_inputs, hidden_nodes])
    first_bias = _bias("b1", hidden_nodes)
    second_weights = _weight("W2", [hidden_nodes, n_outputs])
    second_bias = _bias("b2", n_outputs)

    return {"W1": first_weights, "b1": first_bias, "W2": second_weights, "b2": second_bias}


def forward_propagation(X, parameters):
    """Build the forward pass: linear -> ReLU -> linear -> sigmoid.

    Args:
        X: Input tensor that is matrix-multiplied with ``parameters['W1']``.
        parameters: Mapping holding the entries 'W1', 'b1', 'W2' and 'b2'.

    Returns:
        Tensor with the element-wise sigmoid of the second linear layer.
    """
    # Look up every parameter first so a missing key fails before any op is built.
    w_hidden, b_hidden, w_out, b_out = (parameters[key] for key in ('W1', 'b1', 'W2', 'b2'))

    hidden_activation = tf.nn.relu(tf.add(tf.matmul(X, w_hidden), b_hidden))
    output_linear = tf.add(tf.matmul(hidden_activation, w_out), b_out)
    return tf.sigmoid(output_linear)


def compute_cost(y_hat, Y):
    """Return the mean softmax cross-entropy between ``y_hat`` and ``Y``.

    Args:
        y_hat: Tensor handed to the cross-entropy op as its ``logits`` argument.
        Y: Label tensor handed to the op as its ``labels`` argument.

    Returns:
        Scalar tensor: the cross-entropy averaged over all examples.
    """
    # NOTE(review): model_nn passes the output of forward_propagation here, which
    # has already gone through tf.sigmoid, while this op takes its input as
    # logits -- confirm whether the double activation is intended.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=y_hat)
    return tf.reduce_mean(per_example_loss)

def train_model(data_file):
    """Train and persist one model per target column of a CSV file.

    The eleven feature columns are min-max scaled and the fitted scaler is
    saved to ``model/scaler.pkl``. For every target in ``classes``:

    * "Defect" is one-hot encoded with ``pd.get_dummies`` and trained with
      ``model_nn`` on the *scaled* features.
    * every other target is trained with ``model_randomforest`` on the
      *raw* (unscaled) features.

    Args:
        data_file: Path (or anything ``pd.read_csv`` accepts) of the training
            CSV; it must contain the feature and target columns listed below.

    Returns:
        None. The scaler and the models are written under ``model/``.
    """
    feature_names = ["GCS", "GSS", "Permeability", "Moisture", "C", "Mn", "Si", "S", "P", "Cr", "Metal-Temp"]
    classes = ["Scab", "Crack", "Blowhole", "Air-lock", "Misrun", "Defect"]

    train = pd.read_csv(data_file)
    Xtrain = train[feature_names].copy()

    # Normalize input features using MinMax scaling.
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(Xtrain)
    Xtrain_nor = pd.DataFrame(x_scaled)

    # Make sure the output directory exists so this function also works when
    # it is called directly rather than through main().
    os.makedirs('model', exist_ok=True)
    # Save the scaling parameters to disk; Predict() reloads them.
    joblib.dump(min_max_scaler, 'model/scaler.pkl')

    print("====start training>>>>>")
    for itor in classes:
        if itor == "Defect":
            # The network has two output units, so the label is one-hot encoded.
            Ytrain = pd.get_dummies(train[itor])
            model_nn(Xtrain_nor, Ytrain, label=itor)
        else:
            Ytrain = train[itor]
            model_randomforest(Xtrain, Ytrain, itor)

def model_randomforest(X_train, Y_train, label):
    """Fit a 10-tree random forest and dump it to ``model/<label>``.

    Args:
        X_train: Training features passed to ``fit``.
        Y_train: Training targets passed to ``fit``.
        label: Target name; used as the file name inside ``model/``.
    """
    forest = RandomForestClassifier(n_estimators=10)
    forest.fit(X_train, Y_train)
    joblib.dump(forest, "model/" + label)

def model_nn(X_train, Y_train, label="Defect", hidden_nodes=50, learning_rate=0.05,
          momentum=0.09, num_iters=50000, show_log=True):
    """Train the two-layer network with full-batch gradient descent and save it.

    The default graph is reset, the network is built from the sibling helpers
    (``create_placeholders``, ``initialize_parameters``,
    ``forward_propagation``, ``compute_cost``), trained for ``num_iters``
    steps on the whole training set, and checkpointed to ``model/<label>``.

    Args:
        X_train: Object exposing ``.values`` with the training features
            (fed to the "X" placeholder).
        Y_train: Object exposing ``.values`` with the training targets
            (fed to the "Y" placeholder).
        label: Checkpoint name inside ``model/``.
        hidden_nodes: Width of the hidden layer.
        learning_rate: Step size of the gradient-descent optimizer.
        momentum: Accepted for interface compatibility but currently unused;
            the optimizer is plain ``GradientDescentOptimizer``.
        num_iters: Number of training steps.
        show_log: When true, print loss and training accuracy at steps 0, 1
            and every 500th step.

    Returns:
        bool: Always ``True`` once the checkpoint has been written.
    """
    tf.reset_default_graph()
    tf.set_random_seed(1)
    X, Y = create_placeholders()

    # Reuse the shared helper instead of duplicating the variable definitions.
    parameters = initialize_parameters(hidden_nodes)
    Y_hat = forward_propagation(X, parameters)

    cost = compute_cost(Y_hat, Y)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

    # Element-wise agreement between the rounded outputs and the targets.
    correct_pred = tf.equal(tf.round(Y_hat), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    display_step = 500
    export_dir = "model/" + label
    # The batch never changes, so build the feed dict once outside the loop.
    feed = {X: X_train.values, Y: Y_train.values}

    # The context manager closes the session even if a training step raises.
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_iters):
            _, loss, acc = sess.run([optimizer, cost, accuracy], feed_dict=feed)
            if show_log and (i % display_step == 0 or i == 1):
                print("Step {} _loss = {:.4f}, Training Accuracy= {:.3f}".format(i, loss, acc))
        saver.save(sess, export_dir)
    return True

def Predict(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11):
    """Predict every target for a single sample of eleven feature values.

    The saved scaler is loaded from ``model/scaler.pkl``. "Defect" is predicted
    by ``predict_nn`` on the scaled sample; all other targets are predicted by
    ``predict_randomforest`` on the raw sample.

    Args:
        arg1..arg11: The eleven feature values, in the order they are placed
            into the single-row input array.

    Returns:
        list: One prediction per target, ordered as Scab, Crack, Blowhole,
        Air-lock, Misrun, Defect.
    """
    raw_sample = array([[arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11]])

    # Apply the normalization that was saved at training time.
    scaler = joblib.load('model/scaler.pkl')
    scaled_sample = scaler.transform(raw_sample)

    targets = ["Scab", "Crack", "Blowhole", "Air-lock", "Misrun", "Defect"]
    predictions = []
    for target in targets:
        if target != "Defect":
            # The forest returns an array for the one-row input; keep its only entry.
            predictions.append(predict_randomforest(raw_sample, target)[0])
        else:
            predictions.append(predict_nn(scaled_sample, target))
    return predictions

def predict_randomforest(X_predict, label):
    """Load the model stored at ``model/<label>`` and return its predictions.

    Args:
        X_predict: Samples passed to the loaded model's ``predict``.
        label: File name of the dumped model inside ``model/``.

    Returns:
        Whatever the loaded model's ``predict`` returns for ``X_predict``.
    """
    forest = joblib.load("model/" + label)
    return forest.predict(X_predict)
    
def predict_nn(X_predict, label, hidden_nodes=50):
    """Restore the saved network and classify the first row of ``X_predict``.

    The default graph is reset, variables "W1", "b1", "W2" and "b2" are
    re-declared with the training shapes, and their values are restored from
    the checkpoint ``model/<label>``.

    Args:
        X_predict: Samples fed to a float64 placeholder of shape ``(None, 11)``.
        label: Checkpoint name inside ``model/``.
        hidden_nodes: Hidden-layer width used to shape the restored variables.

    Returns:
        int: ``0`` if the first output of the first row is strictly greater
        than the second output, otherwise ``1``.
    """
    tf.reset_default_graph()

    variable_shapes = (
        ("W1", [11, hidden_nodes]),
        ("b1", [hidden_nodes]),
        ("W2", [hidden_nodes, 2]),
        ("b2", [2]),
    )
    parameters = {name: tf.get_variable(name, shape, dtype=tf.float64)
                  for name, shape in variable_shapes}

    X = tf.placeholder(shape=(None, 11), dtype=tf.float64)

    saver = tf.train.Saver()
    checkpoint_path = "model/" + label
    with tf.Session() as sess:
        # Restore variables from disk before evaluating the forward pass.
        saver.restore(sess, checkpoint_path)
        y_pred = forward_propagation(X, parameters)
        prediction = sess.run(y_pred, feed_dict={X: X_predict})

    first_row = prediction[0]
    return 0 if first_row[0] > first_row[1] else 1


def main():
    """Train all models from ``input_overall.csv`` and print one sample prediction."""
    model_dir = 'model'
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    train_model("input_overall.csv")

    # One hand-written sample: eleven feature values in Predict's argument order.
    sample = (348880, 130340, 170, 4.5, 0.29, 0.92, 0.42, 0.041, 0.038, 0.017, 1610)
    predict = Predict(*sample)

    print(predict)
    
# Entry point: run the train-then-predict demo only when __name__ is '__main__'.
if __name__ == '__main__':
    main()

  return self.partial_fit(X, y)


====start training>>>>>
Step 0 _loss = 0.6863, Training Accuracy= 0.512
Step 1 _loss = 0.6860, Training Accuracy= 0.512
Step 500 _loss = 0.6415, Training Accuracy= 0.649
Step 1000 _loss = 0.6035, Training Accuracy= 0.732
Step 1500 _loss = 0.5803, Training Accuracy= 0.768
Step 2000 _loss = 0.5670, Training Accuracy= 0.786
Step 2500 _loss = 0.5575, Training Accuracy= 0.786
Step 3000 _loss = 0.5487, Training Accuracy= 0.792
Step 3500 _loss = 0.5398, Training Accuracy= 0.786
Step 4000 _loss = 0.5303, Training Accuracy= 0.792
Step 4500 _loss = 0.5214, Training Accuracy= 0.804
Step 5000 _loss = 0.5144, Training Accuracy= 0.810
Step 5500 _loss = 0.5089, Training Accuracy= 0.815
Step 6000 _loss = 0.5043, Training Accuracy= 0.839
Step 6500 _loss = 0.5002, Training Accuracy= 0.845
Step 7000 _loss = 0.4966, Training Accuracy= 0.845
Step 7500 _loss = 0.4935, Training Accuracy= 0.851
Step 8000 _loss = 0.4906, Training Accuracy= 0.851
Step 8500 _loss = 0.4881, Training Accuracy= 0.851
Step 9000 _los

'10'