In [1]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-o366g3ni because the default path (/home/mosaic-ai/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
# Lift pandas' display truncation so wide/long frames are shown in full.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix,plot_confusion_matrix, f1_score,roc_auc_score, roc_curve, recall_score
from sklearn import model_selection

from sklearn.preprocessing import MinMaxScaler,StandardScaler,RobustScaler

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

acc_log=[]

In [4]:
# Load the predictive-maintenance dataset (the file name suggests it has
# already been upsampled) and preview the first rows.
# NOTE(review): the path is absolute; confirm /data is mounted wherever this runs.
df=pd.read_csv("/data/predictive_maintenance_upsample.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Type_L,Type_M,Failure Type,Split
0,0,295.5,305.9,1542.0,36.2,12.0,1.0,0.0,1,train
1,1,304.0,313.2,1416.0,46.0,128.0,1.0,0.0,1,train
2,2,300.5,311.8,1591.0,42.0,182.0,0.0,1.0,1,train
3,3,302.5,312.0,1487.0,46.1,175.0,0.0,1.0,1,train
4,4,298.0,308.1,1403.0,44.8,148.0,0.0,1.0,1,train


In [5]:
df['Failure Type'].value_counts()

1    9627
3    6796
0    6794
2    6792
5    6772
4    6765
Name: Failure Type, dtype: int64

In [6]:
# Remove the "Unnamed: 0" column (presumably an index saved into the CSV).
# Reassigning instead of dropping in place, together with errors="ignore",
# keeps this cell safe to re-run: a second run no longer raises KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [7]:
df.head()

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Type_L,Type_M,Failure Type,Split
0,295.5,305.9,1542.0,36.2,12.0,1.0,0.0,1,train
1,304.0,313.2,1416.0,46.0,128.0,1.0,0.0,1,train
2,300.5,311.8,1591.0,42.0,182.0,0.0,1.0,1,train
3,302.5,312.0,1487.0,46.1,175.0,0.0,1.0,1,train
4,298.0,308.1,1403.0,44.8,148.0,0.0,1.0,1,train


In [None]:
# Partition rows by the "Split" marker column.
df_train = df.loc[df["Split"].eq("train")]
df_test = df.loc[df["Split"].eq("test")]

In [None]:
df_train.shape, df_test.shape

In [None]:
# Model inputs: five sensor readings plus the one-hot encoded "Type" columns.
feature_col = [
    "Air temperature [K]",
    "Process temperature [K]",
    "Rotational speed [rpm]",
    "Torque [Nm]",
    "Tool wear [min]",
    "Type_L",
    "Type_M",
]

# Column holding the integer-encoded failure class.
target_col = "Failure Type"

In [None]:
# Select the model inputs through the shared `feature_col` list so the feature
# set is defined in exactly one place and cannot drift between cells.
X_train = df_train[feature_col]

X_test = df_test[feature_col]

In [None]:
# Select the labels through the shared `target_col` name instead of repeating
# the column literal.
y_train = df_train[target_col]
y_test = df_test[target_col]

In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both splits so no test information leaks into the fit.
ro_scaler = RobustScaler()
ro_scaler.fit(X_train)
X_train_scaled = ro_scaler.transform(X_train)
X_test_scaled = ro_scaler.transform(X_test)

In [None]:
X_train_scaled.shape

# Gaussian Naive Bayes

In [None]:
# Fit a Gaussian Naive Bayes classifier on the scaled training features.
gnb=GaussianNB()
gnb.fit(X_train_scaled,y_train)
y_pred_gnb=gnb.predict(X_test_scaled)

# All scores are percentages rounded to two decimals.
gnb_train = round(gnb.score(X_train_scaled, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred). The weighted F1 weights each
# class by its support in y_true, so swapping the arguments changes the result.
gnb_accuracy = round(accuracy_score(y_test, y_pred_gnb) * 100, 2)
gnb_f1score = round(f1_score(y_test, y_pred_gnb, average="weighted")*100, 2)

print("Training Accuracy    :",gnb_train ,"%")
print("Model Accuracy Score :",gnb_accuracy ,"%")
print("F1 Score             :",gnb_f1score, "%")
print("\033[1m--------------------------------------------------------\033[0m")
print("Classification_Report: \n",classification_report(y_test,y_pred_gnb))
print("\033[1m--------------------------------------------------------\033[0m")
plot_confusion_matrix(gnb, X_test_scaled, y_test);

# Support Vector Machine

In [None]:
# Fit a linear-kernel support vector classifier on the scaled training features.
svc=SVC(kernel="linear")
svc.fit(X_train_scaled,y_train)
y_pred_svc=svc.predict(X_test_scaled)

# All scores are percentages rounded to two decimals.
svc_train = round(svc.score(X_train_scaled, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred). The weighted F1 weights each
# class by its support in y_true, so swapping the arguments changes the result.
svc_accuracy = round(accuracy_score(y_test, y_pred_svc) * 100, 2)
svc_f1score = round(f1_score(y_test, y_pred_svc, average="weighted")*100, 2)

print("Training Accuracy    :",svc_train ,"%")
print("Model Accuracy Score :",svc_accuracy ,"%")
print("F1 Score             :",svc_f1score, "%")
print("\033[1m--------------------------------------------------------\033[0m")
print("Classification_Report: \n",classification_report(y_test,y_pred_svc))
print("\033[1m--------------------------------------------------------\033[0m")
plot_confusion_matrix(svc, X_test_scaled, y_test);

In [None]:
# Fit a polynomial-kernel support vector classifier on the scaled training features.
svc_p=SVC(kernel="poly")
svc_p.fit(X_train_scaled,y_train)
y_pred_svc_p=svc_p.predict(X_test_scaled)

# All scores are percentages rounded to two decimals.
svc_p_train = round(svc_p.score(X_train_scaled, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred). The weighted F1 weights each
# class by its support in y_true, so swapping the arguments changes the result.
svc_p_accuracy = round(accuracy_score(y_test, y_pred_svc_p) * 100, 2)
svc_p_f1score = round(f1_score(y_test, y_pred_svc_p, average="weighted")*100, 2)

print("Training Accuracy    :",svc_p_train ,"%")
print("Model Accuracy Score :",svc_p_accuracy ,"%")
print("F1 Score             :",svc_p_f1score, "%")
print("\033[1m--------------------------------------------------------\033[0m")
print("Classification_Report: \n",classification_report(y_test,y_pred_svc_p))
print("\033[1m--------------------------------------------------------\033[0m")
plot_confusion_matrix(svc_p, X_test_scaled, y_test);

# Logistic Regression

In [None]:
# Fit a logistic regression classifier (library defaults) on the scaled training features.
log_reg=LogisticRegression()
log_reg.fit(X_train_scaled,y_train)
y_pred_log=log_reg.predict(X_test_scaled)

# All scores are percentages rounded to two decimals.
log_train = round(log_reg.score(X_train_scaled, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred). The weighted F1 weights each
# class by its support in y_true, so swapping the arguments changes the result.
log_accuracy = round(accuracy_score(y_test, y_pred_log) * 100, 2)
log_f1score = round(f1_score(y_test, y_pred_log, average="weighted")*100, 2)

print("Training Accuracy    :",log_train ,"%")
print("Model Accuracy Score :",log_accuracy ,"%")
print("F1 Score             :",log_f1score, "%")
print("\033[1m--------------------------------------------------------\033[0m")
print("Classification_Report: \n",classification_report(y_test,y_pred_log))
print("\033[1m--------------------------------------------------------\033[0m")
plot_confusion_matrix(log_reg, X_test_scaled, y_test);

# Decision Tree

In [None]:
# Fit a decision tree on the scaled training features.
# A fixed random_state makes tie-breaking between equally good splits
# deterministic, so the reported scores are reproducible across runs.
decision = DecisionTreeClassifier(random_state=42)
decision.fit(X_train_scaled, y_train)
y_pred_dec = decision.predict(X_test_scaled)

# All scores are percentages rounded to two decimals.
decision_train = round(decision.score(X_train_scaled, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred). The weighted F1 weights each
# class by its support in y_true, so swapping the arguments changes the result.
decision_accuracy = round(accuracy_score(y_test, y_pred_dec) * 100, 2)
decision_f1score = round(f1_score(y_test, y_pred_dec,average="weighted")*100, 2)

print("Training Accuracy    :",decision_train ,"%")
print("Model Accuracy Score :",decision_accuracy ,"%")
print("F1 Score             :",decision_f1score, "%")
print("\033[1m--------------------------------------------------------\033[0m")
print("Classification_Report: \n",classification_report(y_test,y_pred_dec))
print("\033[1m--------------------------------------------------------\033[0m")
plot_confusion_matrix(decision, X_test_scaled, y_test);
plt.title('Confusion Matrix');

# Random Forest Classifier

In [None]:
# Random Forest
# A fixed random_state makes the bootstrap samples and feature sub-sampling
# deterministic, so the reported scores are reproducible across runs.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train_scaled, y_train)
y_pred_rf = random_forest.predict(X_test_scaled)

# All scores are percentages rounded to two decimals.
random_forest_train = round(random_forest.score(X_train_scaled, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred). The weighted F1 weights each
# class by its support in y_true, so swapping the arguments changes the result.
random_forest_accuracy = round(accuracy_score(y_test, y_pred_rf) * 100, 2)
random_forest_f1score = round(f1_score(y_test, y_pred_rf,average="weighted")*100, 2)

print("Training Accuracy    :",random_forest_train ,"%")
print("Model Accuracy Score :",random_forest_accuracy ,"%")
print("F1 Score             :",random_forest_f1score, "%")
print("\033[1m--------------------------------------------------------\033[0m")
print("Classification_Report: \n",classification_report(y_test,y_pred_rf))
print("\033[1m--------------------------------------------------------\033[0m")
plot_confusion_matrix(random_forest, X_test_scaled, y_test);
plt.title('Confusion Matrix');

# Model building

In [None]:
# Gather the headline metrics of every fitted model into one comparison table.
# Each list must keep the same model order as the 'Model' column.
models = pd.DataFrame({
    'Model': [
         'Logistic Regression','Linear SVM','Polynomial SVM','Decision Tree', 'Random Forest','GaussianNB'],
    
    'Training Accuracy': [log_train, svc_train,svc_p_train, decision_train, random_forest_train, gnb_train],
    
    'Model Accuracy Score': [log_accuracy, svc_accuracy,svc_p_accuracy, decision_accuracy, random_forest_accuracy, gnb_accuracy],
    
    'Model F1 Score': [log_f1score , svc_f1score, svc_p_f1score,decision_f1score, random_forest_f1score, gnb_f1score]
})

In [None]:
# Use the fully qualified option key: the bare 'precision' pattern matches more
# than one option on newer pandas releases and then raises OptionError.
pd.set_option('display.precision',2)
# Rank the models by F1 score and render them as a styled table.
# NOTE(review): Styler.hide_index() is deprecated in newer pandas in favour of
# .hide(axis="index"); kept as-is to match the pandas version this notebook targets.
models.sort_values(by='Model F1 Score', ascending=False).style.background_gradient(
        cmap='coolwarm').hide_index().set_properties(**{
            'font-family': 'Lucida Calligraphy',
            'color': 'LightGreen',
            'font-size': '15px'
        })

In [None]:
#Model function
def model(algorithm,dtrain_X,dtrain_Y,dtest_X,dtest_Y,cols=None):
    """Fit ``algorithm``, print test-set metrics and return its predictions.

    Parameters
    ----------
    algorithm : estimator exposing ``fit``, ``predict`` and ``predict_proba``.
    dtrain_X, dtrain_Y : training features and labels passed to ``fit``.
    dtest_X, dtest_Y : held-out features and labels used for the printed metrics.
    cols : unused; kept so callers that pass the feature names keep working.

    Returns
    -------
    tuple
        ``(predictions, prediction_probabilities, algorithm)``, where
        ``prediction_probabilities`` is column 1 of ``predict_proba(dtest_X)``
        and ``algorithm`` is the same (now fitted) estimator object.
    """
    algorithm.fit(dtrain_X,dtrain_Y)
    predictions = algorithm.predict(dtest_X)
    print (algorithm)

    # scikit-learn metrics take (y_true, y_pred); with the arguments swapped the
    # per-class precision and recall in the report trade places.
    print ("Accuracy score : ", accuracy_score(dtest_Y,predictions))
    print ("Recall score   : ", recall_score(dtest_Y,predictions, average="weighted"))
    print ("classification report :\n",classification_report(dtest_Y,predictions))

    # NOTE(review): column 1 is the probability of the second class only, which
    # is a binary-classification convention. The return shape is kept for
    # existing callers; confirm whether a multi-class consumer needs the full
    # probability matrix instead.
    prediction_probabilities = algorithm.predict_proba(dtest_X)[:,1]
    return predictions,prediction_probabilities,algorithm

In [None]:
# Train the random forest that is registered later in the notebook. A fixed
# random_state makes the registered model and its reported metrics reproducible.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# model() fits `rfc` in place and returns it, so `model_obj` is the same object.
y_pred,y_prob,model_obj=model(rfc,X_train_scaled,y_train,X_test_scaled,y_test,feature_col)

In [None]:
y_pred

In [None]:
y_prob

In [None]:
X_train_scaled

In [None]:
X_train.head()

In [None]:
from mosaicml import *
from mosaicml.constants import MLModelFlavours

In [None]:
@scoring_func
def score(model, request):
    """Predict the failure type for the first record of a request payload.

    Parameters
    ----------
    model : fitted classifier exposing ``predict``.
    request : object whose ``json`` attribute is a mapping with a ``"payload"``
        entry. The payload is either a string holding a column -> {index: value}
        mapping (JSON, or a Python-literal dict) or that mapping already parsed.

    Returns
    -------
    str
        Human-readable failure type for the first record in the payload.
    """
    import ast
    import json

    # Integer class -> label. Any class outside 0-4 is reported as tool wear,
    # exactly as the final ``else`` branch of the original chain did.
    failure_names = {
        0: "Heat Dissipation Failure",
        1: "No Failure",
        2: "Overstrain Failure",
        3: "Power Failure",
        4: "Random Failure",
    }

    payload = request.json["payload"]
    print(payload)
    if isinstance(payload, str):
        # SECURITY: the payload comes from the caller, so it must never be passed
        # to eval(). Strict JSON is tried first; single-quoted Python-literal
        # payloads fall back to ast.literal_eval, which only accepts literals.
        try:
            records = json.loads(payload)
        except ValueError:
            records = ast.literal_eval(payload)
    else:
        records = payload
    data = pd.DataFrame(records)
    # NOTE(review): relies on the notebook-level `ro_scaler` fitted on the
    # training features; payload columns are presumably expected in that order.
    data = ro_scaler.transform(data)
    print(data)
    prediction = model.predict(data)[0]
    print(prediction)
    return failure_names.get(prediction, "Tool wear Failure")

In [None]:
import requests
# Build a stand-in request for local testing: score() only reads
# request.json["payload"], so a bare requests.Request with its `json`
# attribute overwritten is enough to exercise it.
req = requests.Request()
#req.json = {"payload":train_X.head(1).to_numpy()}
# The payload is the first training row serialised as a JSON string.
req.json = {"payload":X_train.head(1).to_json()}

In [None]:
req.json

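## Payload (illustrative format only: the column names in this example differ from this notebook's feature columns; see "Sample Payloads" at the end for matching examples)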

{'payload': '{"Air temperature [\\u00b0C]":{"5504":30.65},
 "Process temperature [\\u00b0C]":{"5504":40.05},
 "Rotational speed [rpm]":{"5504":1458},
 "Torque [Nm]":{"5504":51.0},
 "Tool wear [min]":{"5504":42},
 "Temperature difference [\\u00b0C]":{"5504":9.4}}'}

In [None]:
# Derive the request/response schema from the scoring function, a sample
# (model, request) argument pair and the training features.
# NOTE(review): generate_schema comes from the mosaicml wildcard import; the
# exact meaning of its arguments is not visible here and should be confirmed.
sch = generate_schema(score,(model_obj, req),X_train)
print(sch)

In [None]:
score(rfc,req)

In [None]:
# Register the fitted random forest together with its scoring function.
# `labels` and `target_names` now list all six classes in encoded order (0-5),
# matching the class -> message mapping used by score(). The previous values
# (labels=[0,1] and five names without "No Failure") described a different
# class set.
# NOTE(review): confirm against the mosaicml register_model documentation that
# `labels` / `target_names` are the per-class ids and display names.
# NOTE(review): `y_prob` holds only the class-1 probability column returned by
# model(); confirm whether `prob` expects the full probability matrix.
tmp = register_model(model_obj, 
               score, 
               name="Predictive_Maintenance_Multi_Classification", 
               description="Predictive Maintenance Multi Classification",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               schema=sch,
               y_true=y_test,
               y_pred=y_pred, 
               prob=y_prob, 
               features=X_train.columns,
               labels=[0,1,2,3,4,5], 
               init_script="" ,
               input_type="json", 
               explain_ai=True, 
               x_train=X_train, 
               x_test=X_test, 
               y_train=y_train.tolist(),
               y_test=y_test.tolist(),
               feature_names=X_train.columns.tolist(),
               original_features=X_train.columns.tolist(),
               feature_ids=X_train.columns,
               target_names=['Heat Dissipation Failure','No Failure','Overstrain Failure','Power Failure','Random Failure','Tool wear Failure'],
               kyd=True, kyd_score = True)
#print("Registering model")

# Sample Payloads

### No Failure
{"payload":"{'Air temperature [K]':{'0':295.5}, 'Process temperature [K]':{'0':305.9}, 'Rotational speed [rpm]':{'0':1542}, 'Torque [Nm]':{'0':36.2}, 'Tool wear [min]':{'0':12.0} , 'Type_L':{'0':1.0} ,'Type_M':{'0':0.0}}"}

### Heat Dissipation Failure
{"payload":"{'Air temperature [K]':{'0':303.0}, 'Process temperature [K]':{'0':311.3}, 'Rotational speed [rpm]':{'0':1365}, 'Torque [Nm]':{'0':56.2}, 'Tool wear [min]':{'0':146.0} , 'Type_L':{'0':1.0} ,'Type_M':{'0':0.0}}"}

### Overstrain Failure
{"payload":"{'Air temperature [K]':{'0':299.2}, 'Process temperature [K]':{'0':309.1}, 'Rotational speed [rpm]':{'0':1345}, 'Torque [Nm]':{'0':60.7}, 'Tool wear [min]':{'0':191.0} , 'Type_L':{'0':1.0} ,'Type_M':{'0':0.0}}"}

### Power Failure
{"payload":"{'Air temperature [K]':{'0':301.5}, 'Process temperature [K]':{'0':310.7}, 'Rotational speed [rpm]':{'0':1336}, 'Torque [Nm]':{'0':65.6}, 'Tool wear [min]':{'0':0} , 'Type_L':{'0':0.0} ,'Type_M':{'0':1.0}}"}

### Random Failure
{"payload":"{'Air temperature [K]':{'0':298.6}, 'Process temperature [K]':{'0':309.8}, 'Rotational speed [rpm]':{'0':1505}, 'Torque [Nm]':{'0':45.7}, 'Tool wear [min]':{'0':144} , 'Type_L':{'0':1.0} ,'Type_M':{'0':0.0}}"}

### Tool wear Failure
{"payload":"{'Air temperature [K]':{'0':302.6}, 'Process temperature [K]':{'0':311.5}, 'Rotational speed [rpm]':{'0':1629}, 'Torque [Nm]':{'0':34.4}, 'Tool wear [min]':{'0':228} , 'Type_L':{'0':0.0} ,'Type_M':{'0':0.0}}"}