In [None]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix

# We add all the necessary Plotly and Dash libraries
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_daq as daq
from dash.dependencies import Input, Output

In [None]:
# Load the raw records. `process_id` and `failure` are excluded from the
# feature matrix; `failure` is the prediction target.
df = pd.read_csv("Data/predictive_maintainence.csv")
X = df.drop(columns=["process_id", "failure"])
Y = df["failure"]

# Pairwise correlations between the candidate features.
corr = X.corr()

# Mask the upper triangle (diagonal included) so each pair is drawn only once.
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
    f, ax = plt.subplots(figsize=(100, 10))
    ax = sns.heatmap(corr, mask=mask, cmap='coolwarm', vmin=-1, vmax=1, annot=True, square=True)

# Keep only the strict upper triangle of |corr|: column c then holds its
# correlations with the columns that come before it. A column is dropped when
# any of those exceeds 0.79, i.e. the later member of each highly correlated
# pair is removed.
# NOTE: the builtin `bool` is used because the `np.bool` alias no longer
# exists in current NumPy releases.
cor_matrix = corr.abs()
upper_tri = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.79)]

X = X.drop(columns=to_drop)
X_colm = X.columns
print(X_colm)

In [None]:
# Hold out 33% of the rows for evaluation; the fixed seed makes the split
# reproducible.
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

# First model: XGBoost classifier with library-default hyper-parameters,
# trained on the current feature matrix X.
model = XGBClassifier()
model.fit(X_train, y_train)

# A bare `model.score(...)` in the middle of a cell is evaluated and thrown
# away, so the held-out score is printed explicitly.
print("Test accuracy: %.4f" % model.score(X_test, y_test))
print(model)

In [None]:

# Importance of each feature in the fitted model, multiplied by 100 and
# sorted from most to least important. Rows are labelled with X_colm.
df_feature_importances = pd.DataFrame(model.feature_importances_*100,columns=["Importance"],index=X_colm)
df_feature_importances = df_feature_importances.sort_values("Importance", ascending=False)


# We create a Features Importance Bar Chart
fig_features_importance = go.Figure()
fig_features_importance.add_trace(go.Bar(x=df_feature_importances.index,
                                         y=df_feature_importances["Importance"],
                                         marker_color='rgb(171, 226, 251)')
                                 )
# The title's bold tag is now properly closed (`</b>`); the original opened a
# second `<b>` instead of closing the first one.
fig_features_importance.update_layout(title_text='<b>Features Importance of the model</b>', title_x=0.5)
fig_features_importance.show()



In [None]:

# Names of the features whose importance (on the x100 scale) is below 5.
model_features = list(
    df_feature_importances.index[df_feature_importances["Importance"] < 5.]
)

# Remove those features from the design matrix and display what is left.
X = X.drop(columns=model_features)
X

In [None]:
# Same split parameters as before (33% hold-out, fixed seed), applied to the
# current, reduced feature matrix X.
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

# Refit an XGBoost classifier (library-default hyper-parameters) on the
# reduced feature set; this replaces the previous `model`.
model = XGBClassifier()
model.fit(X_train, y_train)

# A bare `model.score(...)` in the middle of a cell is evaluated and thrown
# away, so the held-out score is printed explicitly.
print("Test accuracy: %.4f" % model.score(X_test, y_test))
print(model)

In [None]:
# Predictions of the current model on both splits.
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# Round every test prediction to the nearest integer, then report the share
# of test rows predicted correctly.
predictions = list(map(round, y_pred_test))
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


In [None]:

# Importance of each remaining feature in the refitted model, multiplied by
# 100 and sorted from most to least important. Rows are labelled with the
# current columns of X.
df_feature_importances = pd.DataFrame(model.feature_importances_*100,columns=["Importance"],index=X.columns)
df_feature_importances = df_feature_importances.sort_values("Importance", ascending=False)


# We create a Features Importance Bar Chart
fig_features_importance = go.Figure()
fig_features_importance.add_trace(go.Bar(x=df_feature_importances.index,
                                         y=df_feature_importances["Importance"],
                                         marker_color='rgb(171, 226, 251)')
                                 )
# The title's bold tag is now properly closed (`</b>`); the original opened a
# second `<b>` instead of closing the first one.
fig_features_importance.update_layout(title_text='<b>Features Importance of the model</b>', title_x=0.5)
fig_features_importance.show()

In [None]:
# Metric functions keyed by the label shown in the summary table; each one is
# evaluated first on the train split and then on the test split.
scorers = {'Accuracy': accuracy_score, 'Precision': precision_score, 'Recall': recall_score}
performance_log = pd.DataFrame({
    'Error_metric': list(scorers),
    'Train': [score(y_train, y_pred_train) for score in scorers.values()],
    'Test': [score(y_test, y_pred_test) for score in scorers.values()],
})

display(performance_log)

# One report per split: heading, raw confusion matrix, then the plotted one.
# NOTE(review): `plot_confusion_matrix` comes from the notebook's import cell;
# it appears to be deprecated in newer scikit-learn releases - confirm the
# pinned version before upgrading.
splits = (
    ("Confusion matrix for the train set", X_train, y_train, y_pred_train),
    ("Confusion matrix for the test set", X_test, y_test, y_pred_test),
)
for position, (heading, split_X, split_y, split_pred) in enumerate(splits):
    if position:
        # Two blank lines separate consecutive reports.
        print()
        print()
    print(heading)
    print(confusion_matrix(split_y, split_pred))
    plot_confusion_matrix(model, split_X, split_y, values_format='d')
    plt.show()


In [27]:
def _slider_range(feature):
    """Return ``(min, mean, max)`` slider bounds for one column of ``df``.

    The minimum is rounded down and the maximum is rounded up so that the
    integer range always contains every observed value; the mean is rounded
    to the nearest integer and serves as the slider's starting position.
    """
    values = df[feature]
    return math.floor(values.min()), round(values.mean()), math.ceil(values.max())


# We record the name, min, mean and max of the six most important features,
# in decreasing order of importance (row order of df_feature_importances).
dropdown_1_label = df_feature_importances.index[0]
dropdown_1_min, dropdown_1_mean, dropdown_1_max = _slider_range(dropdown_1_label)

dropdown_2_label = df_feature_importances.index[1]
dropdown_2_min, dropdown_2_mean, dropdown_2_max = _slider_range(dropdown_2_label)

dropdown_3_label = df_feature_importances.index[2]
dropdown_3_min, dropdown_3_mean, dropdown_3_max = _slider_range(dropdown_3_label)

dropdown_4_label = df_feature_importances.index[3]
dropdown_4_min, dropdown_4_mean, dropdown_4_max = _slider_range(dropdown_4_label)

dropdown_5_label = df_feature_importances.index[4]
dropdown_5_min, dropdown_5_mean, dropdown_5_max = _slider_range(dropdown_5_label)

dropdown_6_label = df_feature_importances.index[5]
dropdown_6_min, dropdown_6_mean, dropdown_6_max = _slider_range(dropdown_6_label)


Unnamed: 0,Importance
min_temperature,63.487061
launch_time,9.49437
oscillations_count,7.861978
y_distance,7.225467
avg_main_force,6.630096
avg_vibrations,5.30103


In [None]:
def _feature_slider(slider_id, label, minimum, maximum, default, mark_step):
    """Build the ``[heading, slider]`` pair for one feature.

    Parameters
    ----------
    slider_id : str
        Component id of the slider.
    label : str
        Feature name shown in the <H4> heading above the slider.
    minimum, maximum : int
        Slider bounds; also the bounds of the tick-mark range.
    default : number
        Initial slider value.
    mark_step : int
        Distance between two consecutive tick marks.

    Returns
    -------
    list
        ``[html.H4, dcc.Slider]``, ready to be spliced into a children list.
    """
    return [
        html.H4(children=label),
        dcc.Slider(
            id=slider_id,
            min=minimum,
            max=maximum,
            step=0.5,
            value=default,
            marks={i: '{}'.format(i) for i in range(minimum, maximum + 1, mark_step)},
        ),
    ]


app = dash.Dash()

# The page structure is:
#    <H1> Title
#    Features Importance Chart
#    For each of the six recorded features (#1 .. #6):
#        <H4> Feature name
#        Slider to update the feature value
#    <H2 id="prediction_result">       (created empty)
#    <H2 id="prediction_probability">  (created empty)

# We apply basic HTML formatting to the layout.
# Style keys are camelCased, as Dash expects (`fontFamily`, not `font-family`).
app.layout = html.Div(
    style={'textAlign': 'center', 'width': '800px', 'fontFamily': 'Verdana'},
    children=[
        # Title display
        html.H1(children="Simulation Tool"),

        # Dash Graph Component calls the fig_features_importance parameters
        dcc.Graph(figure=fig_features_importance),

        # One heading + slider per feature. The tick spacing (last argument)
        # is hand-picked per slider - presumably tuned to each feature's value
        # range; revisit if the feature ranking changes.
        *_feature_slider('X1_slider', dropdown_1_label, dropdown_1_min, dropdown_1_max, dropdown_1_mean, 25),
        *_feature_slider('X2_slider', dropdown_2_label, dropdown_2_min, dropdown_2_max, dropdown_2_mean, 400),
        *_feature_slider('X3_slider', dropdown_3_label, dropdown_3_min, dropdown_3_max, dropdown_3_mean, 40000),
        *_feature_slider('X4_slider', dropdown_4_label, dropdown_4_min, dropdown_4_max, dropdown_4_mean, 300),
        *_feature_slider('X5_slider', dropdown_5_label, dropdown_5_min, dropdown_5_max, dropdown_5_mean, 10000),
        # The sixth slider is headed by the sixth feature's name (the original
        # repeated the third feature's label here).
        *_feature_slider('X6_slider', dropdown_6_label, dropdown_6_min, dropdown_6_max, dropdown_6_mean, 20),

        html.H2(id="prediction_result"),

        html.H2(id="prediction_probability"),
    ],
)


In [24]:
# The callback provides two outputs, written to the `children` of the
# "prediction_result" and "prediction_probability" elements, and is triggered
# by the value of any of the six sliders.
@app.callback(Output(component_id="prediction_result", component_property="children"),
              Output(component_id="prediction_probability", component_property="children"),
              [Input("X1_slider", "value"), Input("X2_slider", "value"), Input("X3_slider", "value"),
               Input("X4_slider", "value"), Input("X5_slider", "value"), Input("X6_slider", "value")])
def update_prediction(X1, X2, X3, X4, X5, X6):
    """Re-run the model on the feature values currently chosen on the sliders.

    Parameters
    ----------
    X1 .. X6 : number
        Current values of sliders ``X1_slider`` .. ``X6_slider``. Slider N is
        built from the statistics of the feature named ``dropdown_N_label``.

    Returns
    -------
    tuple
        ``(prediction, probability)``: the output of ``model.predict`` as a
        plain list, and the first entry of ``model.predict_proba`` for the
        row, multiplied by 100.
    """
    # Map each slider value to its feature by name instead of relying on a
    # hard-coded position order, which only matched one particular run.
    slider_values = {
        dropdown_1_label: X1,
        dropdown_2_label: X2,
        dropdown_3_label: X3,
        dropdown_4_label: X4,
        dropdown_5_label: X5,
        dropdown_6_label: X6,
    }
    # Single-row frame with the columns in the order the model was trained on.
    # Built directly because `DataFrame.append` is gone from current pandas.
    ip_df = pd.DataFrame([slider_values], columns=X.columns).astype(float)

    # NOTE(review): index [0][0] is the first column of predict_proba -
    # presumably the probability of the first class label; confirm this is
    # the probability meant to be displayed.
    prediction_prob = float(model.predict_proba(ip_df)[0][0] * 100.)
    prediction = model.predict(ip_df)

    # Hand native Python values to Dash rather than NumPy objects.
    return (prediction.tolist(), prediction_prob)


if __name__ == "__main__":
    app.run_server()


