In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import pickle
import joblib
 
# Read the predictive-maintenance CSV from the Lakehouse file mount into pandas.
# (Spark alternative kept for reference:
#  spark.read.format("delta").load("log_data_LH").toPandas())
df = pd.read_csv("/lakehouse/default/Files/predictive_maintenance.csv")

# Quick inspections. These are bare expressions in the middle of the cell, so
# their values are computed but neither displayed nor stored; wrap them in
# display(...) (or use df.info() / df.isnull().sum()) while exploring.
df['Type'].nunique()
df[df['Target'] == 1]

# Features are selected by position (columns 2..7); the label is the last
# column. NOTE(review): this relies on the CSV column order - confirm against
# the file before changing the export.
X, y = df.iloc[:, 2:8], df.iloc[:, -1]

# 80/20 hold-out split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
 
# Encode 'Type' column
# Work on explicit copies: the frames returned by train_test_split are derived
# from X, and assigning a column onto them can trigger pandas'
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# The category order is given explicitly, so the codes are L -> 0, M -> 1,
# H -> 2 regardless of which values happen to occur in the training fold.
oe = OrdinalEncoder(categories=[['L', 'M', 'H']])
oe.fit(X_train[['Type']])
# transform() returns an (n, 1) array; flatten it before storing it as a column.
X_train['Type'] = oe.transform(X_train[['Type']]).astype(int).ravel()
X_test['Type'] = oe.transform(X_test[['Type']]).astype(int).ravel()

# Encode target labels
# The position of a name in `categories` is its class code; labels that are
# not listed fall back to the extra code len(categories).
categories = ['No Failure', 'Heat Dissipation Failure', 'Power Failure', 'Overstrain Failure', 'Tool Wear Failure', 'Random Failures']
custom_encoder = {cat: i for i, cat in enumerate(categories)}
y_train_encoded = [custom_encoder.get(cat, len(categories)) for cat in y_train]
y_test_encoded = [custom_encoder.get(cat, len(categories)) for cat in y_test]

# Fit the LabelEncoder on the complete, fixed code range instead of on the
# training labels. Fitting on the training labels compacts the codes whenever
# a class is absent from the training fold (so they stop matching
# `categories`) and makes transform() raise on a test label that was not seen
# during fit. With the full range the encoder is an identity mapping.
le = LabelEncoder()
le.fit(list(range(len(categories) + 1)))
y_train = le.transform(y_train_encoded)
y_test = le.transform(y_test_encoded)
 
def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and measure it on the training and evaluation data.

    Parameters
    ----------
    model : estimator exposing ``fit``, ``predict`` and ``score``.
    X_fit, y_fit : data the model is fitted on.
    X_eval, y_eval : held-out data used for the evaluation accuracy.

    Returns
    -------
    tuple
        ``(predictions on X_eval, score on the fit data in percent,
        accuracy on the evaluation data in percent)``; both percentages are
        rounded to two decimals.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    fit_pct = round(model.score(X_fit, y_fit) * 100, 2)
    # accuracy_score takes (y_true, y_pred) in that order.
    eval_pct = round(accuracy_score(y_eval, predictions) * 100, 2)
    return predictions, fit_pct, eval_pct


# The metrics are kept in the *_train / *_accuracy variables; use
# classification_report(y_test, <predictions>) for a per-class breakdown.

# Logistic Regression
clf = LogisticRegression(solver='lbfgs', max_iter=10000)
y_pred, log_train, log_accuracy = _fit_and_score(clf, X_train, y_train, X_test, y_test)

# Decision Tree Classifier (seeded so repeated runs build the same tree)
decision = DecisionTreeClassifier(random_state=42)
y_pred_dec, decision_train, decision_accuracy = _fit_and_score(
    decision, X_train, y_train, X_test, y_test
)

# Random Forest Classifier (seeded: this is the model that gets persisted, so
# the saved artifact should be reproducible from a fresh run)
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred_rf, random_forest_train, random_forest_accuracy = _fit_and_score(
    random_forest, X_train, y_train, X_test, y_test
)

# Support Vector Machine
svc = SVC()
y_pred_svc, svc_train, svc_accuracy = _fit_and_score(svc, X_train, y_train, X_test, y_test)
 
# Persist the fitted Random Forest next to the notebook. The call is the last
# expression of the cell, so the list of written file names is displayed.
joblib.dump(value=random_forest, filename='model2.joblib')



StatementMeta(, , , Waiting, , Waiting)

  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


['model2.joblib']

In [None]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
import joblib
 
# One hand-written machine reading used as sample input. Key order matters:
# the feature columns are later taken in the order they appear here.
simulated_data = [
    {
        'Machine ID': 1,
        'Type': 'L',
        'Air temperature [K]': 320.5,
        'Process temperature [K]': 335.5,
        'Rotational speed [rpm]': 1700,
        'Torque [Nm]': 50,
        'Tool wear [min]': 300,
        'Failure Type': 'No Failure',
        'Target': 0,
        'Timestamp': '2024-11-04 10:00:00',
    },
]

def mloutput(simulated_data, model=None):
    """Predict failure-type probabilities for a batch of machine readings.

    Parameters
    ----------
    simulated_data : Spark DataFrame, pandas.DataFrame or list of dict
        The readings to score. Any object exposing ``toPandas()`` is converted
        through it; everything else is passed to ``pandas.DataFrame``. The
        data must contain the identifier columns (``MachineID`` or
        ``Machine ID``, ``Timestamp``, ``FailureType`` or ``Failure Type``,
        ``Target``) and a ``Type`` column holding 'L', 'M' or 'H'. An optional
        ``uid`` column is discarded. Every remaining column is passed to the
        model as a feature, in its existing order.
    model : object with ``predict_proba``, optional
        Fitted classifier to use. When omitted, 'model2.joblib' is loaded.

    Returns
    -------
    pandas.DataFrame
        Identifier columns, the encoded feature columns and one probability
        column per failure class, side by side. The frame is also printed.

    Raises
    ------
    KeyError
        If an identifier column or ``Type`` is missing.
    ValueError
        If ``Type`` holds a value other than 'L', 'M', 'H', or if the model
        does not return exactly one probability column per failure class.
    """
    # Spark DataFrames expose toPandas(); pandas frames and plain record lists
    # (such as the sample defined in this cell) go through the constructor.
    if hasattr(simulated_data, 'toPandas'):
        df_simulated = simulated_data.toPandas()
    else:
        df_simulated = pd.DataFrame(simulated_data)

    # Identifier columns appear with and without spaces depending on the
    # source, so each one is looked up under its known spellings.
    identifier_aliases = [
        ('MachineID', 'Machine ID'),
        ('Timestamp',),
        ('FailureType', 'Failure Type'),
        ('Target',),
    ]
    identifier_columns = []
    for aliases in identifier_aliases:
        present = [name for name in aliases if name in df_simulated.columns]
        if not present:
            raise KeyError(f"Missing identifier column; expected one of {list(aliases)}")
        identifier_columns.append(present[0])
    df_identifiers = df_simulated[identifier_columns]

    # Everything that is not an identifier (or the optional 'uid') is a feature.
    non_feature_columns = [name for aliases in identifier_aliases for name in aliases]
    non_feature_columns.append('uid')
    df_features = df_simulated.drop(columns=non_feature_columns, errors='ignore')

    # Same codes as OrdinalEncoder(categories=[['L', 'M', 'H']]) used for
    # training: L -> 0, M -> 1, H -> 2. A fixed mapping needs no fitting.
    type_codes = {'L': 0, 'M': 1, 'H': 2}
    encoded_type = df_features['Type'].map(type_codes)
    if encoded_type.isna().any():
        unknown = sorted({str(value) for value in df_features.loc[encoded_type.isna(), 'Type']})
        raise ValueError(f"Unknown machine Type value(s) {unknown}; expected one of ['L', 'M', 'H']")
    df_features['Type'] = encoded_type.astype(int)

    if model is None:
        # NOTE: joblib.load unpickles the file - only load artifacts that were
        # produced by this notebook or another trusted source.
        model = joblib.load('model2.joblib')

    # Predict probabilities for each failure type
    probabilities = model.predict_proba(df_features)

    # Class names in the order of the integer codes used for the training target
    failure_classes = ['No Failure', 'Heat Dissipation Failure', 'Power Failure',
                       'Overstrain Failure', 'Tool Wear Failure', 'Random Failures']

    probabilities_df = pd.DataFrame(probabilities)
    if probabilities_df.shape[1] != len(failure_classes):
        # The column labels would be meaningless if the model was fitted on a
        # different set of classes, so fail with an explicit message.
        raise ValueError(
            f"Model returned {probabilities_df.shape[1]} probability columns, "
            f"expected {len(failure_classes)} (one per failure class)"
        )
    probabilities_df.columns = failure_classes

    # Combine identifiers, encoded features and probabilities row by row
    results_df = pd.concat(
        [df_identifiers.reset_index(drop=True), df_features.reset_index(drop=True), probabilities_df],
        axis=1,
    )

    # Display the results
    print(results_df)
    return results_df


StatementMeta(, , , Waiting, , Waiting)

   Machine ID            Timestamp Failure Type  Target  Type  \
0           1  2024-11-04 10:00:00   No Failure       0     0   

   Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \
0                320.5                    335.5                    1700   

   Torque [Nm]  Tool wear [min]  No Failure  Heat Dissipation Failure  \
0           50              300        0.27                      0.03   

   Power Failure  Overstrain Failure  Tool Wear Failure  Random Failures  
0            0.0                0.31               0.39              0.0  
