In [1]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import xgboost as xgb

# Read the records from the CSV file (path is relative to the working directory).
df = pd.read_csv("predictive_maintenance.csv")

# Collapse 'Target' to a strict 0/1 flag: every non-zero value becomes 1.
df['Target'] = (df['Target'] != 0).astype('int64')

# Model inputs: the 'Type' column plus the five measurement columns listed below.
feature_columns = [
    'Type',
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
X = df[feature_columns]
y = df['Target']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# One-hot encode 'Type'; every other column is passed through unchanged.
categorical_features = ['Type']
categorical_transformer = OneHotEncoder()
preprocessor = ColumnTransformer(
    transformers=[('cat', categorical_transformer, categorical_features)],
    remainder='passthrough',
)

# Chain the preprocessing and the binary XGBoost classifier into one estimator,
# so the encoder is fitted on the training rows only.
classifier = xgb.XGBClassifier(
    objective='binary:logistic',
    use_label_encoder=False,
    eval_metric='logloss',
)
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', classifier)])

# Train on the training split, then predict the held-out rows.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Score the held-out predictions.
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("Confusion Matrix:")
print(cm)
print("Accuracy:", acc)


Confusion Matrix:
[[1916    9]
 [  25   50]]
Accuracy: 0.983


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [6]:
# Two hand-written example rows, keyed by the same column names used for training.
sample_data = {
    'Type': ['L', 'L'],
    'Air temperature [K]': [298.2, 298.1],
    'Process temperature [K]': [308.7, 308.5],
    'Rotational speed [rpm]': [1408, 1498],
    'Torque [Nm]': [46.3, 49.4],
    'Tool wear [min]': [3, 5]
}

# Build the DataFrame that is passed to the pipeline; without this step
# `sample_df` does not exist and the cell fails on a fresh kernel.
sample_df = pd.DataFrame(sample_data)

# Predict 'Target' with `pipeline`, the binary model fitted in the previous cell.
# 'Failure Type' predictions need the multi-class model, which is only trained in
# a later cell, so they are not attempted here.
target_predictions = pipeline.predict(sample_df)

# Print predictions
print("Predictions:", target_predictions)


Predictions: [0 0]


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [16]:
import pandas as pd
import xgboost as xgb
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset
df = pd.read_csv("predictive_maintenance.csv")

# Prepare features and targets for 'Target' (binary classification)
X_target = df[['Type', 'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
y_target = df['Target']  # Assuming 'Target' contains only binary values (0 and 1)

# Prepare features and targets for 'Failure Type' (multi-class classification)
X_failure = X_target  # Same features as 'Target'
label_encoder_failure = LabelEncoder()
y_failure = label_encoder_failure.fit_transform(df['Failure Type'])  # Encoding string labels to integers

# Encoding categorical data: one-hot encode 'Type', pass every other column through.
# This object is only an unfitted template; each pipeline below receives its own clone.
categorical_features = ['Type']
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features)],
    remainder='passthrough')

# Creating pipelines for both prediction tasks.
# Pipeline.fit fits its steps in place without cloning them, so sharing a single
# ColumnTransformer instance would let the second fit overwrite the encoder state
# the first pipeline relies on. clone() gives each pipeline an independent copy.
pipeline_target = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('classifier', xgb.XGBClassifier(objective='binary:logistic', use_label_encoder=False, eval_metric='logloss'))
])

pipeline_failure = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('classifier', xgb.XGBClassifier(objective='multi:softprob', use_label_encoder=False, eval_metric='mlogloss'))
])

# Splitting the dataset into the Training set and Test set.
# Both calls use the same random_state on inputs of equal length, so the two tasks
# are split on the same rows.
X_train_target, X_test_target, y_train_target, y_test_target = train_test_split(X_target, y_target, test_size=0.2, random_state=0)
X_train_failure, X_test_failure, y_train_failure, y_test_failure = train_test_split(X_failure, y_failure, test_size=0.2, random_state=0)

# Training the models
pipeline_target.fit(X_train_target, y_train_target)
pipeline_failure.fit(X_train_failure, y_train_failure)

# Making predictions on three hand-written example rows that use the training column names
sample_df = pd.DataFrame({
    'Type': ['L', 'L', 'L'],
    'Air temperature [K]': [298.2, 298.1, 298.9],
    'Process temperature [K]': [308.7, 308.5, 309],
    'Rotational speed [rpm]': [1408, 1498, 1410],
    'Torque [Nm]': [46.3, 49.4, 65.7],
    'Tool wear [min]': [3, 5, 191]
})

target_predictions = pipeline_target.predict(sample_df)
failure_type_predictions_encoded = pipeline_failure.predict(sample_df)
# Map the integer class ids back to the original 'Failure Type' strings.
failure_type_predictions = label_encoder_failure.inverse_transform(failure_type_predictions_encoded)

# Displaying results
print("Target Predictions:", target_predictions)
print("Failure Type Predictions:", failure_type_predictions)


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Target Predictions: [0 0 1]
Failure Type Predictions: ['No Failure' 'No Failure' 'Power Failure']


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [18]:
import pandas as pd
import xgboost as xgb
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv("predictive_maintenance.csv")

# Prepare features and targets for 'Target' (binary classification)
X_target = df[['Type', 'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
y_target = df['Target']  # Assuming 'Target' contains only binary values (0 and 1)

# Prepare features and targets for 'Failure Type' (multi-class classification)
X_failure = X_target  # Same features as 'Target'
label_encoder_failure = LabelEncoder()
y_failure = label_encoder_failure.fit_transform(df['Failure Type'])  # Encoding string labels to integers

# Encoding categorical data: one-hot encode 'Type', pass every other column through.
# This object is only an unfitted template; each pipeline below receives its own clone.
categorical_features = ['Type']
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features)],
    remainder='passthrough')

# Creating pipelines for both prediction tasks.
# Pipeline.fit fits its steps in place without cloning them, so sharing a single
# ColumnTransformer instance would let the second fit overwrite the encoder state
# the first pipeline relies on. clone() gives each pipeline an independent copy.
pipeline_target = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('classifier', xgb.XGBClassifier(objective='binary:logistic', use_label_encoder=False, eval_metric='logloss'))
])

pipeline_failure = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('classifier', xgb.XGBClassifier(objective='multi:softprob', use_label_encoder=False, eval_metric='mlogloss'))
])

# Splitting the dataset into the Training set and Test set.
# Both calls use the same random_state on inputs of equal length, so the two tasks
# are split on the same rows.
X_train_target, X_test_target, y_train_target, y_test_target = train_test_split(X_target, y_target, test_size=0.2, random_state=0)
X_train_failure, X_test_failure, y_train_failure, y_test_failure = train_test_split(X_failure, y_failure, test_size=0.2, random_state=0)

# Training the models
pipeline_target.fit(X_train_target, y_train_target)
pipeline_failure.fit(X_train_failure, y_train_failure)

# Predicting the test sets
y_pred_target = pipeline_target.predict(X_test_target)
y_pred_failure = pipeline_failure.predict(X_test_failure)

# Calculating accuracies (fraction of held-out rows predicted exactly right).
# 'Failure Type' is compared on the integer-encoded labels produced by the LabelEncoder.
accuracy_target = accuracy_score(y_test_target, y_pred_target)
accuracy_failure = accuracy_score(y_test_failure, y_pred_failure)

# Printing the accuracies
print("Accuracy for 'Target':", accuracy_target)
print("Accuracy for 'Failure Type':", accuracy_failure)


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Accuracy for 'Target': 0.983
Accuracy for 'Failure Type': 0.985


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
