In [31]:
import pandas as pd

# Load the dataset
df = pd.read_csv(r'../dataset/predictive_maintenance.csv')
df.head(2)

# Display the first few rows of the dataframe
print(df.head())

# Basic information about the dataset
print(df.info())


   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  
0                    1551         42.8                0       0   No Failure  
1                    1408         46.3                3       0   No Failure  
2                    1498         49.4                5       0   No Failure  
3                    1433         39.5                7       0   No Failure  
4                    1408         40.0                9       0   No Failure  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (tota

In [34]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

# Load the dataset
data = pd.read_csv(r'../dataset/predictive_maintenance.csv')

# Select columns to be considered for anomaly detection
cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

# Standardize the data
X = StandardScaler().fit_transform(data[cols])

# Apply DBSCAN for anomaly detection
# eps: Maximum distance between two samples for them to be considered as in the same neighborhood
# min_samples: The number of samples in a neighborhood for a point to be considered as a core point
dbscan = DBSCAN(eps=0.5, min_samples=10)
data['DBSCAN_Cluster'] = dbscan.fit_predict(X)

# Identify outliers (points labeled as -1)
outliers = data[data['DBSCAN_Cluster'] == -1]

# Save the outliers to a new CSV file for further analysis if needed
outliers.to_csv(r'../dataset/outliers_dbscan.csv', index=False)

# Display the first few outliers
print(outliers.head())


    UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
14   15     L47194    L                298.6                    309.2   
50   51     L47230    L                298.9                    309.1   
62   63     L47242    L                298.8                    309.0   
69   70     L47249    L                298.9                    309.0   
70   71     M14930    M                298.9                    309.0   

    Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target  \
14                    2035         19.6               40       0   
50                    2861          4.6              143       1   
62                    1829         22.9              172       0   
69                    1410         65.7              191       1   
70                    1924         22.6              193       0   

     Failure Type  DBSCAN_Cluster  
14     No Failure              -1  
50  Power Failure              -1  
62     No Failure              -1  
69  Powe

In [35]:
import pandas as pd
from scipy import stats
import numpy as np

# Load the dataset
data = pd.read_csv(r'../dataset/predictive_maintenance.csv')

# Columns to check for anomalies
cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

# Z-Score Method
z = np.abs(stats.zscore(data[cols]))
z_threshold = 3
outliers_z = np.where(z > z_threshold)

# IQR Method
Q1 = data[cols].quantile(0.25)
Q3 = data[cols].quantile(0.75)
IQR = Q3 - Q1
outliers_iqr = ((data[cols] < (Q1 - 1.5 * IQR)) | (data[cols] > (Q3 + 1.5 * IQR))).any(axis=1)

# Print the number of outliers detected by each method
print(f"Number of outliers detected by Z-Score method: {len(np.unique(outliers_z[0]))}")
print(f"Number of outliers detected by IQR method: {outliers_iqr.sum()}")

# Optional: Save the outliers to separate CSV files for further analysis
data.iloc[outliers_z[0]].to_csv(r'../dataset/z_score_outliers.csv', index=False)
data[outliers_iqr].to_csv(r'../dataset/iqr_outliers.csv', index=False)


Number of outliers detected by Z-Score method: 178
Number of outliers detected by IQR method: 459


In [36]:
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load your dataset
data = pd.read_csv(r'../dataset/predictive_maintenance.csv')

# Assuming 'data' is your DataFrame and 'cols' contains the columns to be used for anomaly detection
cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']
X = data[cols]

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize Isolation Forest
iso_forest = IsolationForest(n_estimators=100, contamination='auto', random_state=42)

# Fit the model
iso_forest.fit(X_scaled)

# Predict anomalies (-1 for outliers and 1 for inliers)
data['anomaly'] = iso_forest.predict(X_scaled)

# Filter out the anomalies
anomalies = data[data['anomaly'] == -1]

# Display or process the anomalies
print(anomalies)


       UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
14      15     L47194    L                298.6                    309.2   
50      51     L47230    L                298.9                    309.1   
69      70     L47249    L                298.9                    309.0   
70      71     M14930    M                298.9                    309.0   
101    102     L47281    L                298.8                    308.8   
...    ...        ...  ...                  ...                      ...   
9951  9952     L57131    L                298.2                    307.8   
9970  9971     H39384    H                298.4                    308.1   
9974  9975     L57154    L                298.6                    308.2   
9988  9989     L57168    L                298.9                    308.6   
9991  9992     M24851    M                298.9                    308.4   

      Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target  \
14               

In [39]:
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Load your dataset
data = pd.read_csv(r'../dataset/predictive_maintenance.csv')
X = data[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']].values

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the Autoencoder architecture
input_dim = X_scaled.shape[1]
encoding_dim = 14  # You might need to tune this

input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)
decoder = Dense(input_dim, activation="linear")(encoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)

# Compile the Autoencoder
autoencoder.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error')

# Train the Autoencoder
autoencoder.fit(X_scaled, X_scaled, epochs=100, batch_size=32, shuffle=True, validation_split=0.2, verbose=1)

# Use the Autoencoder for anomaly detection
# Compute the reconstruction loss on the training set
reconstructed = autoencoder.predict(X_scaled)
mse = np.mean(np.power(X_scaled - reconstructed, 2), axis=1)

# Determine a threshold for anomaly detection
threshold = np.quantile(mse, 0.95)  # Adjust based on your dataset

# Detect anomalies
outliers = mse > threshold
print("Number of anomalies detected:", np.sum(outliers))


ModuleNotFoundError: No module named 'keras'

In [40]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd

# Load your dataset
data = pd.read_csv(r'../dataset/predictive_maintenance.csv')
X = data[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']].values

# Initialize the Nearest Neighbors model
knn = NearestNeighbors(n_neighbors=5)  # Consider adjusting n_neighbors based on your dataset

# Fit the model
knn.fit(X)

# Compute the distances and indices of the K-nearest neighbors to each point
distances, indices = knn.kneighbors(X)

# Compute the average distance to the K-nearest neighbors
avg_distance = np.mean(distances, axis=1)

# Determine a threshold for anomaly detection (e.g., based on a percentile)
threshold = np.percentile(avg_distance, 95)  # Adjust the percentile value as needed

# Flag points with an average distance above the threshold as anomalies
anomalies = avg_distance > threshold

# Print the number of anomalies detected
print(f"Number of anomalies detected: {np.sum(anomalies)}")


Number of anomalies detected: 500


In [41]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv(r'../dataset/predictive_maintenance.csv')
X = data[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']].values

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA
pca = PCA(n_components=2)  # Adjust n_components based on your analysis
X_pca = pca.fit_transform(X_scaled)

# Inverse transform to reconstruct the original data
X_inverse = pca.inverse_transform(X_pca)

# Calculate the reconstruction error
reconstruction_error = np.sum(np.square(X_scaled - X_inverse), axis=1)

# Determine a threshold for anomaly detection
threshold = np.percentile(reconstruction_error, 95)  # Adjust the percentile as needed

# Detect anomalies
anomalies = reconstruction_error > threshold

# Print the number of anomalies detected
print(f"Number of anomalies detected: {np.sum(anomalies)}")


Number of anomalies detected: 500


In [1]:
from flask import Flask, request, jsonify
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle

# Initialize the Flask application
app = Flask(__name__)

# Load and prepare the dataset
df = pd.read_csv(r'../dataset/predictive_maintenance.csv')
X = df[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
y = df['Target']

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model (for demonstration, we re-train it here, but typically you'd load a pre-trained model)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model to a file (optional, for loading later)
pickle.dump(model, open('rf_model.pkl', 'wb'))

@app.route('/predict', methods=['POST'])
def predict():
    try:
        json_data = request.json
        input_data = pd.DataFrame([json_data])
        prediction = model.predict(input_data)
        return jsonify(prediction=int(prediction[0]))
    except Exception as e:
        return jsonify(error=str(e)), 400

if __name__ == '__main__':
    app.run(debug=True, port=5001)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5001
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
