In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [4]:
df = pd.read_csv(r'../dataset/predictive_maintenance.csv')
df.head(2)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline



# Drop the 'Product ID' column
df = df.drop(columns=['Product ID'])

# Define the features and target variable for multiclass classification
X = df.drop(columns=['Target', 'Failure Type'])  # Features
y_multi = df['Failure Type']  # Target variable for multiclass classification

# Define the preprocessing for numerical features
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
numerical_transformer = StandardScaler()

# Define the preprocessing for categorical features
categorical_features = ['Type']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Split the dataset into training and testing sets for multiclass classification
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(X, y_multi, test_size=0.2, random_state=42)

# Apply the preprocessing
preprocessor.fit(X_train_multi)
X_train_transformed_multi = preprocessor.transform(X_train_multi)
X_test_transformed_multi = preprocessor.transform(X_test_multi)

# Now X_train_transformed_multi and X_test_transformed_multi are ready for machine learning model training for multiclass classification


In [7]:
from sklearn.preprocessing import OneHotEncoder

# Assuming df_dropped is your DataFrame after dropping the 'Product ID' column

# Selecting the 'Type' column for one-hot encoding
type_column = df[['Type']]  # Double brackets to keep the DataFrame format

# Create the encoder instance
encoder = OneHotEncoder(sparse=False)

# Fit and transform the 'Type' column
type_encoded = encoder.fit_transform(type_column)

# Convert the encoded result into a DataFrame
type_encoded_df = pd.DataFrame(type_encoded, columns=encoder.get_feature_names_out(['Type']))

# Display the first few rows to verify
print(type_encoded_df.head())

# To merge this back with the original DataFrame (excluding the original 'Type' column),
# you can concatenate along the columns
df_encoded = pd.concat([df.drop(columns=['Type']), type_encoded_df], axis=1)

# Display the first few rows of the updated DataFrame to confirm
print(df_encoded.head())


   Type_H  Type_L  Type_M
0     0.0     0.0     1.0
1     0.0     1.0     0.0
2     0.0     1.0     0.0
3     0.0     1.0     0.0
4     0.0     1.0     0.0
   UDI  Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \
0    1                298.1                    308.6                    1551   
1    2                298.2                    308.7                    1408   
2    3                298.1                    308.5                    1498   
3    4                298.2                    308.6                    1433   
4    5                298.2                    308.7                    1408   

   Torque [Nm]  Tool wear [min]  Target Failure Type  Type_H  Type_L  Type_M  
0         42.8                0       0   No Failure     0.0     0.0     1.0  
1         46.3                3       0   No Failure     0.0     1.0     0.0  
2         49.4                5       0   No Failure     0.0     1.0     0.0  
3         39.5                7       0   No Fa



In [9]:
from sklearn.preprocessing import StandardScaler

# Assuming df_encoded is your DataFrame after encoding the 'Type' variable

# Selecting numerical features to scale
numerical_features = df_encoded.select_dtypes(include=['int64', 'float64']).columns

# Creating the scaler instance
scaler = StandardScaler()

# Fit and transform the numerical features
df_encoded[numerical_features] = scaler.fit_transform(df_encoded[numerical_features])

# Display the first few rows to verify the scaling
print(df_encoded.head())


        UDI  Air temperature [K]  Process temperature [K]  \
0 -1.731878            -0.952389                -0.947360   
1 -1.731531            -0.902393                -0.879959   
2 -1.731185            -0.952389                -1.014761   
3 -1.730838            -0.902393                -0.947360   
4 -1.730492            -0.902393                -0.879959   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]    Target  \
0                0.068185     0.282200        -1.695984 -0.187322   
1               -0.729472     0.633308        -1.648852 -0.187322   
2               -0.227450     0.944290        -1.617430 -0.187322   
3               -0.590021    -0.048845        -1.586009 -0.187322   
4               -0.729472     0.001313        -1.554588 -0.187322   

  Failure Type    Type_H    Type_L    Type_M  
0   No Failure -0.333889 -1.224745  1.528617  
1   No Failure -0.333889  0.816497 -0.654186  
2   No Failure -0.333889  0.816497 -0.654186  
3   No Failure -0.333889  0.81