In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler


In [13]:

# Load the dataset
df = pd.read_csv('predictive_maintenance.csv')

# Drop unnecessary columns
df.drop(df.columns[:2], axis=1, inplace=True)

# Drop rows with specific conditions as mentioned in the original code
df.drop(df[df['Failure Type'] == 'No Failure'].index, axis=0, inplace=True)
df.drop(df[df['Failure Type'] == 'Random Failures'].index, axis=0, inplace=True)

# Extract features and target variable
x = df.drop(df.columns[-2:], axis=1)
y = df['Target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

# Identify categorical and numerical columns
cat_cols = X_train.select_dtypes(include='O').columns.tolist()
num_cols = X_train.select_dtypes(exclude='O').columns.tolist()

# Create a ColumnTransformer for preprocessing
enc = OneHotEncoder(handle_unknown='ignore')
sc = StandardScaler()
preprocessor = make_column_transformer(
    (
        enc, 
        cat_cols
    ),
    (
        sc, 
        num_cols
    )
)

# Create a pipeline with preprocessing, oversampling, undersampling, and logistic regression
pipeline = make_pipeline(
    ('preprocessing', preprocessor),
    ('sampling', SMOTE(sampling_strategy=0.5)),
    ('sampling2', RandomUnderSampler()),
    ('classification', LogisticRegression())
)

# Fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# Evaluate the pipeline on the testing set
accuracy = pipeline.score(X_test, y_test)
print(f"Accuracy on the testing set: {accuracy}")


TypeError: All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' '('preprocessing', ColumnTransformer(transformers=[('onehotencoder',
                                 OneHotEncoder(handle_unknown='ignore'),
                                 ['Type']),
                                ('standardscaler', StandardScaler(),
                                 ['Air temperature [K]',
                                  'Process temperature [K]',
                                  'Rotational speed [rpm]', 'Torque [Nm]',
                                  'Tool wear [min]'])]))' (type <class 'tuple'>) doesn't

In [14]:
df.head()

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
50,L,298.9,309.1,2861,4.6,143,1,Power Failure
69,L,298.9,309.0,1410,65.7,191,1,Power Failure
77,L,298.8,308.9,1455,41.3,208,1,Tool Wear Failure
160,L,298.4,308.2,1282,60.7,216,1,Overstrain Failure
161,L,298.3,308.1,1412,52.3,218,1,Overstrain Failure
