In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
from joblib import dump

# Load the dataset. The file encodes missing values as the literal string
# 'na'; parsing them as NaN at read time keeps every feature column numeric
# (instead of mixed str/int object columns) and lets the imputer below do
# the zero-fill as part of the fitted preprocessing pipeline.
data = pd.read_csv('/content/aps_failure_training_set1.csv', na_values='na')

# Split data into features (X) and target (y)
X = data.drop('class', axis=1)

# Encode the target as 1 (positive) / 0 (negative). `map` is used rather
# than `replace` so that unexpected labels surface as NaN and can be
# rejected explicitly instead of leaking into training.
y = data['class'].map({'pos': 1, 'neg': 0})
if y.isna().any():
    unexpected = sorted(data.loc[y.isna(), 'class'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'class' column (expected 'pos'/'neg'): "
        f"{unexpected}"
    )
y = y.astype(int)

# Define preprocessing steps: fill missing values with 0, then scale each
# feature with RobustScaler.
preprocessing_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value=0)),
    ('scaler', RobustScaler())
]

# Fit the preprocessing pipeline on the features and transform them
preprocessor = Pipeline(steps=preprocessing_steps)
X_preprocessed = preprocessor.fit_transform(X)

# Apply SMOTE for oversampling. A fixed seed makes the resampled training
# set (and therefore the trained model) reproducible between runs.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_preprocessed, y)

# Define the model (seeded for reproducibility)
model = XGBClassifier(random_state=42)

# Train the model on the resampled data
model.fit(X_res, y_res)

# Persist the fitted preprocessor next to the model: the model was trained
# on imputed + scaled features, so the same fitted transformer is needed to
# prepare raw data at prediction time.
preprocessor_file_path = 'preprocessor.joblib'
dump(preprocessor, preprocessor_file_path)

# Save the trained model. NOTE: this is a path relative to the current
# working directory, not a Google Drive location; point it at a mounted
# Drive folder if the file must outlive the runtime.
model_file_path = 'trained_model.joblib'
dump(model, model_file_path)

['trained_model.joblib']

In [None]:
trained