# Modeling — Manutenção Preditiva (CNC)

In [None]:
import os, joblib, numpy as np, pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report

# Directory layout: the train/validation CSVs are read from the interim
# folder; the processed folder is created up front if it does not exist.
DATA_INTERIM = "../data/interim"
DATA_PROCESSED = "../data/processed"
os.makedirs(DATA_PROCESSED, exist_ok=True)


def _read_interim(filename):
    """Load one CSV file from the interim data directory as a DataFrame."""
    return pd.read_csv(os.path.join(DATA_INTERIM, filename))


X_train = _read_interim("X_train.csv")
X_valid = _read_interim("X_valid.csv")
y_train = _read_interim("y_train.csv")
y_valid = _read_interim("y_valid.csv")

# Label columns selected from the y frames for fitting and evaluation.
target_cols = ["falha_maquina", "FDF", "FDC", "FP", "FTE", "FA"]

# Split the feature columns by dtype, based on the training frame only:
# numeric dtypes on one side, everything else on the other.
_numeric_part = X_train.select_dtypes(include=[np.number])
_other_part = X_train.select_dtypes(exclude=[np.number])
num_cols = _numeric_part.columns.tolist()
cat_cols = _other_part.columns.tolist()

# Numeric columns: impute missing values with the median, then standardize.
num_pipe = Pipeline(
    steps=[
        ("imp", SimpleImputer(strategy="median")),
        ("sc", StandardScaler()),
    ]
)

# Non-numeric columns: impute with the most frequent value, then one-hot
# encode (the encoder is configured with handle_unknown="ignore").
cat_pipe = Pipeline(
    steps=[
        ("imp", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group through its own sub-pipeline.
preprocess = ColumnTransformer(
    transformers=[
        ("num", num_pipe, num_cols),
        ("cat", cat_pipe, cat_cols),
    ]
)

# Baseline model: logistic regression wrapped in a one-vs-rest classifier,
# stacked on top of the shared preprocessing step.
_ovr_logreg = OneVsRestClassifier(LogisticRegression(max_iter=200))
baseline = Pipeline(steps=[("prep", preprocess), ("clf", _ovr_logreg)])
baseline.fit(X_train, y_train[target_cols])

# Turn the predicted probabilities into 0/1 predictions with a 0.5 cut-off.
_valid_proba = baseline.predict_proba(X_valid)
y_pred = (_valid_proba >= 0.5).astype(int)

# Report metrics on the validation split; zero_division=0 is passed through
# to classification_report.
_report = classification_report(
    y_valid[target_cols],
    y_pred,
    target_names=target_cols,
    zero_division=0,
)
print(_report)

# Persist the fitted pipeline (preprocessing + classifier) to disk.
joblib.dump(baseline, os.path.join(DATA_PROCESSED, "model_baseline.joblib"))