# 📘 Treinamento - Predição de Evasão Estudantil

In [1]:
# Carregar bibliotecas e dados
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

df = pd.read_excel("../StudentsPrepared.xlsx")

# Ajuste nome correto da variável alvo
y = df['Target']
X = df.drop('Target', axis=1)

categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, categorical_cols)
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

cv_scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
print("Acurácias em cada fold:", cv_scores)
print("Média CV:", cv_scores.mean())

clf.fit(X_train, y_train)
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
print(f"Acurácia treino: {train_acc:.2f} | Acurácia teste: {test_acc:.2f}")

print("\nClassification Report:\n", classification_report(y_test, clf.predict(X_test)))

# Salvar modelo
joblib.dump(clf, "../model/modelo_treinado.pkl")

Acurácias em cada fold: [0.74152542 0.77966102 0.76977401 0.76977401 0.77652051]
Média CV: 0.7674509944941226
Acurácia treino: 1.00 | Acurácia teste: 0.77

Classification Report:
               precision    recall  f1-score   support

  Desistente       0.83      0.75      0.79       284
    Graduado       0.79      0.93      0.85       442
 Matriculado       0.54      0.35      0.43       159

    accuracy                           0.77       885
   macro avg       0.72      0.68      0.69       885
weighted avg       0.76      0.77      0.76       885



['../model/modelo_treinado.pkl']

In [3]:
# === 8. Salvar relatório de resultados ===
with open("treinamento_resultado.txt", "w") as f:
    f.write(f"Acurácias em cada fold: {cv_scores.tolist()}\n")
    f.write(f"Média CV: {cv_scores.mean():.4f}\n")
    f.write(f"Acurácia treino: {train_acc:.4f}\n")
    f.write(f"Acurácia teste: {test_acc:.4f}\n")
    f.write("\nClassification Report:\n")
    f.write(classification_report(y_test, y_pred))


NameError: name 'y_pred' is not defined