In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# --- Data loading -----------------------------------------------------------
# Read the raw spreadsheet from the hard-coded notebook path.
data = pd.read_excel('/content/datos_default (1).xlsx')

# --- Preprocessing ----------------------------------------------------------
# Discard every row that has at least one missing value.
data = data.dropna()

# Encode the 'default' target as integer class labels. This is done before
# the one-hot step so the target is no longer picked up as an object column.
labelencoder = LabelEncoder()
data['default'] = labelencoder.fit_transform(data['default'])

# One-hot encode the remaining object-typed columns, dropping the first level
# of each to avoid a redundant indicator.
categorical_columns = data.select_dtypes(include='object').columns
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# --- Features / target ------------------------------------------------------
y = data['default']
X = data.drop('default', axis=1)

# --- Train / test split (70 % / 30 %, fixed seed) ---------------------------
# NOTE(review): the split is not stratified on y; confirm whether the class
# proportions should be preserved in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Gaussian Naive Bayes: hyper-parameter search ---------------------------
nb_model = GaussianNB()
param_grid_nb = dict(var_smoothing=[1e-09, 1e-08, 1e-07, 1e-06, 1e-05])
grid_search_nb = GridSearchCV(nb_model, param_grid_nb, cv=5, scoring='accuracy')
# fit() returns the search object itself, so the best estimator can be read
# straight off the call.
best_nb_model = grid_search_nb.fit(X_train, y_train).best_estimator_

# Accuracy of the tuned Naive Bayes model on the held-out set.
nb_pred = best_nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)

# --- Decision tree: hyper-parameter search ----------------------------------
tree_model = DecisionTreeClassifier(random_state=42)
param_grid_tree = dict(
    criterion=['gini', 'entropy'],
    max_depth=[None, *range(10, 51, 10)],  # None, 10, 20, 30, 40, 50
    min_samples_split=[2, 10, 20],
    min_samples_leaf=[1, 5, 10],
)
grid_search_tree = GridSearchCV(tree_model, param_grid_tree, cv=5, scoring='accuracy')
best_tree_model = grid_search_tree.fit(X_train, y_train).best_estimator_

# Accuracy of the tuned decision tree on the held-out set.
tree_pred = best_tree_model.predict(X_test)
tree_accuracy = accuracy_score(y_test, tree_pred)

# --- Report -----------------------------------------------------------------
# NOTE(review): the labels say "Precisión" but the values are accuracy scores.
print(f'Precisión Naive Bayes (mejorado): {nb_accuracy}')
print(f'Precisión Árbol de Decisión (mejorado): {tree_accuracy}')

Precisión Naive Bayes (mejorado): 0.667
Precisión Árbol de Decisión (mejorado): 0.9686666666666667
