In [93]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score

In [168]:
# 🔹 Cargar dataset
df = pd.read_csv("MexLimpio.csv")

# 🔹 Rellenar valores nulos con forward y backward fill
#df = df.fillna(method="bfill")
#df = df.fillna(method="ffill")

In [169]:
df['host_acceptance_rate'] = df['host_acceptance_rate'].astype(str).str.replace('%', '', regex=True).astype(float)

In [170]:
# 🔹 Seleccionar variables para la regresión logística
Vars_Indep = df[['host_is_superhost', 'host_identity_verified', 'host_acceptance_rate', 
                 'accommodates', 'bathrooms', 'review_scores_rating', 'review_scores_cleanliness',
                 'availability_365', 'price', 'availability_90']]
Vars_Dep = df['instant_bookable']


In [171]:
# 🔹 Redefinir variables
X = Vars_Indep
y = Vars_Dep

In [172]:
# 🔹 Dividir en conjunto de entrenamiento y prueba (70%-30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=None)

In [173]:
# 🔹 Escalar los datos
#X_train = X_train.replace('%', '', regex=True).astype(float)
#X_test = X_test.replace('%', '', regex=True).astype(float)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [174]:
# 🔹 Definir el modelo de regresión logística
algoritmo = LogisticRegression()

In [175]:
# 🔹 Entrenar el modelo
algoritmo.fit(X_train, y_train)

In [176]:
# 🔹 Realizar predicciones
y_pred = algoritmo.predict(X_test)


In [177]:
# 🔹 Matriz de confusión
matriz = confusion_matrix(y_test, y_pred)
print("🔹 Matriz de Confusión:")
print(matriz)


🔹 Matriz de Confusión:
[[3761 1078]
 [1506 1630]]


In [178]:
# 🔹 Calcular precisión, exactitud y sensibilidad del modelo
precision = precision_score(y_test, y_pred, average="binary", pos_label="t")
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average="binary", pos_label="f")

print(f"🔹 Precisión del modelo: {precision:.2f}")
print(f"🔹 Exactitud del modelo: {accuracy:.2f}")
print(f"🔹 Sensibilidad del modelo: {recall:.2f}")


🔹 Precisión del modelo: 0.60
🔹 Exactitud del modelo: 0.68
🔹 Sensibilidad del modelo: 0.78


In [142]:
# 🔹 Convertir variable dicotómica (Ejemplo de adaptación basado en 'class' del Titanic)
# Filtrar solo los valores numéricos antes de convertir
dicotomico= df['host_acceptance_rate'] = pd.to_numeric(df['host_acceptance_rate'], errors='coerce')  # Convierte a float, NaN si falla

dicotomico= df['host_acceptance_rate'] = df['host_acceptance_rate'].apply(lambda x: "Alto" if x > 50 else "Bajo")
dicotomico

0        Alto
1        Alto
2        Alto
3        Alto
4        Alto
         ... 
26577    Alto
26578    Alto
26579    Alto
26580    Alto
26581    Alto
Name: host_acceptance_rate, Length: 26582, dtype: object