In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.special as special
from scipy.optimize import curve_fit
import seaborn as sns
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
dfM=pd.read_csv('Limpio_Brasil.csv')
dfM= dfM.drop(['Unnamed: 0'], axis=1)
dfM.head(5)

Unnamed: 0,last_scraped,source,name,description,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,...,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,reviews_per_month
0,25/09/2024,city scrape,"Fascinating Rio, on the hills!",Your best option to stay if you want to visit ...,Marcia Longras,09/04/2011,"Rio de Janeiro, Brazil","I'm a Brazilian lady, I was born in Rio de Jan...",within a few hours,93%,...,1.0,4.69,4.82,4.58,4.9,4.85,4.9,4.64,11.0,0.22
1,25/09/2024,city scrape,STUDIO RIO LAPA CARNIVAL,"The studio is in an excellent location , dire...",Erika,02/12/2011,"Rio de Janeiro, Brazil","Sou Erika tenho 37 anos ,casada, formada em ...",within a day,50%,...,0.0,4.67,5.0,4.9,4.9,5.0,4.9,4.67,3.0,0.02
2,26/09/2024,city scrape,"5 min a pÃ© - Praia de Copacabana, MetrÃ´, WiFi I","Bedroom and living room, with a private bathro...",Renato / Louri,18/03/2011,"Rio de Janeiro, Brazil",*French version below \r\n\r\nWe host internat...,within an hour,99%,...,0.0,4.71,4.82,4.79,4.82,4.88,4.9,4.65,6.0,0.22
3,25/09/2024,city scrape,Quintal da Lagoa - RefÃºgio Secreto quase Ipa...,Our apartment is the perfect choice for those ...,Nilda,06/09/2011,"Rio de Janeiro, Brazil",Gosto de conhecer pessoas e novas culturas e ...,within an hour,100%,...,2.0,5.0,4.94,5.0,5.0,5.0,4.83,4.92,1.0,0.51
4,25/09/2024,city scrape,"Rio, hospedagem e tranquilidade",Your best option to stay if you want to visit ...,Jaqueline,04/12/2011,"Rio de Janeiro, Brazil",SimpÃ¡tica e gosto de novos amigos. Meu lazer...,within a day,50%,...,0.0,4.68,4.8,4.76,4.8,4.83,4.77,4.6,1.0,0.24


In [3]:
unico=np.unique(dfM['host_is_superhost'])
unico

array(['f', 't'], dtype=object)

In [4]:
unico=np.unique(dfM['host_response_time'])
unico

array(['a few days or more', 'within a day', 'within a few hours',
       'within an hour'], dtype=object)

In [5]:
unico=np.unique(dfM['availability_30'])
unico

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])

In [6]:
dfM['host_is_superhost'] = dfM['host_is_superhost'].map({'t': 1, 'f': 0})

In [7]:
response_time_mapping = {
    'a few days or more': 4,   
    'within a day': 3,         
    'within a few hours': 2,    
    'within an hour': 1        
}

# Asegúrate de mapear la columna correctamente
dfM['host_response_time_numeric'] = dfM['host_response_time'].map(response_time_mapping)


In [8]:
print(dfM[['host_response_time', 'host_response_time_numeric']].head())

   host_response_time  host_response_time_numeric
0  within a few hours                           2
1        within a day                           3
2      within an hour                           1
3      within an hour                           1
4        within a day                           3


In [9]:
Vars_Indep=dfM[['host_is_superhost','host_response_time_numeric','availability_30']]
Var_Dep=dfM['instant_bookable'] #variable objetivo

In [10]:
X=Vars_Indep
y=Var_Dep

In [11]:
X_train, X_test, y_train, y_test=train_test_split(X,y, test_size=0.3, random_state=None)

In [12]:
#se escalan todos los datos
escalar= StandardScaler()

In [13]:
#para realizar el escalamiento de las variables X tanto de entrenamiento como de prueba 
X_train= escalar.fit_transform(X_train)
X_test= escalar.transform(X_test)

In [14]:
#definimos el algoritmo a utilizar
from sklearn.linear_model import LogisticRegression
algoritmo= LogisticRegression()

In [15]:
algoritmo.fit(X_train,y_train)

In [16]:
y_pred=algoritmo.predict(X_test)
y_pred

array(['f', 'f', 'f', ..., 'f', 'f', 'f'], shape=(11091,), dtype=object)

In [17]:
modelo = LogisticRegression(class_weight='balanced')
modelo.fit(X_train, y_train)
y_pred = modelo.predict(X_test)

In [18]:
#matriz de confusion
from sklearn.metrics import confusion_matrix
matriz= confusion_matrix(y_test, y_pred)
print('Matriz de Confusion:')
print(matriz)

Matriz de Confusion:
[[4588 4054]
 [ 624 1825]]


In [19]:
from sklearn.metrics import precision_score
precision = precision_score(y_test, y_pred , average='binary',pos_label='t')
print('Precision del modelo')
print(precision)

Precision del modelo
0.31042694335771387


In [20]:
#calcular exactitu del modelo
from sklearn.metrics import accuracy_score

exactitud=accuracy_score(y_test,y_pred)
print('Exactitud del modelo')
print(exactitud)

Exactitud del modelo
0.5782165719953115


In [21]:
#calcular sensibilidad del modelo
from sklearn.metrics import recall_score
sensibilidad=recall_score(y_test,y_pred,average='binary',pos_label='f')
print('Sensibilidad del modelo')
print(sensibilidad)

Sensibilidad del modelo
0.5308956260124971
