In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Read the calendar CSV from the working directory and preview its first rows.
df_rev = pd.read_csv(filepath_or_buffer='calendar.csv')
df_rev.head(n=5)

In [None]:
def plot_corre_heatmap(corr):
    '''
    Draw an annotated heatmap of a correlation matrix and display it.

    Parameters
    ----------
    corr : pandas.DataFrame or 2-D array-like
        Matrix of correlation coefficients (for example the result of
        ``DataFrame.corr()``). Every cell is annotated with its value
        formatted to two decimals.

    Returns
    -------
    None
        The figure is rendered through ``plt.show()``.

    Raises
    ------
    ValueError
        If ``corr`` has no cells, i.e. there is nothing to plot.
    '''
    # Fail early with a readable message instead of an opaque error raised
    # from inside the plotting library.
    if np.size(corr) == 0:
        raise ValueError('corr is empty: there are no correlations to plot')

    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(corr, cbar=True, square=False, annot=True, fmt='.2f',
                annot_kws={'size': 15}, cmap='coolwarm', ax=ax)
    # Tilt both sets of tick labels by 45 degrees.
    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), rotation=45)
    # Pin the y-limits to the full extent of the grid (row 0 at the top).
    # The previous +/-0.5 nudge only compensated for a matplotlib release that
    # clipped the first and last rows in half; on other releases it added
    # blank bands above and below the heatmap. Explicit limits are correct on
    # both.
    ax.set_ylim(np.shape(corr)[0], 0)
    plt.show()

In [None]:
# Correlate only the numeric/boolean columns. On pandas >= 2.0 a bare
# `DataFrame.corr()` no longer drops non-numeric columns silently and raises
# if any are present, so the selection is made explicit here.
corr = df_rev.select_dtypes(include=['number', 'bool']).corr()
plot_corre_heatmap(corr)

In [None]:
# Bar chart with one bar per distinct `minimum_nights` value, showing how many
# rows carry it (same as sns.countplot(x=df_rev['minimum_nights'])).
sns.countplot(data=df_rev, x='minimum_nights')
plt.show()

In [None]:
# Grid of pairwise plots over df_rev's columns, coloured by `price`.
# NOTE(review): if `price` has many distinct values this produces one hue level
# per value and can be very slow on a large frame - confirm this is intended.
sns.pairplot(data=df_rev, hue='price')
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate

In [None]:
# Copy of df_rev without the columns whose values are all missing.
dfresult = df_rev.dropna(axis='columns', how='all')

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Create a decision-tree classifier limited to depth 3, with a fixed
# random_state (42).

clf = DecisionTreeClassifier(random_state=42, max_depth=3)

In [None]:
# Feature matrix: the two stay-length columns only. `listing_id` is the target
# below, so it must not also be a feature (a model given its own label as an
# input scores trivially well). Two features also match the 2-D grid that the
# decision-boundary plots feed to `clf.predict`.
# `.copy()` detaches X from df_rev so later edits to X never touch, or warn
# about, the source frame.
X = df_rev[['minimum_nights', 'maximum_nights']].copy()
# NOTE(review): `listing_id` looks like an identifier rather than a class
# label - confirm it is the intended prediction target.
y = df_rev['listing_id']

In [None]:
# Turn +inf / -inf into NaN everywhere in df_rev, modifying the frame in place.
# NOTE(review): X and y were already taken from df_rev in an earlier cell;
# confirm whether this cleaning is meant to reach them.
df_rev.replace(to_replace=[np.inf, -np.inf], value=np.nan, inplace=True)

In [None]:
# Replace every missing value in df_rev with the sentinel 999, in place.
df_rev.fillna(value=999, inplace=True)

In [None]:
# Fill missing feature values with each column's mean. The result is rebound
# to X instead of using `inplace=True`: X was sliced from df_rev, and mutating
# such a frame in place can raise pandas' SettingWithCopyWarning. Rebinding
# also keeps the cell safe to re-run.
X = X.fillna(X.mean())

In [None]:
# Mean-imputed copy of the feature frame.
# NOTE(review): despite the name, this is built from the full X rather than a
# held-out split, and no visible cell reads it - confirm whether it is needed.
X_test = X.fillna(value=X.mean())

In [None]:
# Train the classifier on the feature matrix and label vector, both passed as
# plain NumPy arrays.

clf.fit(X.to_numpy(), y.to_numpy())

In [None]:
# Print the fitted estimator's learned attributes, one per line, in this
# order: classes_, n_classes_, max_features_, feature_importances_.
for fitted_attr in ('classes_', 'n_classes_', 'max_features_', 'feature_importances_'):
    print(getattr(clf, fitted_attr))

In [None]:
# Predict a label for every row of X (the same rows the model was fitted on).
y_pred = clf.predict(X.to_numpy())

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of rows whose predicted label equals the true one. y_pred was
# produced from the rows used for fitting, so this is a training accuracy.
print(accuracy_score(y_true=y, y_pred=y_pred))

In [None]:
# Decision-boundary plot: scatter the first two feature columns coloured by
# the true label, then shade the plane with the classifier's prediction.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=X.iloc[:, 0], y=X.iloc[:, 1], hue=y, palette='Set2',
                legend=False, ax=ax)

# Sample the visible area on a 200 x 200 grid.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                     np.linspace(*ylim, num=200))

# The model must be queried with as many columns as it was fitted on. The grid
# supplies the first two; any further feature is held at its column mean so
# that `predict` does not fail on a width mismatch.
grid = np.column_stack([xx.ravel(), yy.ravel()])
extra_means = X.iloc[:, 2:].mean().to_numpy()
if extra_means.size:
    grid = np.hstack([grid, np.tile(extra_means, (len(grid), 1))])
Z = clf.predict(grid).reshape(xx.shape)

contours = ax.contourf(xx, yy, Z, alpha=0.3, cmap='Set2')
# Title is set before tight_layout so the layout reserves room for it.
ax.set_title('Fronteras de decisión', fontsize=15)
plt.tight_layout()
plt.show()

In [None]:
from sklearn import tree

In [None]:
# Draw the structure of the fitted tree with filled (coloured) nodes on a
# 10 x 7 inch figure.
plt.figure(figsize=(10, 7))
tree.plot_tree(decision_tree=clf, filled=True, ax=plt.gca())
plt.show()

In [None]:
# Bar chart of the importance the fitted model assigns to each feature column
# it used for prediction.

importances, columns = clf.feature_importances_, X.columns

plt.figure(figsize=(8, 5))
sns.barplot(x=columns, y=importances).set_title('Importancia de cada feature', fontsize=15)
plt.show()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Create a k-nearest-neighbours classifier with n_neighbors=100. This rebinds
# the name `clf`, so the following cells operate on this estimator.

clf = KNeighborsClassifier(
    n_neighbors=100,
)

In [None]:

# Fit the current classifier on the feature matrix and labels as NumPy arrays.
clf.fit(X.to_numpy(), y.to_numpy())

In [None]:
# Predict a label for every row of X (the same rows the model was fitted on).
y_pred = clf.predict(X.to_numpy())

In [None]:
# Share of rows predicted correctly; y_pred comes from the fitting rows, so
# this is a training accuracy.
print(accuracy_score(y_true=y, y_pred=y_pred))

In [None]:
# Decision-boundary plot for the current classifier: scatter the first two
# feature columns coloured by the true label, then shade the plane with the
# model's prediction.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=X.iloc[:, 0], y=X.iloc[:, 1], hue=y, palette='Set2',
                legend=False, ax=ax)

# Sample the visible area on a 200 x 200 grid.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                     np.linspace(*ylim, num=200))

# The model must be queried with as many columns as it was fitted on. The grid
# supplies the first two; any further feature is held at its column mean so
# that `predict` does not fail on a width mismatch.
grid = np.column_stack([xx.ravel(), yy.ravel()])
extra_means = X.iloc[:, 2:].mean().to_numpy()
if extra_means.size:
    grid = np.hstack([grid, np.tile(extra_means, (len(grid), 1))])
Z = clf.predict(grid).reshape(xx.shape)

contours = ax.contourf(xx, yy, Z, alpha=0.3, cmap='Set2')
# Titled like the earlier boundary plot so the two figures can be told apart.
ax.set_title('Fronteras de decisión (KNN)', fontsize=15)
plt.tight_layout()
plt.show()