In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# Load your data
df = pd.read_csv('final_airbnb_dataset.csv')

In [3]:
df['adjusted_reviews'] = df['number_of_reviews_x'].apply(lambda x: np.log1p(x) if x > 0 else -1)

X = df[['distance_score', 'price_score', 'number_of_reviews_x']]
y = df['booked']

# Step 1: Standardize features (very important for neural networks)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [4]:
model = MLPClassifier(hidden_layer_sizes=(16, 8),  # You can try (16,) for just one hidden layer
                      activation='relu',           # ReLU is good for non-linear relationships
                      solver='adam',               # Adam optimizer works well for most cases
                      max_iter=500,
                      random_state=42)

# Step 4: Train the model
model.fit(X_train, y_train)

# Step 5: Evaluate the model
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# (Optional) Confusion Matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))



Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6774
           1       0.99      1.00      0.99       783

    accuracy                           1.00      7557
   macro avg       0.99      1.00      1.00      7557
weighted avg       1.00      1.00      1.00      7557


Confusion Matrix:
[[6766    8]
 [   0  783]]


In [5]:
df['predicted_proba'] = model.predict_proba(X_scaled)[:, 1]

# Sort Airbnbs by probability of being booked
top_airbnbs = df.sort_values(by='predicted_proba', ascending=False).head(10)
print("\nTop 10 Recommended Airbnbs:\n", top_airbnbs[['distance_score', 'price_score', 'number_of_reviews_x', 'predicted_proba']])



Top 10 Recommended Airbnbs:
        distance_score  price_score  number_of_reviews_x  predicted_proba
25496        0.864084     0.672372                    0              1.0
32039        0.864084     0.600831                   31              1.0
807          0.864084     0.497176                   82              1.0
6741         0.864084     0.557202                    0              1.0
1486         0.864084     0.652348                  375              1.0
19802        0.864084     0.664739                    5              1.0
23207        0.864084     0.672372                    3              1.0
5207         0.864084     0.488312                    0              1.0
2154         0.864084     0.488312                    0              1.0
17168        0.864084     0.540145                    1              1.0


In [6]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(10, 6))
sns.histplot(data=test_df, x='predicted_proba', hue='booked', bins=30, kde=True, palette='Set1')
plt.title("Distribution of Prediction Probabilities by Booking Status")
plt.xlabel("Predicted Probability of Being Booked")
plt.ylabel("Count")
plt.show()


ModuleNotFoundError: No module named 'seaborn'