# Dynamic Pricing for Airbnb – Cape Town
This project models optimal nightly rates for Airbnb listings using historical data, demand/supply signals, and predictive modeling.

## Data Loading

In [None]:
import pandas as pd

# Read the four raw input tables, in order, from the working directory.
# pandas infers gzip compression from the ".gz" suffix.
listings, calendar, reviews, neighbourhoods = (
    pd.read_csv(source_file)
    for source_file in (
        "listings.csv.gz",
        "calendar.csv.gz",
        "reviews.csv.gz",
        "neighbourhoods.csv",
    )
)

## Data Cleaning

In [None]:
# Keep only listings that have the fields used downstream (no columns are
# dropped here). `.copy()` makes listings_clean an independent frame so later
# cells can add columns to it without a SettingWithCopyWarning.
# NOTE(review): rows are filtered on "neighbourhood", but the modeling cell
# uses "neighbourhood_cleansed" — confirm which column should drive this filter.
listings_clean = listings.dropna(subset=["price", "room_type", "neighbourhood"]).copy()


def _price_to_float(prices):
    """Return `prices` as a float Series, stripping "$" and "," from text values.

    A Series that is already numeric is only cast to float, so the cell can
    be re-run safely.
    """
    if pd.api.types.is_numeric_dtype(prices):
        return prices.astype(float)
    # Raw string: "\$" inside a plain string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return prices.replace(r"[\$,]", "", regex=True).astype(float)


# NOTE(review): presumably listings prices use the same "$1,234.00" text
# format as the calendar prices — confirm. Converting here lets the price
# histogram in the EDA cell work on numbers rather than strings.
listings_clean["price"] = _price_to_float(listings_clean["price"])

calendar["date"] = pd.to_datetime(calendar["date"])
calendar["price"] = _price_to_float(calendar["price"])

## Exploratory Data Analysis (EDA)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Price distribution — drawn on its own axes. Series.plot() reuses the
# current axes when none is given, so without explicit axes the occupancy
# line below would be drawn on top of this histogram and replace its title.
fig_price, ax_price = plt.subplots()
sns.histplot(listings_clean["price"], bins=50, ax=ax_price)
ax_price.set_title("Price Distribution")

# Occupancy trends
# Encode availability as 1/0 only while the column still holds the raw
# "t"/"f" flags; mapping an already-numeric column would turn every value
# into NaN when this cell is re-run.
if not pd.api.types.is_numeric_dtype(calendar["available"]):
    calendar["available"] = calendar["available"].map({"t": 1, "f": 0})

# The per-date mean of `available` is the share of listing-nights still open,
# so the occupancy rate is its complement.
# NOTE(review): unavailable nights may include host-blocked dates as well as
# bookings — confirm before treating this as true occupancy.
occupancy_rate = 1 - calendar.groupby("date")["available"].mean()

fig_occupancy, ax_occupancy = plt.subplots()
occupancy_rate.plot(ax=ax_occupancy, title="Daily Occupancy Rate");


## Feature Engineering

In [None]:
# Booking lead time: whole days between a fixed reference date and each
# calendar night. Measuring from the wall clock ("today") made this feature
# change on every run and carried a time-of-day offset into the day count.
# NOTE(review): the earliest calendar date is used as a stand-in for the data
# snapshot date — confirm this matches how the calendar was collected.
reference_date = calendar["date"].min()
calendar["lead_time"] = (calendar["date"] - reference_date).dt.days

# Host type: True when the host has more than one listing. assign() returns a
# new frame, which avoids writing into a possible copy of `listings` and keeps
# the cell safe to re-run.
listings_clean = listings_clean.assign(
    multi_listing_host=listings_clean["host_listings_count"] > 1
)

# Seasonality: calendar month (1-12) of each night
calendar["month"] = calendar["date"].dt.month

## Modeling

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Merge calendar with listings.
# Only the listing attributes used as features are brought across: both
# frames carry a "price" column, and merging them whole would rename those to
# "price_x"/"price_y", leaving no "price" column for the target below.
# NOTE(review): falls back to an "id" column when listings has no
# "listing_id" column — confirm the listings key name.
listing_key = "listing_id" if "listing_id" in listings_clean.columns else "id"
listing_features = listings_clean[
    [listing_key, "room_type", "neighbourhood_cleansed", "multi_listing_host"]
].rename(columns={listing_key: "listing_id"})

# Rows without a nightly price cannot serve as training targets, so they are
# removed before the feature/target split.
merged = calendar.merge(listing_features, on="listing_id").dropna(subset=["price"])

# Select features; get_dummies one-hot encodes the categorical columns
features = merged[["lead_time", "month", "room_type", "neighbourhood_cleansed", "multi_listing_host"]]
features = pd.get_dummies(features)
target = merged["price"]

# Train model. A fixed seed makes the split and the forest reproducible
# across Restart & Run All.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_STATE
)
model = RandomForestRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

## Simulation & Adjustment

In [None]:
# Demand/supply price adjustment
def adjust_price(base, demand_mult, supply_mult, d_thresh=0.8, s_thresh=0.9):
    """Scale a base price by floored demand and supply multipliers.

    Each multiplier is replaced by its threshold when it falls below it
    (`d_thresh` for demand, `s_thresh` for supply); the base price is then
    multiplied by both resulting factors.
    """
    demand_factor = max(demand_mult, d_thresh)
    supply_factor = max(supply_mult, s_thresh)
    return base * demand_factor * supply_factor


# Example simulation: the supply multiplier (0.85) is below its default
# threshold, so 0.9 is applied instead.
adjusted = adjust_price(base=1200, demand_mult=1.2, supply_mult=0.85)

## Visualization

In [None]:
# Compare original vs. adjusted prices.
# An explicit figure/axes pair is used so the plot never lands on whatever
# axes pyplot currently considers active, and the y-axis is labelled so the
# figure can be read on its own.
fig_sim, ax_sim = plt.subplots()
ax_sim.plot([1200, adjusted], marker='o')
ax_sim.set_xticks([0, 1])
ax_sim.set_xticklabels(["Original", "Adjusted"])
ax_sim.set_ylabel("Nightly price")
ax_sim.set_title("Price Adjustment Simulation");

## Conclusion & Next Steps

In [None]:
# Summary
# - Modeled dynamic pricing using historical and contextual features
# - Simulated price adjustments based on demand/supply multipliers
# - Future work: integrate review sentiment, event calendars, and real-time APIs