# Modeling Example
This notebook merges the cleaned datasets, aggregates them by travel destination, and fits a simple linear regression model that predicts each destination's average occupancy from its average rate and average star rating.

In [None]:
from pathlib import Path
import pandas as pd

In [None]:
# Folder that holds the cleaned CSV inputs read by this notebook.
clean_dir = Path('Data/clean')

# Accommodation table; later cells read its id, destination and stars columns.
acc = pd.read_csv(clean_dir.joinpath('accommodation_facts_clean.csv'))

# Market table; `stay_date` is parsed as a datetime column.
market = pd.read_csv(
    clean_dir.joinpath('market_otb_clean.csv'),
    parse_dates=['stay_date'],
)

# Rates table; both date columns are parsed as datetimes.
rates = pd.read_csv(
    clean_dir.joinpath('rates_combined.csv'),
    parse_dates=['stay_date', 'extract_date'],
)

## Prepare features

In [None]:
# Attach destination name and star rating to every rate row. A left merge
# keeps all rate rows, including those without a matching accommodation.
rates_with_dest = pd.merge(
    rates,
    acc[['bookingdotcom_id', 'travel_destination_name', 'stars']],
    on='bookingdotcom_id',
    how='left',
)

# Per-destination mean price and mean star rating, taken over rate rows.
agg_rates = rates_with_dest.groupby('travel_destination_name').agg(
    avg_rate=pd.NamedAgg(column='price_value', aggfunc='mean'),
    stars=pd.NamedAgg(column='stars', aggfunc='mean'),
)

# Per-destination mean of the market table's occupancy column.
agg_occ = market.groupby('travel_destination_name').agg(
    avg_occ=pd.NamedAgg(column='average_occupancy', aggfunc='mean'),
)

# Align the two aggregates on the destination index, then discard any
# destination that is missing a rate, star, or occupancy value.
data = agg_rates.join(agg_occ, how='left').dropna(how='any')

## Fit a regression model

In [None]:
from sklearn.linear_model import LinearRegression

# Predictors: per-destination average rate and average star rating.
X = data.loc[:, ['avg_rate', 'stars']]
# Target: per-destination average occupancy.
y = data.loc[:, 'avg_occ']

# Ordinary least-squares fit; `fit` returns the estimator itself, so
# building and fitting in two steps leaves `model` as the fitted estimator.
model = LinearRegression()
model.fit(X, y)

# Final expression of the cell: the (coefficients, intercept) pair.
model.coef_, model.intercept_