# Clean Data Analysis
This notebook continues the exploration by loading the cleaned CSV files and computing basic summary statistics.

In [ ]:
from pathlib import Path
import pandas as pd

In [ ]:
# Location of the cleaned CSV exports, relative to the current working directory.
data_dir = Path('Data/clean')

# Accommodation table: read with pandas' default parsing.
acc = pd.read_csv(data_dir.joinpath('accommodation_facts_clean.csv'))

# Market table: `stay_date` is parsed as a datetime column.
market = pd.read_csv(
    data_dir.joinpath('market_otb_clean.csv'),
    parse_dates=['stay_date'],
)

# Rates table: both `stay_date` and `extract_date` are parsed as datetimes.
rates = pd.read_csv(
    data_dir.joinpath('rates_combined.csv'),
    parse_dates=['stay_date', 'extract_date'],
)

In [ ]:
# Report the number of rows in each loaded table, one line per table.
print(
    f'Accommodation records: {len(acc)}',
    f'Market rows: {len(market)}',
    f'Rates rows: {len(rates)}',
    sep='\n',
)

## Average Rate by Destination

In [ ]:
# Build an id -> destination lookup from the accommodation table.
# Exact duplicate (id, destination) rows are dropped first: in a left join, every
# repeated lookup row would duplicate the matching rate rows and skew the mean below.
dest_lookup = acc[['bookingdotcom_id', 'travel_destination_name']].drop_duplicates()

# Attach the destination to every rate row. `validate='many_to_one'` makes pandas raise
# a MergeError if one id still maps to several destinations, instead of silently
# fanning out the rate rows.
rates_with_dest = rates.merge(
    dest_lookup,
    on='bookingdotcom_id',
    how='left',
    validate='many_to_one',
)

# Mean price per destination, highest first. Rate rows without a matching accommodation
# carry a NaN destination and are excluded, because groupby drops NaN keys by default.
dest_rate = (
    rates_with_dest
    .groupby('travel_destination_name')['price_value']
    .mean()
    .sort_values(ascending=False)
)
dest_rate.head()

## Average Occupancy by Destination

In [ ]:
# Mean of `average_occupancy` per destination, ordered from highest to lowest.
dest_occ = (
    market
    .groupby(by='travel_destination_name')['average_occupancy']
    .mean()
    .sort_values(ascending=False)
)
dest_occ.head(5)

## Combine Rate and Occupancy

In [ ]:
# Place the two per-destination series side by side; the DataFrame constructor
# aligns them on their index labels, leaving NaN where a destination is missing
# from one of the series.
summary = pd.DataFrame(dict(avg_rate=dest_rate, avg_occupancy=dest_occ))

# Preview only destinations that have both a rate and an occupancy value.
summary.dropna(how='any').head(5)