# 07 - Data Quality Checks

Build checks for completeness, uniqueness, validity, and consistency.


## 1. Create a dataset with issues


In [1]:
import pandas as pd

# Small hand-made sample; each column below is listed row by row.
# Deliberate problems planted for the later checks:
#   - 'B-1003' appears twice in booking_id
#   - one guest_id is missing (None)
#   - one revenue value is negative
#   - the second row checks out a day before it checks in
sample_columns = dict(
    booking_id=['B-1001', 'B-1002', 'B-1003', 'B-1003'],
    guest_id=['G-1', 'G-2', None, 'G-3'],
    revenue=[120.0, -40.0, 200.0, 200.0],
    check_in=['2026-01-10', '2026-01-12', '2026-01-15', '2026-01-15'],
    check_out=['2026-01-12', '2026-01-11', '2026-01-17', '2026-01-17'],
    channel=['Direct', 'OTA', 'Direct', 'Direct'],
)
bookings = pd.DataFrame(sample_columns)

bookings


Unnamed: 0,booking_id,guest_id,revenue,check_in,check_out,channel
0,B-1001,G-1,120.0,2026-01-10,2026-01-12,Direct
1,B-1002,G-2,-40.0,2026-01-12,2026-01-11,OTA
2,B-1003,,200.0,2026-01-15,2026-01-17,Direct
3,B-1003,G-3,200.0,2026-01-15,2026-01-17,Direct


## 2. Completeness checks


In [None]:
# Fraction of null cells in each column, ordered from most to least incomplete.
missing_rates = (
    bookings
    .isna()
    .mean()
    .sort_values(ascending=False)
)
missing_rates


## 3. Uniqueness checks


In [None]:
# keep=False marks every row that shares a booking_id, not only the later repeats,
# so all copies of a duplicated booking are shown side by side.
duplicate_rows = bookings.loc[
    lambda frame: frame.duplicated(subset=['booking_id'], keep=False)
]
duplicate_rows


## 4. Validity checks


In [None]:
# Rows whose revenue is strictly below zero.
invalid_revenue = bookings.loc[lambda frame: frame['revenue'].lt(0)]
invalid_revenue


## 5. Consistency checks


In [None]:
# Convert both date columns to datetimes (written back onto `bookings`)
# so the comparison below is chronological.
bookings['check_in'] = bookings['check_in'].pipe(pd.to_datetime)
bookings['check_out'] = bookings['check_out'].pipe(pd.to_datetime)

# A stay is flagged when check-out is on or before check-in.
invalid_stays = bookings.loc[
    lambda frame: frame['check_out'].le(frame['check_in'])
]
invalid_stays


## 6. Quick quality report


In [None]:
def quality_report(frame):
    """Summarise basic data-quality counts for a bookings frame.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns 'booking_id', 'guest_id', 'revenue',
        'check_in' and 'check_out'. The date columns may hold datetimes
        or date strings; they are parsed locally and `frame` is not modified.

    Returns
    -------
    dict
        'row_count'              - number of rows in `frame`
        'missing_guest_id_rate'  - share of rows with a null guest_id
                                   (0.0 for an empty frame)
        'duplicate_booking_ids'  - rows whose booking_id repeats an earlier row
        'negative_revenue_rows'  - rows with revenue below zero
        'invalid_stay_rows'      - rows where check_out is on or before check_in

    Raises
    ------
    Whatever `pandas.to_datetime` raises if a date column cannot be parsed.
    """
    row_count = int(frame.shape[0])

    # Parse into local Series so the comparison is chronological even when the
    # caller passes raw string dates; comparing strings would be lexical.
    check_in = pd.to_datetime(frame['check_in'])
    check_out = pd.to_datetime(frame['check_out'])

    # The mean of an empty Series is NaN; report 0.0 so the result stays a plain number.
    if row_count:
        missing_guest_id_rate = float(frame['guest_id'].isna().mean())
    else:
        missing_guest_id_rate = 0.0

    return {
        'row_count': row_count,
        'missing_guest_id_rate': missing_guest_id_rate,
        'duplicate_booking_ids': int(frame['booking_id'].duplicated().sum()),
        'negative_revenue_rows': int((frame['revenue'] < 0).sum()),
        'invalid_stay_rows': int((check_out <= check_in).sum()),
    }

# Run the report on the sample bookings frame; as the cell's last expression, the returned dict is displayed.
quality_report(bookings)


## Next Steps

Continue your learning with:
- **08_time_series_basics.ipynb** - Time series fundamentals
- **10_sql_analytics.ipynb** - SQL patterns with Python
