In [None]:
import pandas as pd
import numpy as np

class CustomerAnalytics:
    """Per-customer spending analytics over a purchase-level DataFrame.

    The input frame must contain ``customer_id`` and ``purchase_amount``
    columns. A customer may appear on several rows (one per purchase); the
    aggregation methods sum ``purchase_amount`` per ``customer_id``.

    Call :meth:`clean_data` before any of the aggregation methods.

    Attributes:
        data: Private copy of the frame passed to the constructor.
        cleaned: ``True`` once :meth:`clean_data` has run.
    """

    def __init__(self, data: pd.DataFrame):
        """Validate *data* and store a private copy of it.

        Args:
            data: Frame with at least ``customer_id`` and
                ``purchase_amount`` columns.

        Raises:
            TypeError: If *data* is not a ``pandas.DataFrame``.
            ValueError: If either required column is missing.
        """
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Expected a pandas DataFrame")
        if 'customer_id' not in data.columns or 'purchase_amount' not in data.columns:
            raise ValueError("Missing required columns")
        # Copy so that cleaning never mutates the caller's frame.
        self.data = data.copy()
        self.cleaned = False

    def clean_data(self) -> None:
        """Drop exact duplicate rows and replace missing amounts with 0.

        Only rows that are identical in every column are removed.
        De-duplicating on ``customer_id`` alone would discard every repeat
        purchase and make the per-customer sum meaningless.
        """
        self.data.drop_duplicates(inplace=True)
        # Assign the filled column back instead of calling
        # ``fillna(inplace=True)`` on ``self.data[...]``: that chained form
        # acts on a temporary Series and does not update the frame under
        # pandas Copy-on-Write.
        self.data['purchase_amount'] = self.data['purchase_amount'].fillna(0)
        self.cleaned = True

    def calculate_total_spent(self) -> pd.DataFrame:
        """Return the summed ``purchase_amount`` for each customer.

        Returns:
            A frame with columns ``customer_id`` and ``total_spent``.

        Raises:
            RuntimeError: If :meth:`clean_data` has not been called.
        """
        if not self.cleaned:
            raise RuntimeError("Data must be cleaned before calculation")
        per_customer = self.data.groupby('customer_id')['purchase_amount'].sum()
        return per_customer.reset_index(name='total_spent')

    def get_top_customers(self, n: int = 5) -> pd.DataFrame:
        """Return the *n* customers with the largest ``total_spent``.

        Args:
            n: Number of rows to return, highest spender first.

        Raises:
            RuntimeError: If :meth:`clean_data` has not been called.
        """
        totals = self.calculate_total_spent()
        return totals.sort_values(by='total_spent', ascending=False).head(n)

    def flag_high_value_customers(self, threshold: float = 1000) -> pd.DataFrame:
        """Return per-customer totals with a boolean ``high_value`` column.

        Args:
            threshold: A customer is flagged when ``total_spent`` is
                strictly greater than this value.

        Raises:
            RuntimeError: If :meth:`clean_data` has not been called.
        """
        totals = self.calculate_total_spent()
        totals['high_value'] = totals['total_spent'] > threshold
        return totals
