In [None]:
def detect_outliers(data, column_name, high=0.999999999, low=0.0000001):
    """Flag quantile-based outliers in ``column_name`` separately per organisation unit.

    The frame is split on its ``'organisationunitname'`` column. Each unit's
    values are passed through ``validate_series`` and then through a
    ``QuantileAD`` detector fitted on that unit alone. Units whose processing
    raises are reported on stdout and skipped (best-effort behaviour).

    Args:
        data: DataFrame containing ``column_name`` and ``'organisationunitname'``.
            Its index is carried over to the result unchanged.
            NOTE(review): QuantileAD is presumably ``adtk.detector.QuantileAD``,
            which would require a DatetimeIndex -- the import is not visible
            here, confirm against the file header.
        column_name: Name of the value column to analyse.
        high: Upper quantile passed to ``QuantileAD``. Defaults to the
            previously hard-coded value.
        low: Lower quantile passed to ``QuantileAD``. Defaults to the
            previously hard-coded value.

    Returns:
        A tuple ``(all_outliers, outlier_counts)``:

        * ``all_outliers`` -- one row per processed observation with columns
          ``'organisationunitname'``, ``column_name`` (after validation) and
          the boolean ``'outlier'`` flag.
        * ``outlier_counts`` -- columns ``'Facility'`` and ``'Outlier Count'``,
          sorted by count in descending order.

        Both frames are empty (but carry these columns) when no unit could be
        processed.
    """
    data = data[[column_name, 'organisationunitname']].copy()

    # Per-unit result frames, concatenated after the loop.
    outlier_results = []

    for unit in data['organisationunitname'].unique():
        print('processing unit:', unit)

        sample_fac_data = data[data['organisationunitname'] == unit]

        try:
            # Data validation and imputation
            data_test = sample_fac_data.drop('organisationunitname', axis=1)
            data_test = validate_series(data_test)

            quantile_ad = QuantileAD(high=high, low=low)
            anomaly_flags = quantile_ad.fit_detect(data_test)
            # A single-column frame in yields a single-column frame out;
            # reduce it to a Series so the flag column is built explicitly.
            if isinstance(anomaly_flags, pd.DataFrame):
                anomaly_flags = anomaly_flags.iloc[:, 0]

            # Create results DataFrame
            anomalies = pd.DataFrame(index=sample_fac_data.index)
            anomalies['organisationunitname'] = unit
            anomalies[column_name] = data_test[column_name]
            # NaN != 0 evaluates to True, so a missing detector result would
            # otherwise be counted as an outlier; exclude NaN explicitly.
            anomalies['outlier'] = anomaly_flags.notna() & (anomaly_flags != 0)

            outlier_results.append(anomalies)

        except Exception as e:
            # Deliberate best-effort: one bad unit must not abort the whole run.
            print(f"Error during processing for unit {unit}: {e}. Skipping this unit.")
            continue

    # pd.concat raises ValueError on an empty list, which happens for empty
    # input or when every unit was skipped above.
    if not outlier_results:
        all_outliers = pd.DataFrame(
            columns=['organisationunitname', column_name, 'outlier']
        )
        outlier_counts = pd.DataFrame(columns=['Facility', 'Outlier Count'])
        return all_outliers, outlier_counts

    # Concatenate results, keeping each unit's original index labels.
    all_outliers = pd.concat(outlier_results, ignore_index=False)

    # Aggregate outlier counts per facility (sum of booleans == count of True).
    outlier_counts = (
        all_outliers.groupby('organisationunitname')['outlier'].sum().reset_index()
    )
    outlier_counts.columns = ['Facility', 'Outlier Count']
    outlier_counts = outlier_counts.sort_values('Outlier Count', ascending=False)

    return all_outliers, outlier_counts

def validate_series(data_test):
    """Return ``data_test`` with missing values replaced by the mean.

    The mean is taken via ``data_test.mean()`` (per column for a DataFrame)
    and handed to ``fillna``, which returns a new object; the input is not
    modified. No other validation is performed here yet.
    """
    # Placeholder: further validation checks (data types, etc.) would go here.
    fill_values = data_test.mean()
    imputed = data_test.fillna(fill_values)
    return imputed