In [1]:
import pandas as pd 


In [5]:
# Read the CSV and print a summary of its columns and dtypes
data = pd.read_csv("data-20.csv")
data.info()

# Keep the organisation unit name plus the two malaria columns
columns_of_interest = [
    "organisationunitname",
    "105-EP01b. Malaria Total",
    "105-EP01d. Malaria cases treated",
]
df = data[columns_of_interest]

# Reshape wide -> long: the former column name lands in 'Metric' and its
# cell content in 'Value', so both metrics can be processed uniformly
long_df = df.melt(
    id_vars=['organisationunitname'],
    var_name='Metric',
    value_name='Value',
)
print(long_df.head())

# Define a function to flag outliers within each group
def flag_outliers(group, k=1.5):
    """Label each row of ``group`` as a possible outlier using the IQR rule.

    A row is labelled 'possible' when its 'Value' lies strictly below
    ``Q1 - k * IQR`` or strictly above ``Q3 + k * IQR``, with the quartiles
    taken from ``group['Value']``. Every other row is labelled 'no'; that
    includes missing values, which compare false against both bounds.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame holding a numeric 'Value' column (e.g. one group handed over
        by ``DataFrameGroupBy.apply``).
    k : float, default 1.5
        Multiplier applied to the inter-quartile range when building the
        lower and upper fences.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``group`` plus a 'possible_outlier' column. The frame
        passed in is left untouched, because mutating the object given to
        ``groupby.apply`` is not supported by pandas.
    """
    values = group['Value']
    q1, q3 = values.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower_bound, upper_bound = q1 - k * iqr, q3 + k * iqr

    # Element-wise comparison on the whole column; NaN yields False on both
    # sides and therefore ends up labelled 'no'.
    is_outlier = (values < lower_bound) | (values > upper_bound)
    labels = is_outlier.map({True: 'possible', False: 'no'})

    # assign() returns a copy, so the caller's frame is never modified
    return group.assign(possible_outlier=labels)

# Run flag_outliers once per ('organisationunitname', 'Metric') combination
# and stitch the per-group results back into a single frame.
# NOTE(review): on pandas >= 2.2 this apply() call is expected to emit a
# DeprecationWarning about operating on the grouping columns — confirm the
# pandas version this notebook is pinned to.
long_df = (
    long_df
    .groupby(['organisationunitname', 'Metric'], as_index=False)
    .apply(flag_outliers)
)

# Keep only the rows whose 'possible_outlier' label is 'possible'
is_flagged = long_df['possible_outlier'].eq('possible')
outliers_df = long_df.loc[is_flagged]

outliers_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4188 entries, 0 to 4187
Data columns (total 20 columns):
 #   Column                                                         Non-Null Count  Dtype  
---  ------                                                         --------------  -----  
 0   periodid                                                       4188 non-null   int64  
 1   periodname                                                     4188 non-null   object 
 2   periodcode                                                     4188 non-null   int64  
 3   perioddescription                                              0 non-null      float64
 4   organisationunitid                                             4188 non-null   object 
 5   organisationunitname                                           4188 non-null   object 
 6   organisationunitcode                                           12 non-null     object 
 7   organisationunitdescription                                 

Unnamed: 0,Unnamed: 1,organisationunitname,Metric,Value,possible_outlier
20,708,Agonga Karine Community Health Centre II,105-EP01b. Malaria Total,140.0,possible
21,4896,Agonga Karine Community Health Centre II,105-EP01d. Malaria cases treated,140.0,possible
28,1061,Akurumor Health Centre II,105-EP01b. Malaria Total,273.0,possible
28,2457,Akurumor Health Centre II,105-EP01b. Malaria Total,236.0,possible
29,5249,Akurumor Health Centre II,105-EP01d. Malaria cases treated,273.0,possible
