In [9]:
# required packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [10]:
# Load the per-provider metric score table from ../data/case_data_cleaned.pkl.
# NOTE(review): the variable name suggests missing scores were already zero-imputed
# upstream — confirm against the notebook that writes this file.
# pd.read_pickle can execute arbitrary code while unpickling; only load trusted files.
df_metric_score_by_provider_zero_impute = pd.read_pickle("../data/case_data_cleaned.pkl")

### Transform 01. Add target metric
The target metric is a constant for each metric_source. See the table below for the target.

| Metric Name | Metric Definition | Metric Target |
| --- | --- | --- |
| average_responses_agg_month | Average overall patient feedback score across initial and follow-up visits, on a scale of 1-5 with 5 being the highest score. | >= 4 |
| caseloads_months | Count of patients seen in the past 84 days (12 weeks). Target is based on average visit frequency and churn rate. | >= 337 |
| chart_review_months | % of chart reviews that passed our review. | >= 0.80 |
| documentation_rates_months | % of notes and charge slips completed within 3 business days of the visit. | >= 0.90 |
| mic_utilization_rate_months | Visits where the provider utilized MIC data / visits with MIC data. | >= 0.80 |

Add the following columns for each entry:

* `metric_target`
* `has_met_target`

Let's transform the data to make it easier to assess the health of each metric by month and provider.
We will add the `metric_target` and `has_met_target` columns.

In [11]:
def add_column_metric_target(df, metric_target_definition):
    """
    Attach each row's target value as a new 'metric_target' column.

    The target is looked up from the row's 'metric_source' value. The input
    DataFrame is modified in place and is also returned.

    Args:
        df (pd.DataFrame): Frame containing a 'metric_source' column.
        metric_target_definition (dict): Maps each 'metric_source' value to its target.
            Sources absent from the mapping receive NaN.

    Returns:
        pd.DataFrame: The same DataFrame object, now carrying 'metric_target'.
    """
    target_by_row = df['metric_source'].map(metric_target_definition)
    df['metric_target'] = target_by_row
    return df

def add_column_has_met_target(df):
    """
    Adds a 'has_met_target' flag column to a pandas DataFrame.

    The flag is 1.0 when 'metric_score_imputed' >= 'metric_target', 0.0 when it is
    below, and NaN when either the score or the target is missing (so an unmapped
    metric is reported as "unknown" rather than silently counted as a miss).
    The input DataFrame is modified in place and is also returned.

    Args:
        df (pd.DataFrame): The input DataFrame with 'metric_score_imputed' and
            'metric_target' columns.

    Returns:
        pd.DataFrame: The same DataFrame with the added float 'has_met_target' column.
    """
    score = df['metric_score_imputed']
    target = df['metric_target']

    # Comparisons against NaN evaluate to False, so mask those rows back to NaN
    # instead of letting them masquerade as a missed target.
    comparable = score.notna() & target.notna()
    df['has_met_target'] = (score >= target).astype(float).where(comparable)
    return df

In [12]:
# Target value for each metric_source (see the table under "Transform 01").
metric_target_definition = {
    "average_responses_agg_month": 4,
    "caseloads_months": 337,
    "chart_review_months": 0.80,
    "documentation_rates_months": 0.90,
    "mic_utilization_rate_months": 0.80
}

# Start from a copy: the add_column_* helpers write into the frame they receive,
# and the loaded frame should stay untouched so this cell can be re-run safely.
df_metric_score_by_provider_monthly_with_target = add_column_metric_target(
    df = df_metric_score_by_provider_zero_impute.copy(),
    metric_target_definition = metric_target_definition)

# Pass the previous step's result forward explicitly rather than relying on the
# first helper having mutated the loaded frame as a side effect.
df_metric_score_by_provider_monthly_with_target = add_column_has_met_target(
    df = df_metric_score_by_provider_monthly_with_target)

# Order rows chronologically within each provider/metric pair.
df_metric_score_by_provider_monthly_with_target_sorted = df_metric_score_by_provider_monthly_with_target\
    .sort_values(by=['provider_id', 'metric_source', 'month_year'], ascending = [True, True, True])\
    .reset_index(drop=True)

# Spot-check a single provider/metric pair.
df_metric_score_by_provider_monthly_with_target_sorted\
    .query("(provider_id == 3) & (metric_source == 'chart_review_months')")

Unnamed: 0,provider_id,month_year,metric_source,metric_score,metric_score_imputed,is_imputed,metric_target,has_met_target
144,3,2023-09-01,chart_review_months,,0.0,True,0.8,0.0
145,3,2023-10-01,chart_review_months,,0.0,True,0.8,0.0
146,3,2023-11-01,chart_review_months,,0.0,True,0.8,0.0
147,3,2023-12-01,chart_review_months,1.0,1.0,False,0.8,1.0
148,3,2024-01-01,chart_review_months,,0.0,True,0.8,0.0
149,3,2024-02-01,chart_review_months,,0.0,True,0.8,0.0
150,3,2024-03-01,chart_review_months,1.0,1.0,False,0.8,1.0
151,3,2024-04-01,chart_review_months,,0.0,True,0.8,0.0
152,3,2024-05-01,chart_review_months,,0.0,True,0.8,0.0
153,3,2024-06-01,chart_review_months,,0.0,True,0.8,0.0


### Transform 02. Add target meet/miss streaks

In [None]:
def _count_consecutive_misses(group, flag_column, streak_column):
    """Return a copy of `group` with a running count of consecutive misses in `streak_column`."""
    # A row is a miss only when the flag equals 0; NaN or any other value ends the streak.
    missed = group[flag_column].eq(0)

    # Every non-miss row opens a new run, so the cumulative count of non-misses
    # labels each run; summing the misses inside a run yields the streak length.
    run_ids = (~missed).cumsum().to_numpy()
    streak = missed.astype(int).groupby(run_ids).cumsum()

    # Work on a copy: pandas does not support mutating the object that
    # groupby.apply hands to the applied function.
    result = group.copy()
    result[streak_column] = streak
    return result


def count_consecutive_misses_individual_metric(group):
    """
    Count consecutive missed targets for one provider/metric group.

    Args:
        group (pd.DataFrame): Rows already in chronological order, with a
            'has_met_target' column in which 0 marks a miss.

    Returns:
        pd.DataFrame: A copy of `group` with 'num_month_streak_metric_missed_target'
        holding the current miss streak length at each row (0 when the row is not a miss).
    """
    return _count_consecutive_misses(
        group, 'has_met_target', 'num_month_streak_metric_missed_target')


def count_consecutive_misses_monthly_metric(group):
    """
    Count consecutive months in which one provider missed the total-metric threshold.

    Args:
        group (pd.DataFrame): Rows already in chronological order, with a
            'has_met_total_metric_count' column in which 0 marks a miss.

    Returns:
        pd.DataFrame: A copy of `group` with 'num_month_streak_metric_total_missed_target'
        holding the current miss streak length at each row (0 when the row is not a miss).
    """
    return _count_consecutive_misses(
        group, 'has_met_total_metric_count', 'num_month_streak_metric_total_missed_target')

def _apply_streak_counter_per_group(df, group_keys, streak_counter):
    """Run `streak_counter` on a copy of every group of `df` and concatenate the results."""
    # Iterating the groups (instead of groupby.apply) keeps the grouping columns in
    # each piece on every pandas version and avoids the apply deprecation warning.
    pieces = [streak_counter(group.copy()) for _, group in df.groupby(group_keys)]
    # pd.concat raises on an empty list, so return the (empty) input unchanged.
    return pd.concat(pieces) if pieces else df


def calculate_consecutive_misses(df, granularity, total_metric_count_threashold = 3):
    """
    Compute streaks of consecutive missed targets per provider.

    Args:
        df (pd.DataFrame): Frame with 'provider_id', 'metric_source', 'month_year'
            and 'has_met_target' columns. It is not modified.
        granularity (str): Either
            * 'individual_metric' - keeps every input row and adds
              'num_month_streak_metric_missed_target', counted within each
              provider_id/metric_source pair; or
            * 'total_metrics_count' - aggregates to one row per provider_id/month_year
              with 'total_num_metric_target_met', 'has_met_total_metric_count' and
              'num_month_streak_metric_total_missed_target', counted within each provider_id.
        total_metric_count_threashold (int): Minimum number of met targets in a month
            for that month to count as met. Only used for 'total_metrics_count'.

    Returns:
        pd.DataFrame: The result frame described under `granularity`.

    Raises:
        ValueError: If `granularity` is not one of the two supported values.
    """
    supported = ('individual_metric', 'total_metrics_count')
    if granularity not in supported:
        # Previously an unknown value silently returned the sorted input.
        raise ValueError(
            f"granularity must be one of {supported}, got {granularity!r}")

    # Chronological order within each provider/metric is what makes the streak meaningful.
    df = df.sort_values(by=['provider_id', 'metric_source', 'month_year'])

    if granularity == 'individual_metric':
        # Pre-create the streak column as integers so the counter fills an int column.
        df['num_month_streak_metric_missed_target'] = 0

        df = _apply_streak_counter_per_group(
            df,
            ['provider_id', 'metric_source'],
            count_consecutive_misses_individual_metric)

        # Reset index to ensure single index dataframe
        df = df.reset_index(drop=True)

    else:
        # Number of targets met by each provider in each month.
        df = df.groupby(['provider_id', 'month_year'])\
            .agg(
                total_num_metric_target_met = ('has_met_target', 'sum'))\
            .reset_index()

        # 1 when the month reaches the threshold of met targets, otherwise 0.
        df['has_met_total_metric_count'] = (
            df['total_num_metric_target_met'] >= total_metric_count_threashold
        ).astype(int)

        # Pre-create the streak column as integers so the counter fills an int column.
        df['num_month_streak_metric_total_missed_target'] = 0

        df = _apply_streak_counter_per_group(
            df,
            ['provider_id'],
            count_consecutive_misses_monthly_metric)

    return df


In [14]:
# Miss streaks computed separately for every provider/metric pair.
df_metric_score_metric_target_miss_streak = calculate_consecutive_misses(
    df=df_metric_score_by_provider_monthly_with_target_sorted,
    granularity='individual_metric',
)

# Spot-check a single provider/metric pair.
(
    df_metric_score_metric_target_miss_streak
    .query("(provider_id == 3) & (metric_source == 'chart_review_months')")
)

  .apply(count_consecutive_misses_individual_metric)


Unnamed: 0,provider_id,month_year,metric_source,metric_score,metric_score_imputed,is_imputed,metric_target,has_met_target,num_month_streak_metric_missed_target
144,3,2023-09-01,chart_review_months,,0.0,True,0.8,0.0,1
145,3,2023-10-01,chart_review_months,,0.0,True,0.8,0.0,2
146,3,2023-11-01,chart_review_months,,0.0,True,0.8,0.0,3
147,3,2023-12-01,chart_review_months,1.0,1.0,False,0.8,1.0,0
148,3,2024-01-01,chart_review_months,,0.0,True,0.8,0.0,1
149,3,2024-02-01,chart_review_months,,0.0,True,0.8,0.0,2
150,3,2024-03-01,chart_review_months,1.0,1.0,False,0.8,1.0,0
151,3,2024-04-01,chart_review_months,,0.0,True,0.8,0.0,1
152,3,2024-05-01,chart_review_months,,0.0,True,0.8,0.0,2
153,3,2024-06-01,chart_review_months,,0.0,True,0.8,0.0,3


### Transform 03. Add underperformance streaks

Underperformance is defined as meeting fewer than 3 of the 5 target metrics in a month (i.e., missing at least 3) for three or more consecutive months.

In [15]:
# Monthly streaks of providers meeting fewer than 3 targets in total.
df_metric_score_metric_underperformance_streak = calculate_consecutive_misses(
    df=df_metric_score_by_provider_monthly_with_target_sorted,
    granularity='total_metrics_count',
    total_metric_count_threashold=3,
)

# Spot-check a single provider.
(
    df_metric_score_metric_underperformance_streak
    .query("(provider_id == 3)")
)

  .apply(count_consecutive_misses_monthly_metric)


Unnamed: 0,provider_id,month_year,total_num_metric_target_met,has_met_total_metric_count,num_month_streak_metric_total_missed_target
24,3,2023-09-01,0.0,0,1
25,3,2023-10-01,0.0,0,2
26,3,2023-11-01,0.0,0,3
27,3,2023-12-01,1.0,0,4
28,3,2024-01-01,1.0,0,5
29,3,2024-02-01,1.0,0,6
30,3,2024-03-01,3.0,1,0
31,3,2024-04-01,1.0,0,1
32,3,2024-05-01,3.0,1,0
33,3,2024-06-01,3.0,1,0


## Next Step

In [16]:
# Persist both streak tables for the next notebook in the pipeline.
df_metric_score_metric_target_miss_streak.to_pickle(
    "../data/case_data_target_miss_streak.pkl")

df_metric_score_metric_underperformance_streak.to_pickle(
    "../data/case_data_performance_miss_streak.pkl")