# ROI testing
Using German Credit data, compare `Predicted ROI` and `Actual ROI`

In [1]:
import pandas
from sklearn.metrics import classification_report

In [2]:
def get_type(row):
    if row['score'] == row['label_value']:
        if row['score'] == 0:
            return 'TN'
        else:
            return 'TP'
    else:
        if row['score'] == 0:
            return 'FN'
        else:
            return 'FP'

In [3]:
cost_matrix = {
    "TP": 1.5,
    "FP": -2,
    "TN": 2,
    "FN": -1.5,
}

In [4]:
df_baseline = pandas.read_json('df_baseline_scored.json', orient='records', lines=True)
df_sample = pandas.read_json('df_sample_scored.json', orient='records', lines=True)

In [5]:
df_baseline = df_baseline[['credit_amount', 'label_value', 'score']]
df_sample = df_sample[['credit_amount', 'label_value', 'score']]

In [6]:
baseline_metrics = classification_report(df_baseline['label_value'], df_baseline['score'], output_dict=True)
baseline_TPR = baseline_metrics['1']['recall']
baseline_TNR = baseline_metrics['0']['recall']

In [7]:
df_baseline['type'] = df_baseline.apply(get_type, axis=1)
df_sample['type'] = df_sample.apply(get_type, axis=1)

In [8]:
predicted = 0
for ix, row in df_sample.iterrows():
    if row['score'] == 0:
        predicted += row['credit_amount'] * (baseline_TNR * cost_matrix['TN'] + (1 - baseline_TNR) * cost_matrix['FN'])
    else:
        predicted += row['credit_amount'] * (baseline_TPR * cost_matrix['TP'] + (1 - baseline_TPR) * cost_matrix['FP'])
print(f'{round(predicted, 2):,}')

567,907.73


In [9]:
actual = 0
for ix, row in df_sample.iterrows():
    actual += row['credit_amount'] * cost_matrix[row['type']]
print(f'{round(actual, 2):,}')

389,654.5


In [28]:
print(f'Difference of ${round(abs(actual-predicted), 2):,}')
print(f'Difference of {100*round(abs((actual-predicted)/actual)}%')

Difference of $178,253.23
Difference of 46.0%


In [15]:
sample_metrics = classification_report(df_sample['label_value'], df_sample['score'], output_dict=True)
sample_TPR = sample_metrics['1']['recall']
sample_TNR = sample_metrics['0']['recall']

In [32]:
print(f'TNR Percent difference of {round((baseline_TNR - sample_TNR)/baseline_TNR, 3) * 100}%')

TNR Percent difference of 9.0%


In [33]:
print(f'TPR Percent difference of {round((baseline_TPR - sample_TPR)/baseline_TPR, 3) * 100}%')

TPR Percent difference of 6.3%


In [37]:
# modelop.init
def begin():
    """A function to declare model-specific variables used in ROI computation"""
    global amount_field, label_field, score_field
    global baseline_metrics, cost_multipliers

    amount_field = "credit_amount"  # Column containing transaction amount
    label_field = "label_value"  # Column containing ground_truth
    score_field = "score"  # Column containing model prediction

    # Classification metrics on baseline data
    baseline_metrics = {
        # old TNR
        "TNR": .88,
        # changing old TPR value of 0.2435474006116208
        "TPR": .7,
    }
    # ROI cost multipliers for each classification case
    cost_multipliers = {
        "TP": 1.5,
        "FP": -2,
        "TN": 2,
        "FN": -1.5,
    }
    pass

In [38]:
# modelop.metrics
def metrics(df_sample):
    """Function to classify records & compute actual ROI given labeled & scored DataFrame.

    Args:
        df_sample (pd.DataFrame): Slice of Production data

    Yields:
        dict: Name of transaction field and actual ROI
    """

    # Classify each record in dataframe
    # Positive Class Labeled as 1
    # Negative Class labeled as 0

    for idx in range(len(df_sample)):
        if df_sample.iloc[idx][label_field] == df_sample.iloc[idx][score_field]:
            df_sample["record_class"] = (
                "TP" if df_sample.iloc[idx][label_field] == 1 else "TN"
            )
        elif df_sample.iloc[idx][label_field] < df_sample.iloc[idx][score_field]:
            df_sample["record_class"] = "FP"
        else:
            df_sample["record_class"] = "FN"

    # Compute actual and projected ROIs
    actual_roi = compute_actual_roi(df_sample)
    projected_roi = compute_projected_roi(df_sample)

    return {
        "actual_roi": actual_roi,
        "projected_roi": projected_roi,
        "amount_field": amount_field,
        "ROI": [
            {
                "test_name": "ROI",
                "test_category": "ROI",
                "test_type": "ROI",
                "test_id": "ROI",
                "values": {
                    "actual_roi": actual_roi,
                    "projected_roi": projected_roi,
                    "amount_field": amount_field,
                    "baseline_metrics": baseline_metrics,
                    "cost_multipliers": cost_multipliers,
                },
            }
        ],
    }

In [39]:
def compute_actual_roi(data):
    """Helper function to compute actual ROI.

    Args:
        data (pd.DataFrame): Input DataFrame containing record_class

    Returns:
        float: actual ROI
    """
    actual_roi = 0
    for idx in range(len(data)):
        actual_roi += (
            data.iloc[idx][amount_field]
            * cost_multipliers[data.iloc[idx]["record_class"]]
        )

    return round(actual_roi, 2)

In [45]:
def compute_projected_roi(data):
    """Helper function to compute projected ROI.

    Args:
        data (pd.DataFrame): Input DataFrame containing record_class

    Returns:
        float: projected ROI
    """
    projected_roi = 0
    for idx in range(len(data)):
        projected_roi += data.iloc[idx][amount_field] * (
            (data.iloc[idx][score_field] == 1)
            * (
                baseline_metrics["TPR"] * cost_multipliers["TP"]
                + (1 - baseline_metrics["TPR"] * cost_multipliers["FP"])
            )
            + (data.iloc[idx][score_field] == 0)
            * (
                baseline_metrics["TNR"] * cost_multipliers["TN"]
                + (1 - baseline_metrics["TNR"] * cost_multipliers["FN"])
            )
        )

    return round(projected_roi, 2)

In [46]:
def parse_results(results):
    print(f"Actual ROI : {round(results['actual_roi']):,}")
    print(f"Projected ROI : {round(results['projected_roi']):,}")

In [47]:
begin()

In [48]:
results = metrics(df)

In [49]:
parse_results(results)

Actual ROI : 5,184,598
Projected ROI : 9,802,546
