In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd
from pyprojroot import here
import os

path_data = here("./data")
os.chdir(path_data)

In [3]:
np.random.seed(0)
n_rows_provider = 10000
n_rows_member = 100000

provider_index = np.random.normal(0, 1, n_rows_provider)
member_index = np.random.normal(0, 1, n_rows_member)

In [4]:
# Create member IDs
member_ids = ["M" + str(i) for i in range(1, n_rows_member + 1)]

# Create provider IDs
provider_ids = ["P" + str(i) for i in range(1, n_rows_provider + 1)]

In [5]:
import pandas as pd

# Assuming provider_ids, member_ids, provider_index, and member_index are already defined

# Convert lists to Pandas Series
provider_ids_series = pd.Series(provider_ids)
member_ids_series = pd.Series(member_ids)

# Convert the numpy arrays to Pandas Series
provider_index_series = pd.Series(provider_index)
member_index_series = pd.Series(member_index)

# Concatenate the Series
data_provider_index = pd.concat([provider_ids_series, provider_index_series], axis=1)
data_member_index = pd.concat([member_ids_series, member_index_series], axis=1)

# Optionally, you can give names to the columns
data_provider_index.columns = ["Provider ID", "Provider index"]
data_provider_index = data_provider_index.round(2)
data_member_index.columns = ["Member ID", "Member index"]
data_member_index = data_member_index.round(2)

In [6]:
# Number of members and providers

# Generating PMPM
mean_pmpm, sd_pmpm = 500, 100
data_member_index["PMPM"] = np.random.normal(mean_pmpm, sd_pmpm, n_rows_member)
data_member_index["PMPM"] = data_member_index["PMPM"].clip(
    0, 20000
)  # Ensure PMPM is between 0 and 20000


# Generating HEDIS, Prevention, Generic, Social Vulnerability Index
data_member_index["HEDIS"] = np.random.uniform(0, 1, n_rows_member)
data_member_index["Prevention"] = np.random.uniform(0, 1, n_rows_member)
data_member_index["Generic"] = np.random.uniform(0, 1, n_rows_member)
data_member_index["SocialVulnerabilityIndex"] = np.random.uniform(0, 1, n_rows_member)

In [7]:
# Adjusting values based on provider index percentile
# Assuming provider_index has a column 'ZScore' indicating the z-score
bottom_10_percentile = data_member_index["Member index"] <= data_member_index[
    "Member index"
].quantile(0.1)
bottom_providers = data_member_index[bottom_10_percentile]["Member ID"]

# Adjust values for members with providers in bottom 10 percentile

adjustment_factor = 0.3  # Example adjustment factor
data_member_index.loc[
    data_member_index["Member ID"].isin(bottom_providers), "PMPM"
] *= (1 + adjustment_factor)
data_member_index.loc[
    data_member_index["Member ID"].isin(bottom_providers), "HEDIS"
] *= (1 - adjustment_factor)
data_member_index.loc[
    data_member_index["Member ID"].isin(bottom_providers), "Prevention"
] *= (1 - adjustment_factor)
data_member_index.loc[
    data_member_index["Member ID"].isin(bottom_providers), "Generic"
] *= (1 - adjustment_factor)
data_member_index.loc[
    data_member_index["Member ID"].isin(bottom_providers), "SocialVulnerabilityIndex"
] *= (1 + adjustment_factor)


data_member_index

Unnamed: 0,Member ID,Member index,PMPM,HEDIS,Prevention,Generic,SocialVulnerabilityIndex
0,M1,-0.20,600.702482,0.221901,0.729904,0.921946,0.185367
1,M2,-0.83,351.850541,0.152828,0.229193,0.478179,0.756308
2,M3,1.73,404.559448,0.273582,0.946263,0.575789,0.651382
3,M4,0.19,556.162112,0.983603,0.766034,0.281967,0.124627
4,M5,-0.18,456.119559,0.266594,0.580547,0.972669,0.278419
...,...,...,...,...,...,...,...
99995,M99996,1.68,620.222439,0.196263,0.720796,0.214165,0.176862
99996,M99997,0.54,366.646315,0.089004,0.210240,0.262633,0.194530
99997,M99998,0.71,540.351391,0.313010,0.581753,0.013070,0.945721
99998,M99999,-0.45,572.711521,0.028841,0.051847,0.229373,0.121864


In [8]:
# Ensuring the values are within the specified ranges
data_member_index["HEDIS"] = data_member_index["HEDIS"].clip(0, 1)
data_member_index["Prevention"] = data_member_index["Prevention"].clip(0, 1)
data_member_index["Generic"] = data_member_index["Generic"].clip(0, 1)
data_member_index["SocialVulnerabilityIndex"] = data_member_index[
    "SocialVulnerabilityIndex"
].clip(0, 1)

data_member_index

Unnamed: 0,Member ID,Member index,PMPM,HEDIS,Prevention,Generic,SocialVulnerabilityIndex
0,M1,-0.20,600.702482,0.221901,0.729904,0.921946,0.185367
1,M2,-0.83,351.850541,0.152828,0.229193,0.478179,0.756308
2,M3,1.73,404.559448,0.273582,0.946263,0.575789,0.651382
3,M4,0.19,556.162112,0.983603,0.766034,0.281967,0.124627
4,M5,-0.18,456.119559,0.266594,0.580547,0.972669,0.278419
...,...,...,...,...,...,...,...
99995,M99996,1.68,620.222439,0.196263,0.720796,0.214165,0.176862
99996,M99997,0.54,366.646315,0.089004,0.210240,0.262633,0.194530
99997,M99998,0.71,540.351391,0.313010,0.581753,0.013070,0.945721
99998,M99999,-0.45,572.711521,0.028841,0.051847,0.229373,0.121864


In [9]:
data_member_high_risk = pd.DataFrame(bottom_10_percentile).rename(
    columns={"Member index": "High risk member"}
)

data_member_high_risk

Unnamed: 0,High risk member
0,False
1,False
2,False
3,False
4,False
...,...
99995,False
99996,False
99997,False
99998,False


In [10]:
# Add back high risk index

data_member_index = pd.concat([data_member_index, data_member_high_risk], axis=1)

In [11]:
data_member_index.columns

Index(['Member ID', 'Member index', 'PMPM', 'HEDIS', 'Prevention', 'Generic',
       'SocialVulnerabilityIndex', 'High risk member'],
      dtype='object')

In [16]:
data_member_index.to_csv("data_member_index.csv", index=False)

In [14]:
data_member_index_means = (
    data_member_index.drop(columns=["Member ID"])
    .groupby("High risk member")
    .mean()
    .round(2)
).reset_index()

def what_if_analysis(n_members_to_move, high_risk_pmpm, average_pmpm):
    """
    Calculate the savings in PMPM by moving a specified number of high-risk members to average risk.

    Args:
    n_members_to_move (int): Number of high-risk members to move to average risk.
    high_risk_pmpm (float): Average PMPM for high-risk members.
    average_pmpm (float): Average PMPM for average members.

    Returns:
    float: Total savings in PMPM.
    """
    # Calculate the difference in PMPM for one member
    pmpm_difference_per_member = high_risk_pmpm - average_pmpm

    # Calculate total savings
    total_savings = pmpm_difference_per_member * n_members_to_move

    return total_savings


# Example usage
high_risk_average_pmpm = data_member_index_means[
    data_member_index_means["High risk member"] == True
]["PMPM"].reset_index(drop=True)
# Example average PMPM for high-risk members
average_member_pmpm = data_member_index_means[
    data_member_index_means["High risk member"] == False
]["PMPM"].reset_index(
    drop=True
)  # Example average PMPM for average members
n_members_to_move = 10  # Number of members to move from high risk to average

savings = what_if_analysis(
    n_members_to_move, high_risk_average_pmpm, average_member_pmpm
)
print(f"Total savings in PMPM: {savings}")

Total savings in PMPM: 0    1507.2
Name: PMPM, dtype: float64
