In [3]:
import pandas as pd
import numpy as np

# Pin NumPy's global random state so repeated runs yield the same synthetic data
np.random.seed(42)

# Build the base member table one column at a time.
# NOTE: every np.random call in this script consumes the same seeded global
# stream, so the order of the draws determines the generated values and must
# not be rearranged.
n = 1000  # Number of data points
data = {'Member ID': np.arange(1, n + 1)}
data['Age'] = np.random.randint(18, 90, size=n)
data['Gender'] = np.random.choice(['Male', 'Female'], size=n)
data['Number of Chronic Conditions'] = np.random.randint(0, 6, size=n)
data['Generic Rate'] = np.random.randint(0, 101, size=n)
data['Total Claims'] = np.random.randint(1, 501, size=n)
data['Inpatient Percent'] = np.random.randint(0, 101, size=n)

df = pd.DataFrame(data)

# Metric name -> half-width w of the integer noise used to fake a prediction.
# The dict order is also the order in which derived columns are appended.
noise_half_width = {
    'Generic Rate': 10,
    'Total Claims': 50,
    'Inpatient Percent': 10,
}

# Predicted value = observed value plus a random integer drawn from [-w, w]
for metric, half_width in noise_half_width.items():
    jitter = np.random.randint(-half_width, half_width + 1, size=n)
    df[f'{metric} Predicted'] = df[metric] + jitter

# Adjusted value = observed value minus predicted value
for metric in noise_half_width:
    df[f'{metric} Adjusted'] = df[metric] - df[f'{metric} Predicted']

# Standardized adjusted value = adjusted column centered on its own mean and
# scaled by its own std()
for metric in noise_half_width:
    adjusted = df[f'{metric} Adjusted']
    df[f'{metric} Adjusted Standardized'] = (adjusted - adjusted.mean()) / adjusted.std()

# Flip the sign of the standardized scores for Total Claims and Inpatient
# Percent (presumably because lower values are the favorable direction for
# these two metrics -- confirm with the score's owner)
for metric in ('Total Claims', 'Inpatient Percent'):
    flipped_column = f'{metric} Adjusted Standardized'
    df[flipped_column] = df[flipped_column] * -1

# Mede Opportunity Score = sum of the three standardized columns, accumulated
# left to right starting from Generic Rate
score = df['Generic Rate Adjusted Standardized']
for metric in ('Total Claims', 'Inpatient Percent'):
    score = score + df[f'{metric} Adjusted Standardized']
df['Mede Opportunity Score'] = score

# Round every numeric column to 2 decimal places
df = df.round(2)

# Preview the first rows (shown as the cell output when run in a notebook)
df.head()


Unnamed: 0,Member ID,Age,Gender,Number of Chronic Conditions,Generic Rate,Total Claims,Inpatient Percent,Generic Rate Predicted,Total Claims Predicted,Inpatient Percent Predicted,Generic Rate Adjusted,Total Claims Adjusted,Inpatient Percent Adjusted,Generic Rate Adjusted Standardized,Total Claims Adjusted Standardized,Inpatient Percent Adjusted Standardized,Mede Opportunity Score
0,1,69,Male,5,40,264,67,44,233,68,-4,31,-1,-0.64,-1.08,0.19,-1.53
1,2,32,Male,0,32,263,68,38,267,59,-6,-4,9,-0.98,0.11,-1.45,-2.32
2,3,89,Female,3,49,292,0,44,322,-7,5,-30,7,0.9,0.99,-1.12,0.77
3,4,78,Male,1,67,274,67,59,225,61,8,49,6,1.41,-1.69,-0.96,-1.24
4,5,38,Male,5,70,3,11,72,-23,3,-2,26,8,-0.29,-0.91,-1.29,-2.49
