In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

In [10]:
# Load the AR performance-review workbook into a DataFrame.
input_path = '/content/AR - performance review - input.xlsx'
df = pd.read_excel(input_path)

In [12]:
# Step 3: Clean Data
# Strip "$" and "," from the money columns and convert them to float.
# The pattern is a raw string: "\$" inside a normal string literal is an
# invalid escape sequence and triggers a warning on modern Python versions.
currency_pattern = r"[\$,]"
for money_col in ("Payment Amount", "Balance"):
    df[money_col] = df[money_col].replace(currency_pattern, "", regex=True).astype(float)

# Step 4: Create Target Variable
# Denied = 1 when a non-blank Denial Reason is present, else 0.
# Values are cast to str before stripping so that a non-string reason
# (e.g. a numeric code read from Excel) counts as a denial instead of
# raising AttributeError on .strip(); missing values are masked out first.
denial_reason = df["Denial Reason"]
has_reason = denial_reason.notna() & denial_reason.astype(str).str.strip().ne("")
df["Denied"] = has_reason.astype("int64")



In [13]:
# Step 5: Encode Categorical Features
# One encoder per categorical column; each is pickled later so the same
# label -> code mapping can be reapplied outside this notebook.
le_cpt = LabelEncoder()
le_ins = LabelEncoder()
le_phys = LabelEncoder()

# Step 6: Prepare Features and Target
# The integer codes are written into a separate feature frame instead of
# overwriting the columns of df. Overwriting made this cell non-idempotent:
# on a re-run the encoders were fitted on the codes from the previous run,
# so the saved encoders no longer mapped the raw labels.
X = df[["CPT Code", "Insurance Company", "Physician Name", "Payment Amount", "Balance"]].copy()
X["CPT Code"] = le_cpt.fit_transform(df["CPT Code"])
X["Insurance Company"] = le_ins.fit_transform(df["Insurance Company"])
X["Physician Name"] = le_phys.fit_transform(df["Physician Name"])
y = df["Denied"]

# Step 7: Train/Test Split
# 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [14]:
# Step 8: Train Model
# Hyper-parameters are collected in one place; the seed keeps the forest reproducible.
forest_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)


In [15]:
# Step 9: Evaluate Model
# Predict on the held-out split, then print overall accuracy followed by
# the per-class precision/recall/F1 report.
y_pred = clf.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

test_report = classification_report(y_test, y_pred)
print("Classification Report:\n", test_report)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6



In [17]:
import os

# Create the output folder for the saved artifacts; nothing happens if it is already there.
models_dir = "models"
os.makedirs(models_dir, exist_ok=True)

In [18]:
# Step 10: Save Model and Encoders
# Each entry maps an output path to the fitted object that is pickled there.
artifacts = {
    "models/model1.pkl": clf,
    "models/le_cpt.pkl": le_cpt,
    "models/le_ins.pkl": le_ins,
    "models/le_phys.pkl": le_phys,
}

for artifact_path, fitted_obj in artifacts.items():
    with open(artifact_path, "wb") as f:
        pickle.dump(fitted_obj, f)

print("Model and encoders saved in 'models/' folder.")

Model and encoders saved in 'models/' folder.
