In [103]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [104]:
# Generate synthetic data
np.random.seed(42)
n_samples = 100

# Generate 'TransactionDate' as days from a starting point
start_date = pd.to_datetime('2023-01-01')
transaction_dates = np.arange(n_samples) + 1
transaction_dates = start_date + pd.to_timedelta(transaction_dates, unit='D')

# Create 'Aging Days' with a linear relationship to 'TransactionDate'
aging_days = 5 * transaction_dates.day + np.random.normal(0, 3, n_samples)

In [105]:

# Create a DataFrame
data = pd.DataFrame({'FacilityName': 'LG', 'PayerType': np.random.choice(['Medicare B', 'Medicare B Coins - INS (MBI)'], n_samples), 'TransactionDate': transaction_dates, 'AgingDays': aging_days})
# pd.to_datetime(data['TransactionDate'])
data

Unnamed: 0,FacilityName,PayerType,TransactionDate,AgingDays
0,LG,Medicare B,2023-01-02,11.490142
1,LG,Medicare B Coins - INS (MBI),2023-01-03,14.585207
2,LG,Medicare B,2023-01-04,21.943066
3,LG,Medicare B,2023-01-05,29.569090
4,LG,Medicare B,2023-01-06,29.297540
...,...,...,...,...
95,LG,Medicare B,2023-04-07,30.609455
96,LG,Medicare B Coins - INS (MBI),2023-04-08,40.888361
97,LG,Medicare B Coins - INS (MBI),2023-04-09,45.783166
98,LG,Medicare B,2023-04-10,50.015340


In [106]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Convert 'TransactionDate' to numeric values (days since the start date) for training set
start_date_train = train_data['TransactionDate'].min()
train_data['TransactionDate'] = (train_data['TransactionDate'] - start_date_train).dt.days.astype(int)

# Train a linear regression model for 'Medicare B'
medicare_b_data = train_data[train_data['PayerType'] == 'Medicare B']
X_medicare_b = medicare_b_data[['TransactionDate']]
y_medicare_b = medicare_b_data['AgingDays']
model_medicare_b = LinearRegression()
model_medicare_b.fit(X_medicare_b, y_medicare_b)


In [107]:

# Convert 'TransactionDate' to numeric values (days since the start date) for test set
test_data['TransactionDate'] = (test_data['TransactionDate'] - start_date_train).dt.days.astype(int)

# Make predictions on the test set for 'Medicare B'
X_test_medicare_b = test_data[test_data['PayerType'] == 'Medicare B'][['TransactionDate']]
y_test_medicare_b = test_data[test_data['PayerType'] == 'Medicare B']['AgingDays']
predictions_medicare_b = model_medicare_b.predict(X_test_medicare_b)

In [109]:

mse_medicare_b = mean_squared_error(y_test_medicare_b, predictions_medicare_b)
print(f'Mean Squared Error (Medicare B): {mse_medicare_b}')
print('RMSE')
rmse_medicare_b = np.sqrt(mse_medicare_b)
rmse_medicare_b
print(f'Root Mean Squared Error (Medicare B): {rmse_medicare_b}')
# Calculate Mean Absolute Percentage Error (MAPE) for 'Medicare B'
mape_medicare_b = np.mean(np.abs((y_test_medicare_b - predictions_medicare_b) / y_test_medicare_b)) * 100
print(f'Mean Absolute Percentage Error (Medicare B): {mape_medicare_b:.2f}%')


Mean Squared Error (Medicare B): 1849.9755714924675
RMSE
Root Mean Squared Error (Medicare B): 43.01134235864381
Mean Absolute Percentage Error (Medicare B): 304.69%


In [115]:
import pandas as pd
import openpyxl

In [116]:
df = pd.read_excel('TargetTest_sampledata.xlsx')  # adjust file format if needed
df['As Of Date'] = pd.to_datetime(df['As Of Date'])
df['Billdate'] = pd.to_datetime(df['Billdate'])
df['Days_to_Pay'] = (df['As Of Date'] - df['Billdate']).dt.days
df = df.drop(['Facility Name'], axis=1)
df = pd.get_dummies(df, columns=['Payer Type'])
X = df.drop('Aging Days', axis=1)
y = df['Aging Days']

KeyError: "['Aging Days'] not found in axis"

In [110]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor()
model.fit(X_train, y_train)

FileNotFoundError: [Errno 2] No such file or directory: 'your_dataset.xlsx'

In [ ]:
def predict_days_to_pay(model, new_data):
    # Apply the same preprocessing steps as before
    new_data['As Of Date'] = pd.to_datetime(new_data['As Of Date'])
    new_data['Billdate'] = pd.to_datetime(new_data['Billdate'])
    new_data['Days_to_Pay'] = (new_data['As Of Date'] - new_data['Billdate']).dt.days

    new_data = new_data.drop(['Facility Name', 'Facility Code', 'Contact Number', 'Resident ID', 'Insurance Co', 'Policy Number'], axis=1)
    new_data = pd.get_dummies(new_data, columns=['Payer Type'])

    # Predict 'Aging Days'
    y_pred = model.predict(new_data)

    return y_pred

In [ ]:
def weekly_prediction(model, new_data):
    # Assume 'new_data' is the dataset for the current week
    predictions = predict_days_to_pay(model, new_data)

    # Combine predictions with facility and payer information
    result_df = pd.DataFrame({
        'Facility Name': new_data['Facility Name'],
        'Payer Type': new_data['Payer Type'],
        'Predicted Days to Pay': predictions
    })

    return result_df


In [ ]:
# Load the existing model (you need to have already trained and saved the model)
existing_model = RandomForestRegressor()
existing_model.fit(X_train, y_train)

# Load new data for the current week
new_data = pd.read_excel('new_data.xlsx')  # adjust file format if needed

# Perform weekly prediction
weekly_result = weekly_prediction(existing_model, new_data)

# Display or save the results
print(weekly_result)
# weekly_result.to_excel('weekly_predictions.xlsx', index=False)
