In [13]:
import pandas as pd
from pulp import (
    LpBinary,
    LpMinimize,
    LpProblem,
    LpStatus,
    LpStatusOptimal,
    LpVariable,
    lpSum,
)

# Select a fixed number of claims so that their combined processing time is
# minimal.
#
# Reads the filtered claims CSV, builds a binary selection model with PuLP,
# solves it, and writes the input rows plus two result columns
# ('Assignment Decision' and 'Processed Time') to 'optimized_claims.csv'.

# Number of claims the model is required to select.
CLAIMS_TO_PROCESS = 500

# Load the cleaned dataset
file_path = '/Users/kushdesai/Documents/final/filtered_healthcare_claims.csv'
data = pd.read_csv(file_path)

# Ensure the dataset has the necessary columns. Explicit exceptions are used
# instead of `assert`, because assertions are stripped when Python runs with -O.
for required_column in ('Claim ID', 'Processing Time'):
    if required_column not in data.columns:
        raise ValueError(f"Dataset must have a '{required_column}' column")

# With fewer rows than the required count the equality constraint below can
# never be satisfied, so fail early with a readable message.
if len(data) < CLAIMS_TO_PROCESS:
    raise ValueError(
        f"Dataset has {len(data)} rows; at least {CLAIMS_TO_PROCESS} are required"
    )

# Extract relevant data
claim_ids = data['Claim ID']
processing_times = data['Processing Time']

# Define the optimization problem
prob = LpProblem("Claim_Processing_Optimization", LpMinimize)

# Define decision variables: 1 if claim is processed, 0 otherwise.
# The dict is keyed by claim ID, so rows that share a claim ID share one variable.
decision_vars = {claim: LpVariable(f"Process_{claim}", 0, 1, LpBinary) for claim in claim_ids}

# Objective function: Minimize total processing time.
# The two columns are paired row by row with zip(); looking the time up as
# processing_times[i] with a positional counter is label-based and only
# correct while the DataFrame still has its default 0..n-1 index.
prob += (
    lpSum([time * decision_vars[claim] for claim, time in zip(claim_ids, processing_times)]),
    "Total_Processing_Time",
)

# Constraint: Exactly CLAIMS_TO_PROCESS claims must be processed
prob += (
    lpSum([decision_vars[claim] for claim in claim_ids]) == CLAIMS_TO_PROCESS,
    "Total_Claims_Processed",
)

# Solve the optimization problem and refuse to report anything but an optimum;
# after an infeasible or failed solve the variable values are not meaningful.
prob.solve()
if prob.status != LpStatusOptimal:
    raise RuntimeError(
        f"Optimization did not reach an optimal solution (status: {LpStatus[prob.status]})"
    )

# Collect results. The solver reports values as floats, so round before
# converting: int() alone would truncate a value such as 0.9999999 to 0.
data['Assignment Decision'] = [int(round(decision_vars[claim].varValue)) for claim in claim_ids]
data['Processed Time'] = data['Processing Time'] * data['Assignment Decision']

# Save the optimized results
data.to_csv('optimized_claims.csv', index=False)

print("Optimization complete. Results saved to 'optimized_claims.csv'.")

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/anaconda3/lib/python3.12/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/vw/59w58nz160z26c0qzj5_fz000000gn/T/f777f9979e3240f7a1e380044eac9e1e-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /var/folders/vw/59w58nz160z26c0qzj5_fz000000gn/T/f777f9979e3240f7a1e380044eac9e1e-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 6 COLUMNS
At line 2971 RHS
At line 2973 BOUNDS
At line 3715 ENDATA
Problem MODEL has 1 rows, 741 columns and 741 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 7233 - 0.00 seconds
Cgl0004I processed model has 1 rows, 716 columns (716 integer (692 of which binary)) and 716 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of 7233
Cbc0038I Cleaned solution of 7