In [None]:
!pip install pulp

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pulp import LpProblem, LpVariable, lpSum, LpBinary, LpMaximize, LpStatus, PULP_CBC_CMD

# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Load the risk-model output. Later cells read the predicted_proba,
# risk_category and true_label columns from this frame.
df = pd.read_csv("risk_categorized_results.csv")

# Costs are simulated: one reproducible integer per row, drawn from
# [100, 1000) with the legacy global NumPy RNG seeded to 42.
np.random.seed(42)
n_linhas = len(df)
df["simulated_cost"] = np.random.randint(low=100, high=1000, size=n_linhas)

df.head(10)

In [None]:
# Limits used as right-hand sides of the optimisation constraints.
CAPACIDADE_MAXIMA = 30         # upper bound on the number of selected patients
ORCAMENTO_TOTAL = 18000        # upper bound on the summed simulated cost
INCLUSAO_MIN_ALTO_RISCO = 10   # lower bound on selected patients labelled "alto"

print(
    "Defined parameters:",
    f"Maximum capacity: {CAPACIDADE_MAXIMA}",
    f"Total budget: R$ {ORCAMENTO_TOTAL}",
    f"Minimum inclusion of high-risk patients: {INCLUSAO_MIN_ALTO_RISCO}",
    sep="\n",
)

In [None]:
# The row index doubles as the patient identifier that keys every lookup below.
df["id"] = df.index

# Objective weights: multiplier on predicted risk and multiplier on cost.
PESO_RISCO = 1.0
PESO_CUSTO = 0.001

# id -> value lookup tables used when building the objective and constraints.
risco, custo, categoria = (
    dict(zip(df["id"], df[coluna]))
    for coluna in ("predicted_proba", "simulated_cost", "risk_category")
)

In [None]:
# Maximisation problem that receives the objective and constraints in later cells.
modelo = LpProblem(name="Alocacao_Otima_de_Pacientes", sense=LpMaximize)

In [None]:
# One binary decision variable per patient id (a value of 1 marks the patient
# as selected when the solution is read back).
x = dict(
    (paciente, LpVariable(f"x_{paciente}", cat=LpBinary))
    for paciente in df["id"]
)
list(x.items())[:5]

In [None]:
# Objective: for each patient, weighted predicted risk minus weighted cost,
# each multiplied by that patient's decision variable.
objetivo = lpSum(
    PESO_RISCO * risco[pid] * x[pid] - PESO_CUSTO * custo[pid] * x[pid]
    for pid in df["id"]
)
modelo += objetivo, "Maximizar_Risco_Menos_Custo"

In [None]:
# Capacity: no more than CAPACIDADE_MAXIMA patients selected in total.
total_selecionados = lpSum(x[pid] for pid in df["id"])
modelo += total_selecionados <= CAPACIDADE_MAXIMA, "Restricao_Capacidade"

# Budget: summed cost of the selected patients stays within ORCAMENTO_TOTAL.
custo_selecionados = lpSum(custo[pid] * x[pid] for pid in df["id"])
modelo += custo_selecionados <= ORCAMENTO_TOTAL, "Restricao_Orcamento"

# Coverage: at least INCLUSAO_MIN_ALTO_RISCO selected patients must carry the
# "alto" risk category.
ids_alto_risco = list(filter(lambda pid: categoria[pid] == "alto", df["id"]))
alto_risco_selecionados = lpSum(x[pid] for pid in ids_alto_risco)
modelo += alto_risco_selecionados >= INCLUSAO_MIN_ALTO_RISCO, "Restricao_Alto_Risco"

In [None]:
# Solve with CBC; msg=True streams the solver log into the cell output.
status = modelo.solve(PULP_CBC_CMD(msg=True))
status_texto = LpStatus[modelo.status]
print(f"Optimization status: {status_texto}")

# Fail fast on anything other than an optimal solution (e.g. infeasible or
# unbounded): the variable values read by the following cells would not
# describe a valid allocation.
if status_texto != "Optimal":
    raise RuntimeError(
        f"Optimization did not reach an optimal solution (status: {status_texto}); "
        "check the capacity, budget and high-risk constraints."
    )

In [None]:
# Read the solution back as a 0/1 flag per patient.
# The solver reports binary decisions as floats that can sit within numerical
# tolerance of 0 or 1 (e.g. 0.9999999), so round instead of truncating with
# int(), which would turn such a value into 0. A variable with no value
# (None) is treated as not selected.
df["selecionado"] = df["id"].map(lambda i: int(round(x[i].varValue or 0)))

# Independent copy of the selected rows, used by the plots below.
df_selecionados = df[df["selecionado"] == 1].copy()
df_selecionados.head(10)

In [None]:
def _plot_series(series, series_name, series_index=0, ax=None):
  """Draw one line of ``true_label`` against ``id`` for a single group.

  Args:
    series: DataFrame holding the rows of one group; reads its 'id' and
      'true_label' columns.
    series_name: Label attached to the line (shown in the legend).
    series_index: Position of the group; selects the colour by cycling
      through the 'Dark2' palette.
    ax: Axes to draw on. Defaults to the current pyplot axes.
  """
  palette = list(sns.palettes.mpl_palette('Dark2'))
  target = plt.gca() if ax is None else ax
  target.plot(series['id'], series['true_label'], label=series_name,
              color=palette[series_index % len(palette)])

fig, ax = plt.subplots(figsize=(10, 5.2), layout='constrained')
df_sorted = df_selecionados.sort_values('id', ascending=True)
for i, (series_name, series) in enumerate(df_sorted.groupby('risk_category')):
  _plot_series(series, series_name, i, ax=ax)
# Build the legend once, after all lines exist: calling fig.legend inside the
# loop added a new legend per category, stacked on top of each other.
fig.legend(title='risk_category', bbox_to_anchor=(1, 1), loc='upper left')
sns.despine(fig=fig, ax=ax)
ax.set_xlabel('id')
_ = ax.set_ylabel('true_label')

In [None]:
def _plot_series(series, series_name, series_index=0, ax=None):
  """Draw one line of ``predicted_proba`` against ``id`` for a single group.

  Args:
    series: DataFrame holding the rows of one group; reads its 'id' and
      'predicted_proba' columns.
    series_name: Label attached to the line (shown in the legend).
    series_index: Position of the group; selects the colour by cycling
      through the 'Dark2' palette.
    ax: Axes to draw on. Defaults to the current pyplot axes.
  """
  palette = list(sns.palettes.mpl_palette('Dark2'))
  target = plt.gca() if ax is None else ax
  target.plot(series['id'], series['predicted_proba'], label=series_name,
              color=palette[series_index % len(palette)])

fig, ax = plt.subplots(figsize=(10, 5.2), layout='constrained')
df_sorted = df_selecionados.sort_values('id', ascending=True)
for i, (series_name, series) in enumerate(df_sorted.groupby('risk_category')):
  _plot_series(series, series_name, i, ax=ax)
# Build the legend once, after all lines exist: calling fig.legend inside the
# loop added a new legend per category, stacked on top of each other.
fig.legend(title='risk_category', bbox_to_anchor=(1, 1), loc='upper left')
sns.despine(fig=fig, ax=ax)
ax.set_xlabel('id')
_ = ax.set_ylabel('predicted_proba')

In [None]:
# Figure height grows with the number of distinct risk categories among the
# selected patients (1.2 inches per category).
n_categorias = len(df_selecionados['risk_category'].unique())
figsize = (12, 1.2 * n_categorias)
plt.figure(figsize=figsize)

# Cost distribution per risk category; inner='stick' draws one stick per row.
sns.violinplot(
    data=df_selecionados,
    x='simulated_cost',
    y='risk_category',
    inner='stick',
    palette='Dark2',
)
# Remove all four frame lines.
sns.despine(top=True, right=True, bottom=True, left=True)

In [None]:
def _plot_series(series, series_name, series_index=0, ax=None):
  """Draw one line of ``simulated_cost`` against ``id`` for a single group.

  Args:
    series: DataFrame holding the rows of one group; reads its 'id' and
      'simulated_cost' columns.
    series_name: Label attached to the line (shown in the legend).
    series_index: Position of the group; selects the colour by cycling
      through the 'Dark2' palette.
    ax: Axes to draw on. Defaults to the current pyplot axes.
  """
  palette = list(sns.palettes.mpl_palette('Dark2'))
  target = plt.gca() if ax is None else ax
  target.plot(series['id'], series['simulated_cost'], label=series_name,
              color=palette[series_index % len(palette)])

fig, ax = plt.subplots(figsize=(10, 5.2), layout='constrained')
df_sorted = df_selecionados.sort_values('id', ascending=True)
for i, (series_name, series) in enumerate(df_sorted.groupby('risk_category')):
  _plot_series(series, series_name, i, ax=ax)
# Build the legend once, after all lines exist: calling fig.legend inside the
# loop added a new legend per category, stacked on top of each other.
fig.legend(title='risk_category', bbox_to_anchor=(1, 1), loc='upper left')
sns.despine(fig=fig, ax=ax)
ax.set_xlabel('id')
_ = ax.set_ylabel('simulated_cost')

In [None]:
# Histogram (20 bins) of the simulated cost of the selected patients.
hist_ax = df_selecionados['simulated_cost'].plot.hist(bins=20, title='simulated_cost')
# Hide the top and right frame lines.
hist_ax.spines[['top', 'right']].set_visible(False)

In [None]:
# Histogram (20 bins) of the true label of the selected patients.
hist_ax = df_selecionados['true_label'].plot.hist(bins=20, title='true_label')
# Hide the top and right frame lines.
hist_ax.spines[['top', 'right']].set_visible(False)

In [None]:
# Rows flagged as selected by the optimisation.
escolhidos = df.loc[df["selecionado"] == 1]

n_total = df["selecionado"].sum()
media_risco = escolhidos["predicted_proba"].mean()
custo_total = escolhidos["simulated_cost"].sum()
n_alto_risco = len(escolhidos[escolhidos["risk_category"] == "alto"])

print(
    "Optimized Allocation Summary:",
    f"Total number of patients treated: {n_total}",
    f"Average risk of those selected: {media_risco:.4f}",
    f"Total cost of allocation: R$ {custo_total}",
    f"High-risk patients included: {n_alto_risco}",
    sep="\n",
)

In [None]:
# Persist the full frame, including the 'selecionado' flag, without the index column.
df.to_csv(path_or_buf="optimized_allocation_results.csv", index=False)