### Data

The satisfaction sample data were generated based on <https://rankings.newsweek.com/worlds-best-hospitals-2025-top-250/south-korea>, and the expense sample data were generated based on <https://www.doctorsnews.co.kr/news/articleView.html?idxno=156850>.

In [16]:
# Third-party packages this notebook relies on. Uncomment the line below to
# install them; inside Jupyter, `%pip` is generally preferable to `!pip`
# because it targets the environment of the running kernel.
# !pip install pandas numpy scikit-learn

In [19]:
import pandas as pd
# Read the sample hospital records from the CSV next to this notebook and
# show the first five rows as a quick sanity check of the columns.
df = pd.read_csv(filepath_or_buffer="korean_hospital_data.csv")
df.head(n=5)

Unnamed: 0,hospital,age,sex,satisfaction_rate,estimated_expense
0,Asan Medical Center,58,F,93.44,824405
1,Asan Medical Center,38,M,94.33,958960
2,Asan Medical Center,41,M,91.75,850956
3,Asan Medical Center,40,M,88.97,892098
4,Asan Medical Center,63,M,89.75,920822


### Comparative Study

In [17]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# --- Load ---------------------------------------------------------------
# Re-read the dataset and trim stray whitespace from the column headers so
# the column lookups below match exactly.
df = pd.read_csv("korean_hospital_data.csv")
df.columns = df.columns.str.strip()

# --- Encode categoricals --------------------------------------------------
# The fitted encoders are kept at module level: recommend_hospitals() uses
# them to encode its inputs and to turn predictions back into hospital names.
le_hospital, le_sex = LabelEncoder(), LabelEncoder()
df["hospital_encoded"] = le_hospital.fit_transform(df["hospital"])
df["sex_encoded"] = le_sex.fit_transform(df["sex"])

# --- Feature matrix -------------------------------------------------------
features = ["hospital_encoded", "age", "sex_encoded"]
X = df[features]


def _split_and_fit(X, y):
    """Hold out 20% of the rows and fit a 200-tree random forest on the rest.

    Returns ``(X_train, X_test, y_train, y_test, fitted_model)``. Both the
    split and the forest use ``random_state=42``.
    """
    parts = train_test_split(X, y, test_size=0.2, random_state=42)
    forest = RandomForestRegressor(n_estimators=200, random_state=42)
    forest.fit(parts[0], parts[2])  # parts = [X_train, X_test, y_train, y_test]
    return (*parts, forest)


# --- Target: satisfaction -------------------------------------------------
y_satisfaction = df["satisfaction_rate"]
(X_train_sat, X_test_sat,
 y_train_sat, y_test_sat,
 model_satisfaction) = _split_and_fit(X, y_satisfaction)

# --- Target: estimated expense --------------------------------------------
y_expense = df["estimated_expense"]
(X_train_exp, X_test_exp,
 y_train_exp, y_test_exp,
 model_expense) = _split_and_fit(X, y_expense)

def recommend_hospitals(age, sex):
    """Recommend one hospital per criterion for a given patient profile.

    Builds one candidate row per hospital known to ``le_hospital`` (same age
    and sex on every row), scores the rows with ``model_satisfaction`` and
    ``model_expense``, and reports the hospital with the highest predicted
    satisfaction and the one with the lowest predicted expense.

    Args:
        age: Patient age, placed unchanged in the ``age`` feature column.
        sex: Sex label, encoded with ``le_sex.transform`` (presumably must be
            a label the encoder was fitted on -- an unseen label is expected
            to make the encoder raise).

    Returns:
        dict: ``{"Best by satisfaction": <hospital name>,
        "Best by lowest cost": <hospital name>}``.
    """
    sex_code = le_sex.transform([sex])[0]

    # One row per encoded hospital id (0..n-1); the scalar age / sex code are
    # broadcast down the frame. Row position therefore equals the hospital code.
    n_hospitals = len(le_hospital.classes_)
    candidates = pd.DataFrame({"hospital_encoded": range(n_hospitals)}).assign(
        age=age,
        sex_encoded=sex_code,
    )

    top_satisfaction = np.argmax(model_satisfaction.predict(candidates))
    cheapest = np.argmin(model_expense.predict(candidates))

    # Decode both winning codes back to hospital names in a single call.
    winners = le_hospital.inverse_transform([top_satisfaction, cheapest])
    return {
        "Best by satisfaction": winners[0],
        "Best by lowest cost": winners[1],
    }

# Example: query the recommender for a single patient profile and print the
# winning hospital under each criterion.
age_input, sex_input = 30, "M"
result = recommend_hospitals(age=age_input, sex=sex_input)

print(f"For a {age_input}-year-old {sex_input}:")
print("Best by satisfaction:", result["Best by satisfaction"])
print("Best by lowest cost:", result["Best by lowest cost"])

For a 30-year-old M:
Best by satisfaction: Samsung Medical Center
Best by lowest cost: SNU Bundang Hospital
