In [1]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

# Impute missing numeric values in the merged patient table with
# IterativeImputer and write the completed table to a new Excel file.
# Identifier/categorical columns are passed through untouched.

# 1. Load dataset
df = pd.read_excel("merged_patient_data_final_ready.xlsx")

# 2. Clean 'age' column if necessary (e.g., '70yrs' → 70)
# expand=False makes str.extract return a Series rather than a one-column
# DataFrame. The optional fractional group keeps values such as 0.5 from
# being truncated to 0 after the round-trip through str; whole-number ages
# ("70", "70.0", "70yrs") still parse to the same float as before.
# Cells with no digits (including NaN, which stringifies to "nan") stay NaN.
df['age'] = (
    df['age']
    .astype(str)
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)

# 3. Separate categorical columns to exclude from imputation
id_cols = ['id', 'name', 'sex', 'birth']  # not to impute
X = df.drop(columns=id_cols)

# By default IterativeImputer discards features that are entirely missing at
# fit time, which would make the output narrower than X and break the
# column relabeling in step 5. Impute only columns that have at least one
# observed value; fully-empty columns are restored as NaN afterwards.
impute_cols = X.columns[X.notna().any()]

# 4. Apply Iterative Imputer
imputer = IterativeImputer(max_iter=10, random_state=42)
X_imputed = imputer.fit_transform(X[impute_cols])

# 5. Convert back to DataFrame
# Reuse the original index so the concat below aligns row-for-row, and
# reindex to X's column order so any fully-empty columns reappear (as NaN)
# in their original positions.
X_imputed_df = pd.DataFrame(
    X_imputed, columns=impute_cols, index=X.index
).reindex(columns=X.columns)

# 6. Merge with ID columns
df_final = pd.concat([df[id_cols], X_imputed_df], axis=1)

# 7. Save to Excel
df_final.to_excel("merged_patient_data_final_imputed.xlsx", index=False)
print("✅ 완료: merged_patient_data_final_imputed.xlsx 저장됨")


✅ 완료: merged_patient_data_final_imputed.xlsx 저장됨
