In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("../data/medical_appointments.csv")


In [2]:
# Handle missing values
df["age"].fillna(df["age"].median(), inplace=True)
df["specialty"].fillna("Unknown", inplace=True)
df["disability"].fillna("Unknown", inplace=True)
df["place"].fillna("Unknown", inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["age"].fillna(df["age"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["specialty"].fillna("Unknown", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting v

In [3]:
# Encode categorical variables
le = LabelEncoder()
cat_cols = ["gender", "specialty", "disability", "place"]

for col in cat_cols:
    df[col] = le.fit_transform(df[col])


In [4]:
# Date features
df["appointment_date_continuous"] = pd.to_datetime(df["appointment_date_continuous"])
df["day"] = df["appointment_date_continuous"].dt.day
df["month"] = df["appointment_date_continuous"].dt.month
df["weekday"] = df["appointment_date_continuous"].dt.weekday


In [5]:
# Target encoding
df["no_show"] = df["no_show"].map({"Yes":1, "No":0})


In [7]:
# Save processed files
df.to_csv("../data/processed/classification_data.csv", index=False)

daily = df.groupby("appointment_date_continuous").size().reset_index(name="appointments")
daily.to_csv("../data/processed/forecasting_data.csv", index=False)
