# 02 - EDA & Visualization
This notebook explores patterns in patient no-show behavior using visualizations.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Build the analysis frame in one chain: load -> rename -> parse dates -> derive features.
df = (
    # Path is relative to the notebook's working directory.
    pd.read_csv('../data/healthcare_noshows_appt.csv')
    # Give the source columns consistently spelled / styled names.
    .rename(columns={
        'Hipertension': 'Hypertension',
        'Handcap': 'Handicap',
        'Showed_up': 'ShowedUp',
        'Date.diff': 'LeadTimeDays',
    })
    # Parse both date columns so the .dt accessor is available below.
    .assign(
        ScheduledDay=lambda frame: pd.to_datetime(frame['ScheduledDay']),
        AppointmentDay=lambda frame: pd.to_datetime(frame['AppointmentDay']),
    )
    # Derived features, appended in this order:
    #   AppointmentWeekday / ScheduledWeekday - weekday name of each date
    #   LeadTime      - day component of (AppointmentDay - ScheduledDay)
    #   HourScheduled - hour of day taken from ScheduledDay
    # NOTE(review): 'LeadTimeDays' (renamed from 'Date.diff') may already hold
    # the same quantity as 'LeadTime' - confirm before relying on both.
    .assign(
        AppointmentWeekday=lambda frame: frame['AppointmentDay'].dt.day_name(),
        ScheduledWeekday=lambda frame: frame['ScheduledDay'].dt.day_name(),
        LeadTime=lambda frame: (frame['AppointmentDay'] - frame['ScheduledDay']).dt.days,
        HourScheduled=lambda frame: frame['ScheduledDay'].dt.hour,
    )
)


## Gender vs. Showed Up

In [None]:
# Count of appointments per Gender value, with one bar per ShowedUp value.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='Gender', hue='ShowedUp', ax=ax)
ax.set_title('Showed Up vs Gender')
fig.tight_layout()
plt.show()

## Age Distribution by Showed Up

In [None]:
# Histogram of Age in 30 bins; the ShowedUp groups are stacked within each bin.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(
    data=df,
    x='Age',
    hue='ShowedUp',
    multiple='stack',
    bins=30,
    ax=ax,
)
ax.set_title('Age Distribution by Showed Up')
fig.tight_layout()
plt.show()

## Showed Up by Appointment Weekday

In [None]:
# Count of appointments per appointment weekday, split by the ShowedUp value.
# The explicit `order` fixes the x-axis to calendar order.
# NOTE(review): 'Sunday' is not in the order list - confirm the data contains
# no Sunday appointments, otherwise they are left out of this chart.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(
    data=df,
    x='AppointmentWeekday',
    hue='ShowedUp',
    order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    ax=ax,
)
ax.set_title('Showed Up by Appointment Weekday')
fig.tight_layout()
plt.show()