## Setup and Imports

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import warnings
warnings.filterwarnings('ignore')


## Data Loading and Preprocessing

In [None]:
import pandas as pd

def load_data(url="https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip"):
    """
    Load the diabetes dataset and derive a simulated medication-event table.

    The archive is read fully into memory, the data CSV is picked out of it,
    and four columns are kept. Two synthetic columns are then added:
    ``prescription_date`` (2020-01-01 plus a normally distributed day offset,
    mean 30, std 10, truncated to whole days) and ``medication`` (a random
    choice between ``'medA'`` and ``'medB'``).

    Args:
        url: HTTP(S)/FTP URL or local filesystem path of the zip archive.
            Defaults to the UCI repository location.

    Returns:
        A new DataFrame with columns ``patient_nbr``, ``admission_type_id``,
        ``time_in_hospital``, ``medical_specialty``, ``prescription_date``
        and ``medication``.

    Raises:
        ValueError: If the archive holds several files and none of them is
            named ``diabetic_data.csv``.
        KeyError: If the CSV lacks one of the required columns.

    Note:
        Re-seeds NumPy's *global* random generator with 42 on every call so
        the simulated columns are reproducible.
    """
    # Local imports: these names are only needed here and are not part of the
    # notebook's shared import cell.
    import io
    import zipfile
    from urllib.parse import urlparse
    from urllib.request import urlopen

    # Fetch the raw archive bytes from either a remote URL or a local path.
    if urlparse(str(url)).scheme in ("http", "https", "ftp"):
        with urlopen(url) as response:
            payload = response.read()
    else:
        with open(url, "rb") as local_file:
            payload = local_file.read()

    # pd.read_csv(..., compression='zip') refuses archives with more than one
    # member, so pick the data file explicitly instead.
    with zipfile.ZipFile(io.BytesIO(payload)) as archive:
        members = [name for name in archive.namelist() if not name.endswith("/")]
        preferred = [
            name for name in members
            if name.rsplit("/", 1)[-1].lower() == "diabetic_data.csv"
        ]
        if preferred:
            member = preferred[0]
        elif len(members) == 1:
            member = members[0]
        else:
            raise ValueError(
                f"Cannot determine which file to load from the archive: {members}"
            )
        with archive.open(member) as handle:
            df = pd.read_csv(handle)

    # Extract medication events; .copy() gives an independent frame so the
    # column assignments below do not write into a slice of ``df``.
    columns = ['patient_nbr', 'admission_type_id', 'time_in_hospital', 'medical_specialty']
    med_events = df[columns].copy()
    n_events = len(med_events)

    # Create prescription dates (simulated). astype(int) truncates toward
    # zero, matching int(x) applied element-wise.
    base_date = datetime(2020, 1, 1)
    np.random.seed(42)
    day_offsets = np.random.normal(30, 10, n_events).astype(int)
    med_events['prescription_date'] = pd.Timestamp(base_date) + pd.to_timedelta(day_offsets, unit='D')

    # Simulate two medications (A and B)
    med_events['medication'] = np.random.choice(['medA', 'medB'], size=n_events)

    return med_events

# Load the simulated medication-event table
data = load_data()

# Display first few rows and basic information
print("Dataset Overview:")
print(data.head())
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
data.info()
