In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans, AgglomerativeClustering, MeanShift
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN

# Expects a DataFrame named 'model_data' to already exist and to contain
# every column listed below.

# Columns that are one-hot encoded. Categories not seen during fitting are
# ignored at transform time instead of raising an error.
categorical_cols = ["Gender", "Education Level", "Job Title"]
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

# Feature subset used for clustering
feature_cols = ["Age", "Gender", "Years of Experience", "Education Level", "Job Title"]
X = model_data[feature_cols]

# Encode the categorical columns; the remaining columns ("Age" and
# "Years of Experience") are passed through unchanged.
# NOTE(review): the passed-through columns are not scaled, so they may dominate
# distance-based clustering -- confirm this is intended.
encoding_steps = [("cat", categorical_transformer, categorical_cols)]
preprocessor = ColumnTransformer(transformers=encoding_steps, remainder="passthrough")

# Fit the preprocessor on the features and build the transformed matrix
X_transformed = preprocessor.fit_transform(X)

# One-hot encoding may yield a SciPy sparse matrix, which some of the
# estimators below (e.g. AgglomerativeClustering, GaussianMixture) do not
# accept. Densify once so every model is fitted on the same array.
if hasattr(X_transformed, "toarray"):
    X_dense = X_transformed.toarray()
else:
    X_dense = np.asarray(X_transformed)

# Define clustering models as (display name, estimator) pairs
models = [
    ("K-Means", KMeans(n_clusters=5, random_state=42)),
    ("Agglomerative Clustering", AgglomerativeClustering(n_clusters=5)),
    ("Mean Shift", MeanShift()),
    ("Gaussian Mixture", GaussianMixture(n_components=5, random_state=42)),
]

# Fit each model and store its labels in a "<name> Cluster" column of
# model_data. Without this step the "K-Means Cluster" column read below
# would not exist and the lookup would raise KeyError.
for model_name, model in models:
    model_data[f"{model_name} Cluster"] = model.fit_predict(X_dense)

# DBSCAN doesn't have a direct 'predict' method; use the 'fit_predict' method.
# Samples labelled -1 are the ones DBSCAN treats as noise.
dbscan_model = DBSCAN(eps=1.0, min_samples=5)
dbscan_cluster_labels = dbscan_model.fit_predict(X_dense)
model_data["DBSCAN Cluster"] = dbscan_cluster_labels

# Print the number of samples in each cluster (for K-Means as an example)
print("Number of samples in each K-Means cluster:")
print(model_data["K-Means Cluster"].value_counts())
