# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Columns stored as comma-formatted text (e.g. "1,234") that must become numeric.
NUMERIC_COLUMNS = [
    'Number of Medicare Beneficiaries',
    'Average Medicare Allowed Amount',
    'Average Medicare Payment Amount',
]

# Features the Gaussian Mixture Model is fitted on.
FEATURE_COLUMNS = [
    'Average Medicare Allowed Amount',
    'Number of Medicare Beneficiaries',
    'Gender Encoded',
]

N_COMPONENTS = 2  # Adjust as necessary
ANOMALY_PERCENTILE = 2  # samples below this percentile of log-likelihood are flagged


def parse_numeric(values) -> pd.Series:
    """Convert comma-formatted numbers to a numeric Series.

    Args:
        values: A Series (or any array-like) holding numbers, numeric strings
            such as ``"1,234"``, or a mix of both.

    Returns:
        A Series with thousands separators removed and every entry converted
        to a number; entries that cannot be parsed become NaN.
    """
    column = pd.Series(values)
    if pd.api.types.is_numeric_dtype(column):
        # Already numeric: the `.str` accessor would raise on this column.
        return pd.to_numeric(column, errors='coerce')
    # Stringify first so entries that are already numbers inside an object
    # column are kept; `.str.replace` alone would turn them into NaN.
    stripped = column.astype(str).str.replace(',', '', regex=False)
    return pd.to_numeric(stripped, errors='coerce')


# The workflow runs when executed as a script or notebook cell; all names
# assigned below remain module-level globals.
if __name__ == '__main__':
    data = pd.read_csv('Healthcare Providers.csv')
    df = pd.DataFrame(data)

    for column in NUMERIC_COLUMNS:
        df[column] = parse_numeric(df[column])

    # Remove rows where conversion resulted in NaN
    df = df.dropna(subset=NUMERIC_COLUMNS)

    # Filter out rows where 'Number of Medicare Beneficiaries' is zero.
    # Copy so the column assignments below write to an independent frame
    # instead of a slice (avoids SettingWithCopyWarning).
    df = df[df['Number of Medicare Beneficiaries'] > 0].copy()

    # Encode 'Gender of the Provider' for GMM. Missing values become an
    # explicit 'Unknown' category so the encoder receives uniform strings.
    label_encoder = LabelEncoder()
    gender = df['Gender of the Provider'].fillna('Unknown').astype(str)
    df['Gender Encoded'] = label_encoder.fit_transform(gender)

    # Define the features (X)
    X = df[FEATURE_COLUMNS]

    # Feature scaling (important for GMM)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Initialize and fit the Gaussian Mixture Model
    gmm = GaussianMixture(
        n_components=N_COMPONENTS, covariance_type='full', random_state=0
    )
    gmm.fit(X_scaled)

    # Compute log probabilities of each sample and classify anomalies:
    # samples whose log-likelihood is strictly below the chosen percentile.
    log_probs = gmm.score_samples(X_scaled)
    threshold = np.percentile(log_probs, ANOMALY_PERCENTILE)
    anomalies = log_probs < threshold

    # Add a column in the dataframe to flag anomalies
    df['Anomaly'] = anomalies
    print(df['Anomaly'])

    # Print the number of anomalies detected
    print(f'Number of anomalies detected: {anomalies.sum()}')

    # Plotting anomalies vs non-anomalies
    plt.scatter(
        df['Average Medicare Allowed Amount'],
        df['Average Medicare Payment Amount'],
        c=anomalies,
        cmap='coolwarm',
    )
    plt.xlabel('Average Medicare Allowed Amount')
    plt.ylabel('Average Medicare Payment Amount')
    plt.title('GMM Anomaly Detection')
    plt.show()
