In [None]:

import pandas as pd
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

# Flag unusual billing amounts with an Isolation Forest, print a short
# summary of the flagged records, and plot them against the remaining ones.

# Load dataset
df = pd.read_csv('/mnt/data/healthcare_dataset.csv')

# Extract Billing Amount
# Double brackets keep this a one-column DataFrame (2-D), which is what the
# estimator is fitted on below.
billing = df[['Billing Amount']]

# Train Isolation Forest Model
# contamination=0.02 is the proportion of records the model should treat as
# outliers; random_state is fixed so reruns produce the same flags.
iso = IsolationForest(contamination=0.02, random_state=42)
df['Anomaly'] = iso.fit_predict(billing)

# Mark anomalies clearly
# A prediction of -1 marks an outlier; any other value is treated as normal.
# The boolean mask is built once and reused for labelling and for selection.
is_anomaly = df['Anomaly'].eq(-1)
df['Anomaly_Flag'] = is_anomaly.map({True: 'Anomaly', False: 'Normal'})

# Extract anomalies (and the complementary normal records for plotting)
anomalies = df[is_anomaly]
normal_points = df[~is_anomaly]

print("Number of Anomalies Detected:", len(anomalies))
print("\nSample Anomalies:")
print(anomalies[['Name', 'Billing Amount', 'Medical Condition', 'Hospital', 'Anomaly_Flag']].head())

# Plot anomalies
# Only the non-anomalous rows go into the 'Normal Points' series so the legend
# matches what is drawn and anomalous rows are not plotted twice.
plt.figure(figsize=(10, 6))
plt.scatter(normal_points.index, normal_points['Billing Amount'], label='Normal Points')
plt.scatter(anomalies.index, anomalies['Billing Amount'], color='red', label='Anomalies')
plt.xlabel('Record Index')
plt.ylabel('Billing Amount')
plt.title('Anomaly Detection in Billing Amounts')
plt.legend()
plt.tight_layout()
plt.show()
