In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Build the dataset
# Nothing is read from disk here: a synthetic stand-in for a credit card
# transaction dataset is generated so the example is self-contained.
# Replace this section with your own data to run it on real transactions.
np.random.seed(0)  # fixed seed so every run draws the same points
n_transactions, n_anomalies = 1000, 50
# Ordinary transactions: uniform draws from [0, 1) on both features
transactions = np.random.rand(n_transactions, 2)
# Planted outliers: uniform draws stretched and shifted into [5, 10)
anomalies = 5 + 5 * np.random.rand(n_anomalies, 2)
data = np.concatenate([transactions, anomalies], axis=0)

# Standardize the features before clustering
scaler = StandardScaler()
scaled_data = scaler.fit(data).transform(data)

# Fit DBSCAN and read the cluster labels back from the fitted estimator
dbscan = DBSCAN(min_samples=10, eps=0.5)
dbscan_labels = dbscan.fit(scaled_data).labels_

# Points labelled -1 belong to no cluster; they are treated as anomalies
anomaly_indices = np.flatnonzero(dbscan_labels == -1)

# Draw every point colored by its cluster label, then overlay the anomalies
plt.scatter(*scaled_data.T, c=dbscan_labels)
plt.scatter(
    *scaled_data[anomaly_indices].T,
    s=100,
    c='red',
    marker='x',
    label='Anomalies',
)
plt.title("DBSCAN Anomaly Detection")
plt.legend()
plt.show()

print("Number of anomalies detected:", anomaly_indices.size)
