# In [1]:
import numpy as np
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

# Task 1: DBSCAN on Uniform Data
# Five points lying on the diagonal y = x.
data = np.array([[-1, -1], [0, 0], [1, 1], [9, 9], [10, 10]])
dbscan = DBSCAN(min_samples=2, eps=0.5)
dbscan_labels = dbscan.fit(data).labels_

print("DBSCAN Labels:", dbscan_labels, sep="\n")

# Noise/outlier points are the ones labelled -1; collect their positions.
outlier_indices = np.flatnonzero(dbscan_labels == -1)
print("Outlier Indices:", outlier_indices, sep="\n")

# Task 2: Detecting Anomalies in a Simple Dataset
data = np.array([[0, 0], [0, 1], [1, 0], [10, 10], [11, 11]])
dbscan = DBSCAN(min_samples=2, eps=1)
dbscan_labels = dbscan.fit(data).labels_

print("DBSCAN Labels:", dbscan_labels, sep="\n")

# Inspect the clustering result: any point labelled -1 is classified as an
# outlier, so report the positions of those points.
outlier_indices = np.flatnonzero(dbscan_labels == -1)
print("Outlier Indices:", outlier_indices, sep="\n")

# Plot the points (first column as x, second as y), coloured by label.
plt.scatter(*data.T, c=dbscan_labels)
plt.title("DBSCAN Clustering")
plt.show()

# Task 3: Exploring Real World Data
# A small simulated dataset stands in for real data here: 100 seeded random
# 2-D points, with two extra points appended to act as anomalies.
anomalies = np.array([[5, 5], [6, 6]])
np.random.seed(0)
data = np.concatenate([np.random.rand(100, 2), anomalies], axis=0)

dbscan = DBSCAN(min_samples=5, eps=0.5)
dbscan_labels = dbscan.fit(data).labels_

# Report which points were marked as noise (label -1).
outlier_indices = np.flatnonzero(dbscan_labels == -1)
print("Outlier Indices:", outlier_indices, sep="\n")

# Plot the points (first column as x, second as y), coloured by label.
plt.scatter(*data.T, c=dbscan_labels)
plt.title("DBSCAN Clustering on Simulated Real-World Data")
plt.show()

