In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
# For dataset1.txt
# Read the samples into a NumPy array; no `delimiter` is given, so genfromtxt
# splits each line on whitespace.
# NOTE(review): `deletechars` only affects generated field *names*, and no names
# are requested here, so it looks like a no-op. If the file were comma-separated,
# `delimiter=','` would be the relevant option — confirm against the data file.
data1 = np.genfromtxt('dataset1.txt',deletechars=',')

In [16]:
from sklearn.cluster import KMeans

# Elbow method: fit k-means once per candidate k on dataset1 and record the
# model's inertia_ so the bend in the curve can be read off the saved plot.
x_labels = list(range(1, 20))
sum_of_squared_error = []
for k in x_labels:
    kMeans = KMeans(n_clusters=k, random_state=0).fit(data1)
    sum_of_squared_error.append(kMeans.inertia_)

# Draw SSE against k, write the figure to disk, then close it.
elbow_fig, elbow_ax = plt.subplots(figsize=(12, 7), dpi=90, facecolor='w', edgecolor='k')
elbow_ax.set_title("Sum of squared error for various k")
elbow_ax.plot(x_labels, sum_of_squared_error, color='#32CD32', marker='+')
elbow_ax.set_xlabel("k")
elbow_ax.set_ylabel("SSE value")
elbow_fig.savefig("elbow-curve.png", bbox_inches='tight')
plt.close(elbow_fig)

In [18]:
# The elbow curve points to k = 6 as the best trade-off, so fit the final
# k-means model with six clusters and keep both the per-sample cluster
# assignments and the centroid coordinates.
kMeans = KMeans(n_clusters=6, random_state=0).fit(data1)
labels1, kMean_centres = kMeans.labels_, kMeans.cluster_centers_

In [21]:
# Scatter the first two columns of dataset1 coloured by `labels1`, overlay the
# k-means centroids as red diamonds, save the figure to disk and close it.
fig, ax = plt.subplots(figsize=(12, 7), dpi=90, facecolor='w', edgecolor='k')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.scatter(data1[:, 0], data1[:, 1], s=50, c=labels1)
ax.scatter(kMean_centres[:, 0], kMean_centres[:, 1], s=50, c='red', marker='D')
fig.savefig("kMeans-dataset1.png", bbox_inches='tight')
plt.close(fig)

In [22]:
def scan_for_each_point(data, ind, eps):
    """Return the indices of all samples strictly closer than ``eps`` to sample ``ind``.

    Parameters
    ----------
    data : array-like
        Samples, one per entry along the first axis (anything
        ``np.asarray`` can convert, e.g. an ``(n, d)`` array or a list of lists).
    ind : int
        Index of the query sample inside ``data``.
    eps : float
        Neighbourhood radius. The comparison is strict (``distance < eps``), so
        a sample lying exactly ``eps`` away is NOT a neighbour.

    Returns
    -------
    list of int
        Neighbour indices in ascending order. The query sample itself is
        included whenever ``eps > 0`` (its distance is 0). Empty ``data``
        yields ``[]``.
    """
    points = np.asarray(data)
    if len(points) == 0:
        # Nothing to compare against; indexing points[ind] would raise here.
        return []
    # One vectorised pass instead of a Python-level loop calling norm per sample.
    # Trailing axes are flattened so scalar samples and vector samples are
    # treated alike (Euclidean norm over all components of the difference).
    offsets = (points - points[ind]).reshape(len(points), -1)
    distances = np.linalg.norm(offsets, axis=1)
    # tolist() yields plain Python ints, matching what callers index lists with.
    return np.flatnonzero(distances < eps).tolist()

def scan_for_cluster(data, n, ind, min_pts, eps, labels, id):
    """Grow cluster ``id`` outward from the seed neighbourhood ``n``.

    Label convention (shared with ``my_dbscan``): ``0`` = not yet visited,
    ``-1`` = noise, positive integers = cluster ids.

    Parameters
    ----------
    data : array-like
        Samples, forwarded unchanged to ``scan_for_each_point``.
    n : list of int
        Work list of sample indices to visit. It is extended IN PLACE while
        being iterated, so the caller's list grows to hold every index that
        was queued for this cluster.
    ind : int
        Index of the seed sample. Unused by this function; kept so existing
        call sites keep working.
    min_pts : int
        Minimum neighbourhood size for a sample to count as a core point.
    eps : float
        Neighbourhood radius, forwarded to ``scan_for_each_point``.
    labels : list of int
        Per-sample labels, updated in place.
    id : int
        Cluster id to assign. (The name shadows the builtin ``id``; it is kept
        because it is part of the call signature.)

    Returns
    -------
    None
    """
    # Indices already present in the work list. Checking this before appending
    # means each sample is queued at most once, so ``n`` can never grow beyond
    # the number of samples (previously every core point re-appended its whole
    # neighbourhood, including indices that were already queued or labelled).
    queued = set(n)
    # Appending to a list while iterating over it is well defined in Python:
    # the loop simply continues into the newly appended entries.
    for elem in n:
        if labels[elem] == -1:
            # Earlier classified as noise, but reachable from this cluster:
            # it joins as a border point and is not expanded further.
            labels[elem] = id
        elif labels[elem] == 0:
            labels[elem] = id
            next_n = scan_for_each_point(data, elem, eps)
            if min_pts <= len(next_n):
                # Core point: its neighbours are reachable too.
                for candidate in next_n:
                    if candidate not in queued:
                        queued.add(candidate)
                        n.append(candidate)
        # Any other label means the sample already belongs to a cluster: skip.

def my_dbscan(data, eps, min_pts):
    """Cluster ``data`` with a hand-written DBSCAN and return one label per sample.

    Parameters
    ----------
    data : sequence
        Samples; must support ``len()``, iteration and whatever
        ``scan_for_each_point`` requires of it.
    eps : float
        Neighbourhood radius passed to the neighbourhood query.
    min_pts : int
        Minimum neighbourhood size (as returned by ``scan_for_each_point``)
        for a sample to start or extend a cluster.

    Returns
    -------
    list of int
        ``-1`` for samples classified as noise, otherwise a cluster id
        starting at 1 and increasing in order of discovery.
    """
    # 0 is the internal "not visited yet" marker; it never survives to the
    # returned list because every sample is either labelled here or by
    # scan_for_cluster.
    labels = [0] * len(data)
    cluster_id = 0
    for idx, _ in enumerate(data):
        if labels[idx] != 0:
            # Already assigned while an earlier cluster was being grown,
            # or already marked as noise.
            continue
        neighbours = scan_for_each_point(data, idx, eps)
        is_core = min_pts <= len(neighbours)
        if not is_core:
            # Provisional noise; scan_for_cluster may later relabel it.
            labels[idx] = -1
            continue
        cluster_id += 1
        labels[idx] = cluster_id
        scan_for_cluster(data, neighbours, idx, min_pts, eps, labels, cluster_id)
    return labels

In [23]:
# Cluster dataset1 with the hand-written DBSCAN (eps = 0.3, min_pts = 10).
# NOTE: this rebinds `labels1`, replacing the k-means assignments stored under
# the same name by an earlier cell.
labels1 = my_dbscan(data1, 0.3, 10)
# Function-call form of print: required on Python 3, and with a single
# argument it prints the same text on Python 2.
print(labels1)

[1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 

In [24]:
# Scatter the first two columns of dataset1 coloured by the labels currently
# held in `labels1`, save the figure to disk and close it.
fig, ax = plt.subplots(figsize=(12, 7), dpi=90, facecolor='w', edgecolor='k')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.scatter(data1[:, 0], data1[:, 1], s=50, c=labels1)
fig.savefig("dbscan-dataset1.png", bbox_inches='tight')
plt.close(fig)

In [25]:
# For dataset2.txt
# Read the samples into a NumPy array; no `delimiter` is given, so genfromtxt
# splits each line on whitespace.
# NOTE(review): `deletechars` only affects generated field *names*, and no names
# are requested here, so it looks like a no-op. If the file were comma-separated,
# `delimiter=','` would be the relevant option — confirm against the data file.
data2 = np.genfromtxt('dataset2.txt',deletechars=',')

In [26]:
# Fit k-means with three clusters on dataset2 (fixed seed for repeatable
# results) and keep the per-sample cluster assignments.
kMeans = KMeans(n_clusters=3, random_state=0).fit(data2)
labels2 = kMeans.labels_

In [28]:
# Centroid coordinates of the model currently bound to `kMeans`.
# NOTE: this rebinds `kMean_centres`, which an earlier cell used for the
# dataset1 centroids.
kMean_centres = kMeans.cluster_centers_

In [30]:
# Scatter the first two columns of dataset2 coloured by `labels2`, overlay the
# k-means centroids, save the figure to disk and close it.
# The figure face/edge colours and the diamond centroid marker mirror the
# dataset1 k-means plot: the two figures stay directly comparable, and the
# centroids no longer share the marker shape and size of the data points.
fig, ax = plt.subplots(figsize=(12, 7), dpi=90, facecolor='w', edgecolor='k')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.scatter(data2[:, 0], data2[:, 1], s=50, c=labels2)
ax.scatter(kMean_centres[:, 0], kMean_centres[:, 1], s=50, c='red', marker='D')
fig.savefig("kMeans-dataset2.png", bbox_inches='tight')
plt.close(fig)

In [31]:
# Cluster dataset2 with the hand-written DBSCAN (eps = 0.3, min_pts = 10).
# NOTE: this rebinds `labels2`, replacing the k-means assignments stored under
# the same name by an earlier cell.
labels2 = my_dbscan(data2, 0.3, 10)
# Function-call form of print: required on Python 3, and with a single
# argument it prints the same text on Python 2.
print(labels2)

[1, -1, -1, 2, -1, -1, -1, -1, -1, -1, 3, -1, 2, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, 5, 5, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, 2, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, 1, -1, -1, 1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, 5, 1, -1, -1, -1, 4, -1, -1, -1, 6, 5, 3, -1, -1, -1, 1, -1, 3, -1, -1, -1, 5, -1, -1, 2, -1, 6, -1, 3, -1, -1, -1, -1, -1, -1, 6, 3, -1, -1, -1, -1, -1, 2, 4, -1, -1, -1, -1, 6, -1, -1, 1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, 1, -1, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1

In [32]:
# Scatter the first two columns of dataset2 coloured by the labels currently
# held in `labels2`, save the figure to disk and close it.
fig, ax = plt.subplots(figsize=(12, 7), dpi=90, facecolor='w', edgecolor='k')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.scatter(data2[:, 0], data2[:, 1], s=50, c=labels2)
fig.savefig("dbscan-dataset2.png", bbox_inches='tight')
plt.close(fig)