## 9 - DBSCAN Clustering
## Table of Contents
<ul>
<li><a href="#1">Extracting Feature Vectors</a></li>
<li><a href="#2">Prepare data for DBSCAN</a></li>
<li><a href="#3">Implement DBSCAN clustering</a></li>
</ul>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score

In [None]:
# Load the fracture table from the CSV file into a dataframe
df_fractures = pd.read_csv("./output/df_fractures_4.csv")

# Preview the first rows of the table
df_fractures.head()

In [None]:
# Read the image as a single-channel grayscale array
image_path = 'Global_20190321_ShapeV20_200kfacets_FB3.png'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail here with a clear message instead of an obscure error further down.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# Flip the image vertically (flipCode 0 = around the x-axis).
# NOTE(review): presumably so that row indices match the Y coordinates stored
# in df_fractures - confirm.
image = cv2.flip(image, 0)

<a id='1'></a>
### Extracting Feature Vectors

In [None]:
def crop_hist(index, plot=False):
    """Return the normalised grey-level histogram of one fracture's bounding box.

    The bounding box (``Xmin``, ``Xmax``, ``Ymin``, ``Ymax``) of row ``index``
    is read from the module-level ``df_fractures`` and cut out of the
    module-level ``image``. When ``fracture_height`` or ``fracture_length``
    truncates to zero (a completely horizontal or vertical fracture), the box
    is extended by 5 pixels on both sides of that axis.

    Parameters
    ----------
    index
        Index label of the fracture row in ``df_fractures``.
    plot : bool, default False
        If true, show the cropped image, its raw histogram and its normalised
        histogram side by side.

    Returns
    -------
    numpy.ndarray
        The 10 density values of a histogram with equal-width bins over the
        range 0-256.
    """
    padding = 5           # pixels added on each side of a degenerate axis
    number_of_bins = 10
    value_range = (0, 256)

    # One row lookup instead of a separate chained lookup per column
    row = df_fractures.loc[index]
    Xmin, Xmax = int(row['Xmin']), int(row['Xmax'])
    Ymin, Ymax = int(row['Ymin']), int(row['Ymax'])

    # Extending the frame if the fracture is completely horizontal or vertical
    if int(row['fracture_height']) < 1:
        Ymin -= padding
        Ymax += padding

    if int(row['fracture_length']) < 1:
        Xmin -= padding
        Xmax += padding

    # A negative slice start is interpreted by NumPy as "counted from the end",
    # which would produce an empty or wrong crop for fractures close to the
    # top/left border. Clamp to the image origin instead. (Stops beyond the
    # image size are already truncated by slicing.)
    Xmin = max(Xmin, 0)
    Ymin = max(Ymin, 0)

    cropped_image = image[Ymin:Ymax, Xmin:Xmax]
    pixels = cropped_image.ravel()

    # np.histogram returns (values, bin_edges); only the values are needed
    hist_values = np.histogram(pixels, bins=number_of_bins,
                               range=value_range, density=True)[0]

    if plot:
        fig = plt.figure(figsize=(9, 3))
        rows, columns = 1, 3

        fig.add_subplot(rows, columns, 1)
        plt.imshow(cropped_image, cmap="gray")
        plt.axis('off')
        plt.title("Cropped Image")

        fig.add_subplot(rows, columns, 2)
        plt.hist(pixels, bins=number_of_bins, range=value_range)
        plt.title("Histogram")

        fig.add_subplot(rows, columns, 3)
        plt.hist(pixels, bins=number_of_bins, range=value_range,
                 density=True, color="black")
        plt.title("Normalised Histogram")

        fig.tight_layout()
        plt.show()

    return hist_values

In [None]:
# Visual check: show the crop and both histograms for the fracture at row 1357
crop_hist(1357, plot = True)

<a id='2'></a>
### Prepare data for DBSCAN

In [None]:
# Pre-allocate the feature matrix: one row per fracture and 8 columns
# (the 10 histogram bins from crop_hist minus the 2 that are dropped later)
feature_vectors = np.zeros((len(df_fractures), 8), dtype=np.float32)
feature_vectors.shape

In [None]:
# Fill 'feature_vectors' row by row with the histogram of every fracture image
for i in range(len(df_fractures)):
    feature_vector = crop_hist(index=i, plot=False)

    # Drop the first two bins to threshold the shadows; the remaining
    # 8 values become row i of 'feature_vectors'
    feature_vectors[i] = feature_vector[2:]

<a id='3'></a>
### Implement DBSCAN clustering

In [None]:
# Cluster the feature vectors with DBSCAN.
# NOTE(review): the features are density-normalised histogram values over
# 0-256 with 10 bins, so each entry is at most 1/25.6 (about 0.04) and two
# vectors can never be further apart than roughly 0.06. eps=3 therefore looks
# far too large and would put every fracture into one cluster - confirm eps.
dbscan = DBSCAN(eps=3, min_samples=2)
dbscan.fit(feature_vectors)

# Cluster id assigned to each fracture, in the row order of 'feature_vectors'
labels = dbscan.labels_

labels

In [None]:
# assign the labels (the id of the cluster) from DBSCAN to the dataframe as a new column
# (scikit-learn marks noise samples, i.e. points that belong to no cluster, with -1)
df_fractures['dbscan_cluster_id'] = dbscan.labels_

df_fractures.head()

In [None]:
# Number of fractures assigned to each cluster id
df_fractures['dbscan_cluster_id'].value_counts()

In [None]:
# Preview the first rows of the dataframe
df_fractures.head()

### Checking Clusters

In [None]:
# Mean 'w_average_brightness' and number of fractures for every cluster id
(
    df_fractures
    .groupby('dbscan_cluster_id')['w_average_brightness']
    .agg(['mean', 'count'])
)

In [None]:
# Bar chart of the mean 'w_average_brightness' and the fracture count per
# cluster id. DataFrame.plot opens its own figure when no `ax` is passed, so a
# separate plt.figure() call beforehand would only leave an empty figure behind.
(
    df_fractures
    .groupby('dbscan_cluster_id')['w_average_brightness']
    .agg(['mean', 'count'])
    .plot(kind='bar', figsize=(12, 6))
)

plt.show()

### Elbow Method

In [None]:
# DBSCAN exposes no `inertia_` attribute and has no cluster-count parameter,
# so the within-cluster sum of squares (WCSS) curve of the elbow method is
# computed with k-means, the estimator that defines both.
from sklearn.cluster import KMeans

kmax = 30

# wcss[n - 1] is the WCSS of a k-means fit with n clusters, n = 1 .. kmax - 1
wcss = []

for n in range(1, kmax):
    # A separate name keeps the fitted `dbscan` model from being overwritten;
    # a fixed random_state makes the curve reproducible between runs
    kmeans = KMeans(n_clusters=n, n_init=10, random_state=0).fit(feature_vectors)
    wcss.append(kmeans.inertia_)
