We import the libraries for this notebook

In [1]:
#NumPy - Scientific Computing with Python: http://www.numpy.org/
import numpy as np

#Pickle - Python object serialization: https://docs.python.org/2/library/pickle.html
import pickle

#SciKitLearn - Machine Learning in Python: http://scikit-learn.org/
from sklearn.cluster import DBSCAN
from sklearn import metrics

We compare the expected results and the outputs:

In [2]:
expected_clusters = [1,5,6,9,7,2,2,9,8]

In [3]:
for k in range(9):
    data = pickle.load( open( 'data/numpy_arrays/training' + str(k) + '.npy', "rb" ) )
    db = DBSCAN(eps=6, min_samples=400).fit(data)
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print("Training video " + str(k) + ":\n    Expected clusters: {0[0]}\n    DBScan clusters: {0[1]}"
          .format((expected_clusters[k], n_clusters_)))

Training video 0:
    Expected clusters: 1
    DBScan clusters: 1
Training video 1:
    Expected clusters: 5
    DBScan clusters: 4
Training video 2:
    Expected clusters: 6
    DBScan clusters: 0
Training video 3:
    Expected clusters: 9
    DBScan clusters: 1
Training video 4:
    Expected clusters: 7
    DBScan clusters: 2
Training video 5:
    Expected clusters: 2
    DBScan clusters: 0
Training video 6:
    Expected clusters: 2
    DBScan clusters: 0
Training video 7:
    Expected clusters: 9
    DBScan clusters: 24
Training video 8:
    Expected clusters: 8
    DBScan clusters: 12


We calculate the Silhouette Score when possible (`Memory Errror` is possible)

In [None]:
for k in range(9):
    try:
        data = pickle.load( open( 'data/numpy_arrays/training' + str(k) + '.npy', "rb" ) )
        db = DBSCAN(eps=6, min_samples=400).fit(data)
        labels = db.labels_
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        print("Silhouette Score for training video " + str(k) + ": %0.3f" % metrics.silhouette_score(data, labels))
    except:
        pass

Silhouette Score for training video 0: 0.901


We calculate the Calinski-Harabaz Index when possible (`Memory Errror` is possible)

In [None]:
for k in range(9):
    try:
        data = pickle.load( open( 'data/numpy_arrays/training' + str(k) + '.npy', "rb" ) )
        db = DBSCAN(eps=6, min_samples=400).fit(data)
        labels = db.labels_
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        print("Calinski-Harabaz Index for training video " + str(k) + ": %0.3f" % metrics.calinski_harabaz_score(data, labels))
    except:
        pass

Calinski-Harabaz Index for training video 0: 9538.867
Calinski-Harabaz Index for training video 1: 94523.407
Calinski-Harabaz Index for training video 3: 2315.070
Calinski-Harabaz Index for training video 4: 1923.376
Calinski-Harabaz Index for training video 7: 6748.531
