In [23]:
import pandas as pd
import numpy as np
from sklearn.cluster import SpectralClustering

In [24]:
# Load the binary service-subscription table from the local datasets folder.
df = pd.read_csv(filepath_or_buffer='./datasets/binary_data.csv')

In [25]:
# Preview the first five rows to check which columns were loaded.
df.head(n=5)

Unnamed: 0,id,netflix,spotify,deezer,amazon_prime,star_plus,hbo_max,apple_tv
0,1,1,0,1,1,0,0,0
1,2,0,0,0,1,0,0,0
2,3,0,1,1,0,1,0,1
3,4,0,1,0,1,1,0,1
4,5,1,0,1,1,1,1,1


In [26]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(25, 8)

In [27]:
# Feature frame for clustering: everything in `df` except the `id` identifier.
# `DataFrame.drop` already returns a new frame, so no separate `.copy()` is needed,
# and deriving from `df` (instead of re-assigning `df_clustering` from itself)
# keeps this cell safe to re-run.
# A `cluster` column is written onto `df` further down; it is dropped here when
# present so that a re-run cannot feed earlier labels back in as a feature.
df_clustering = df.drop(columns=['id']).drop(columns=['cluster'], errors='ignore')

In [29]:
# Preview the feature frame now that `id` has been removed.
df_clustering.head(n=5)

Unnamed: 0,netflix,spotify,deezer,amazon_prime,star_plus,hbo_max,apple_tv
0,1,0,1,1,0,0,0
1,0,0,0,1,0,0,0
2,0,1,1,0,1,0,1
3,0,1,0,1,1,0,1
4,1,0,1,1,1,1,1


In [30]:
# Convert the feature frame to a plain NumPy array for scikit-learn.
# `np.asarray` accepts both a DataFrame and an ndarray, so re-running this cell
# is a no-op instead of failing because an ndarray has no `.values` attribute.
df_clustering = np.asarray(df_clustering)

In [31]:
# Display the full feature matrix that is passed to the clustering models.
df_clustering

array([[1, 0, 1, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 1, 1, 0, 1, 0, 1],
       [0, 1, 0, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 0, 0, 1],
       [0, 1, 1, 0, 0, 1, 0],
       [1, 1, 0, 1, 0, 0, 1],
       [0, 1, 0, 1, 0, 0, 1],
       [1, 1, 0, 0, 1, 0, 1],
       [0, 0, 0, 1, 0, 1, 0],
       [0, 0, 1, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [1, 0, 1, 0, 1, 1, 1],
       [1, 1, 0, 0, 0, 1, 0],
       [1, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 1],
       [0, 0, 1, 1, 0, 0, 1],
       [1, 0, 1, 0, 1, 0, 0],
       [1, 1, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [1, 1, 0, 1, 0, 1, 1],
       [1, 0, 0, 0, 1, 1, 1]])

In [71]:
# Spectral clustering into 3 groups: RBF-kernel affinity, labels assigned by the
# 'discretize' strategy, fixed seed for repeatable results.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = SpectralClustering(
    n_clusters=3,
    assign_labels='discretize',
    affinity='rbf',
    random_state=0,
).fit(df_clustering)

#### Valores para o assign_labels
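* `kmeans` (default): runs k-means on the spectral embedding; a popular choice, but it can be sensitive to initialization.
* `discretize`: discretizes the embedding directly; less sensitive to random initialization.
* `cluster_qr`: extracts the clusters directly from the eigenvectors; it has no tuning parameters and runs no iterations.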

#### Valores para o affinity
* `nearest_neighbors`: construct the affinity matrix by computing a graph of nearest neighbors.
* `rbf` (default) : construct the affinity matrix using a radial basis function (RBF) kernel.
* `precomputed`: interpret X as a precomputed affinity matrix, where larger values indicate greater similarity between instances.
* `precomputed_nearest_neighbors`: interpret X as a sparse graph of precomputed distances, and construct a binary affinity matrix from the n_neighbors nearest neighbors of each instance.



In [73]:
# Cluster index assigned to each row, in the same order as the rows of `df_clustering`.
model.labels_

array([0, 2, 1, 1, 0, 2, 0, 1, 1, 1, 2, 0, 2, 2, 0, 1, 0, 1, 2, 0, 0, 0,
       2, 1, 0])

In [74]:
# Attach each row's cluster label to the original table as a `cluster` column.
df = df.assign(cluster=model.labels_)

In [75]:
# Number of rows that landed in each cluster.
df['cluster'].value_counts()

0    10
1     8
2     7
Name: cluster, dtype: int64

In [76]:
# Rows assigned to cluster 0.
df.loc[df['cluster'] == 0]

Unnamed: 0,id,netflix,spotify,deezer,amazon_prime,star_plus,hbo_max,apple_tv,cluster
0,1,1,0,1,1,0,0,0,0
4,5,1,0,1,1,1,1,1,0
6,7,0,1,1,0,0,1,0,0
11,12,0,0,1,0,1,0,0,0
14,15,1,0,1,0,1,1,1,0
16,17,1,0,1,1,1,0,0,0
19,20,1,0,1,0,1,0,0,0
20,21,1,1,1,0,0,0,0,0
21,22,1,0,0,0,1,0,0,0
24,25,1,0,0,0,1,1,1,0


In [77]:
# Rows assigned to cluster 1.
df.loc[df['cluster'] == 1]

Unnamed: 0,id,netflix,spotify,deezer,amazon_prime,star_plus,hbo_max,apple_tv,cluster
2,3,0,1,1,0,1,0,1,1
3,4,0,1,0,1,1,0,1,1
7,8,1,1,0,1,0,0,1,1
8,9,0,1,0,1,0,0,1,1
9,10,1,1,0,0,1,0,1,1
15,16,1,1,0,0,0,1,0,1
17,18,0,0,0,0,1,0,1,1
23,24,1,1,0,1,0,1,1,1


In [78]:
# Rows assigned to cluster 2.
df.loc[df['cluster'] == 2]

Unnamed: 0,id,netflix,spotify,deezer,amazon_prime,star_plus,hbo_max,apple_tv,cluster
1,2,0,0,0,1,0,0,0,2
5,6,0,0,0,1,0,0,1,2
10,11,0,0,0,1,0,1,0,2
12,13,0,0,0,0,0,0,0,2
13,14,0,1,0,0,0,0,0,2
18,19,0,0,1,1,0,0,1,2
22,23,0,0,0,1,0,0,0,2


# Comparação de estratégias de `assign_labels`

### Kmeans

In [80]:
# Variant 1 of the comparison: identical settings, labels assigned with k-means.
# `fit` returns the estimator itself, so construction and fitting are chained.
model_kmeans = SpectralClustering(
    n_clusters=3,
    assign_labels='kmeans',
    affinity='rbf',
    random_state=0,
).fit(df_clustering)

### Discretize

In [82]:
# Variant 2 of the comparison: labels assigned with 'discretize'.
# This is the same configuration as the `model` estimator fitted earlier.
model_discretize = SpectralClustering(
    n_clusters=3,
    assign_labels='discretize',
    affinity='rbf',
    random_state=0,
).fit(df_clustering)

### Cluster QR

In [84]:
# Variant 3 of the comparison: labels assigned with 'cluster_qr'.
# `fit` returns the estimator itself, so construction and fitting are chained.
model_cluster_qr = SpectralClustering(
    n_clusters=3,
    assign_labels='cluster_qr',
    affinity='rbf',
    random_state=0,
).fit(df_clustering)

## Comparação

In [89]:
# Labels from the k-means assignment step.
# Cluster ids are arbitrary per fit, so compare the groupings (which rows share a
# label) across models rather than the raw numbers.
model_kmeans.labels_

array([2, 0, 1, 1, 2, 0, 0, 1, 1, 1, 0, 2, 0, 0, 2, 1, 2, 1, 0, 2, 2, 2,
       0, 1, 2], dtype=int32)

In [90]:
# Labels from the 'discretize' assignment step (same settings as `model` fitted earlier).
# Cluster ids are arbitrary per fit; compare groupings, not raw numbers.
model_discretize.labels_

array([0, 2, 1, 1, 0, 2, 0, 1, 1, 1, 2, 0, 2, 2, 0, 1, 0, 1, 2, 0, 0, 0,
       2, 1, 0])

In [91]:
# Labels from the 'cluster_qr' assignment step.
# Cluster ids are arbitrary per fit; compare groupings, not raw numbers.
model_cluster_qr.labels_

array([2, 2, 1, 0, 1, 2, 2, 0, 0, 0, 2, 1, 2, 2, 1, 0, 1, 1, 2, 1, 1, 1,
       2, 0, 1])