# Load data

In [None]:
from sklearn import cluster
from neupy import algorithms
import seaborn as sns

import data
import plot
import evaluation
from conf import N_CLUSTERS


# Load the vectors through the project-local `data` module. The call yields
# three values, bound here to `data_bin`, `words` and `dim`, which the cells
# below feed to the clustering models and the evaluation helpers.
# NOTE(review): presumably only the first 10000 entries of the file are read
# and 1000 of them are sampled -- confirm against data.load_vectors.
data_bin, words, dim = data.load_vectors(
    "binvectors256.vec",
    read_first_words=10000,
    sample_words=1000,
)

# Cluster using ART1
## Cluster visualization

In [None]:
from conf import ART_VIGILANCE, ART_LEARNING_RATE


# Collect the ART1 hyper-parameters in one place; the learning rate, vigilance
# and cluster count all come from the project's `conf` module.
art_settings = {
    "step": ART_LEARNING_RATE,
    "rho": ART_VIGILANCE,
    "n_clusters": N_CLUSTERS,
    "shuffle_data": False,  # keep the samples in their loaded order
}
artnet = algorithms.ART1(**art_settings)

# Pass the network, the vectors and the words to the shared reporting helper
# under the label "ART".
evaluation.report_model(
    artnet,
    data_bin,
    words,
    "ART",
)

## Explore parameters

In [None]:
# Grids handed to the ART parameter sweep: vigilance from 0.1 to 1.0 in steps
# of 0.1, and a handful of cluster counts.
vigilance_grid = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
cluster_count_grid = [5, 10, 20, 50, 100, 200]

# `learning_rates` is passed as None.
# NOTE(review): presumably the helper then falls back to its own default
# learning rate(s) -- confirm in evaluation.art_explore_parameters.
results = evaluation.art_explore_parameters(
    data_bin,
    vigilances=vigilance_grid,
    learning_rates=None,
    n_clusters_settings=cluster_count_grid,
)


In [None]:
# Silhouette score as a function of vigilance: colour encodes the number of
# clusters, line style encodes the learning rate.
sns.lineplot(
    data=results,
    x="vigilance",
    y="silhouette_score",
    hue="n_clusters",
    style="learning_rate",
    palette="hls",
)

In [None]:
# Smallest cluster size as a function of vigilance: colour encodes the number
# of clusters, line style encodes the learning rate.
sns.lineplot(
    data=results,
    x="vigilance",
    y="min_cluster_size",
    hue="n_clusters",
    style="learning_rate",
    palette="hls",
)

# Other clustering methods

## Agglomerative clustering

In [None]:

# On 0/1 vectors the Manhattan (L1) distance counts the positions in which two
# vectors differ, so it coincides with the (unnormalised) Hamming distance.
# NOTE(review): this assumes data_bin holds binary values -- confirm. Newer
# scikit-learn releases rename `affinity` to `metric`; check the installed
# version before upgrading.
agg = cluster.AgglomerativeClustering(
    n_clusters=N_CLUSTERS,
    affinity="manhattan",
    linkage="average",
)

# Pass the model, the vectors and the words to the shared reporting helper.
evaluation.report_model(
    agg,
    data_bin,
    words,
    "Agglomerative clustering",
)

## OPTICS

In [None]:

# OPTICS with the Hamming metric; every other setting is left at the
# scikit-learn default, and no cluster count is given to the constructor.
opt = cluster.OPTICS(
    metric="hamming",
)

# Pass the model, the vectors and the words to the shared reporting helper.
evaluation.report_model(
    opt,
    data_bin,
    words,
    "OPTICS",
)

## Affinity propagation (TODO: verify that the default distance metric is appropriate for binary vectors)

In [None]:
# Affinity propagation with the estimator's default affinity.
# NOTE(review): per the scikit-learn docs the default is "euclidean", i.e. the
# negative squared Euclidean distance; on 0/1 vectors that equals the negated
# count of differing positions. This assumes data_bin is binary -- confirm.
# `random_state=None` is passed explicitly, so no fixed seed is set here.
aff = cluster.AffinityPropagation(
    random_state=None,
)

# Pass the model, the vectors and the words to the shared reporting helper.
evaluation.report_model(
    aff,
    data_bin,
    words,
    "Affinity propagation",
)

## SOM (NeuPy)

In [None]:
import numpy as np


# NeuPy self-organising feature map: the input size is the vector dimension
# returned by the loader, the output size is the configured cluster count.
sofm_class = algorithms.competitive.sofm.SOFM
som_neupy = sofm_class(
    n_inputs=dim,
    n_outputs=N_CLUSTERS,
)

# Pass the model, the vectors and the words to the shared reporting helper.
evaluation.report_model(
    som_neupy,
    data_bin,
    words,
    "SOM-neupy",
)

## SOM (MiniSOM)

In [None]:
from minisom import MiniSom
# MiniSom map laid out as a single row of N_CLUSTERS nodes over `dim`-sized
# inputs.
# NOTE(review): presumably each node then stands for one cluster -- confirm in
# evaluation.report_model, which is given the cluster count explicitly.
som_minisom = MiniSom(
    x=1,
    y=N_CLUSTERS,
    input_len=dim,
    sigma=0.5,
    learning_rate=0.1,
)

evaluation.report_model(
    som_minisom,
    data_bin,
    words,
    "SOM-minisom",
    n_clusters=N_CLUSTERS,
)