## Pablo Valdunciel Sánchez 
### 18th October, 2019
Generate k + l (6 + 9) clusters with KMeans. Then treat the k + l cluster centers as individual examples and use AgglomerativeClustering (Ward linkage) to reduce them to k clusters.

# Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import MinMaxScaler 
from sklearn.model_selection import StratifiedKFold
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

from scipy.stats import mode

# Data preparation

In [2]:
# Load the Iris dataset and keep the class labels
dataset = load_iris()
Y = dataset["target"]

# Min-max normalize the attributes
X = MinMaxScaler().fit_transform(dataset["data"])

# Sanity check on the shapes of the feature matrix and the label vector
print(X.shape, Y.shape)

(150, 4) (150,)


#  Kmeans & AgglomerativeClustering combination

In [3]:
K = 6  # number of final clusters kept after the agglomerative step
L = 9  # extra clusters: k-means is first run with K + L clusters

In [4]:
def get_indexes(target, myList):
    """ Lazily yields, in ascending order, every position of
        'myList' whose element compares equal to 'target'
    """
    yield from (
        position
        for position, element in enumerate(myList)
        if element == target
    )

### Obtain the K+L clusters

In [5]:
# Fit k-means with K+L clusters. n_init is pinned explicitly: its default
# changed from 10 to 'auto' in newer scikit-learn releases, which would
# otherwise change the fitted centers depending on the installed version.
kmeans = KMeans(n_clusters=(K+L), n_init=10, random_state=0)

# Index of the k-means cluster assigned to each sample of X
kmeans_prediction = kmeans.fit_predict(X)

# Coordinates of the K+L cluster centers (last expression -> cell output)
kmeans_cluster_centers = kmeans.cluster_centers_
kmeans_cluster_centers

array([[0.62654321, 0.52777778, 0.78907721, 0.93518519],
       [0.20634921, 0.60912698, 0.08071025, 0.07142857],
       [0.421875  , 0.375     , 0.56038136, 0.52083333],
       [0.6547619 , 0.4047619 , 0.75544794, 0.86904762],
       [0.59375   , 0.40104167, 0.72033898, 0.72916667],
       [0.87037037, 0.38425926, 0.89265537, 0.78703704],
       [0.19444444, 0.125     , 0.38559322, 0.38541667],
       [0.10784314, 0.43137255, 0.06779661, 0.03921569],
       [0.36574074, 0.21527778, 0.49717514, 0.4375    ],
       [0.48148148, 0.47916667, 0.6440678 , 0.68055556],
       [0.36111111, 0.27777778, 0.66949153, 0.78472222],
       [0.91666667, 0.72222222, 0.91525424, 0.88888889],
       [0.30324074, 0.80208333, 0.08898305, 0.07291667],
       [0.54166667, 0.20833333, 0.68305085, 0.61666667],
       [0.65555556, 0.42083333, 0.60847458, 0.55      ]])

### Apply AgglomerativeClustering (ward) to the K+L centers to get K clusters

In [6]:
# Merge the K+L k-means centers into K groups using Ward linkage.
agglomerative = AgglomerativeClustering(n_clusters=K, linkage='ward')

# One agglomerative label per k-means center ...
center_labels = agglomerative.fit_predict(kmeans_cluster_centers)
# ... propagated to every sample through the k-means cluster it was assigned to.
agglomerative_prediction = [center_labels[c] for c in kmeans_prediction]

# Label each final cluster with the most frequent true class among its samples.
mode_values = [None]*K

for i in range(K):
    cluster_indexes = list(get_indexes(i, agglomerative_prediction))
    cluster_values = Y[cluster_indexes]
    # mode(...)[0] is a length-1 array in older SciPy and a scalar in newer
    # releases; np.ravel(...)[0] normalizes both to a single plain int.
    mode_values[i] = int(np.ravel(mode(cluster_values)[0])[0])

# Predicted class of a sample = majority class of its final cluster.
# The builtin int replaces the np.int alias, which was removed from NumPy.
final_prediction = np.fromiter(
    (mode_values[c] for c in agglomerative_prediction), dtype=int
)
hit_rate = accuracy_score(Y, final_prediction)


print("Hit rate = {:.3}%".format(hit_rate*100))

Hit rate = 88.0%
