In [22]:
import pandas as pd
import numpy as np
import os
from sklearn.datasets import make_classification

In [2]:
# Move the working directory up one level.
# NOTE(review): presumably done so the `mc_hammer` package imported in the next
# cell resolves from the parent directory — confirm.
# The path is relative, so every re-run of this cell climbs one more directory;
# run it exactly once per kernel session.
os.chdir('../')

In [3]:
from mc_hammer.cluster_measures import cvnn_sep, scatter, radial_density,dataset_midpoint_dist, dataset_meancenter_dist
from mc_hammer.cluster_measures import mean_center_dist,max_center_dist,max_diam, mean_max_diam,mean_all
from mc_hammer.similarity_functions import IGP, sillhouette_euclidean
from mc_hammer.circular_cluster_generator import generate_data
from mc_hammer.clustering_algorithms import k_means

## Functions

In [None]:
def gouss_dist(dim,seed_n):
    """Draw a reproducible Gaussian sample of 100 points in ``dim`` dimensions.

    Every coordinate is sampled from a normal distribution with mean 1 and a
    shared standard deviation that is itself drawn uniformly from [0.1, 0.3).

    Parameters
    ----------
    dim : int
        Number of features (columns) of the returned array.
    seed_n : int
        Seed applied to NumPy's global random state before sampling, so the
        same ``seed_n`` always yields the same array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(100, dim)``; rows are observations.
    """
    # `seed` and `uniform` are not imported as bare names anywhere in the
    # notebook; go through NumPy so the seed controls the very generator that
    # produces the samples below.
    np.random.seed(seed_n)
    std = np.random.uniform(0.1,0.3)
    # Sampled as (dim, 100) and transposed so that rows are observations.
    null_arr = np.random.normal(1,std,(dim,100))
    return null_arr.T

In [None]:
def get_centers(x,labs):
    """Compute the mean point of every cluster.

    Parameters
    ----------
    x : numpy.ndarray
        2-D data array; rows are indexed with the boolean mask ``labs == k``.
    labs : numpy.ndarray
        Integer cluster label per row of ``x``. Labels ``0 .. max(labs)`` are
        all visited, whether or not every label actually occurs.

    Returns
    -------
    numpy.ndarray
        One row per label, holding the column-wise mean of that label's rows.
    """
    n_labels = max(labs) + 1
    # Row `label` of the result is the centroid of the rows tagged `label`.
    return np.array([x[labs == label, :].mean(axis=0) for label in range(n_labels)])

In [None]:
def gauss_clust_generator(k,dimx,sep,n,se,noise):
    """Generate a labelled cluster dataset with ``make_classification``.

    Parameters
    ----------
    k : int
        Number of classes, each made of a single cluster.
    dimx : int
        Total number of features.
    sep : float
        Forwarded as ``class_sep``.
    n : int
        Number of samples.
    se : int
        Forwarded as ``random_state``.
    noise : float
        Fraction of ``dimx`` (rounded to an integer count) requested as
        redundant features; the remaining features are informative. Note that
        scikit-learn builds redundant features as linear combinations of the
        informative ones.

    Returns
    -------
    tuple
        ``(x, labs, centers)``: the features divided by their global maximum,
        the class labels, and the per-class centroids of the scaled features.
    """
    redundant_count = int(round(dimx * noise))
    settings = dict(
        n_samples = n,
        n_features = dimx,
        n_informative = dimx - redundant_count,
        n_redundant = redundant_count,
        n_classes = k,
        n_clusters_per_class = 1,
        class_sep = sep,
        random_state = se,
    )
    features, labels = make_classification(**settings)
    # Scale by the single largest entry of the whole feature matrix.
    scaled = features / features.max()
    return scaled, labels, get_centers(scaled, labels)

In [None]:
def get_metrics_results(res_dicts,method,addit = None):
    """Evaluate one metric on every dataset stored in ``res_dicts``.

    Parameters
    ----------
    res_dicts : dict
        Holds the parallel lists ``'x'``, ``'labs'`` and ``'centers'``; entry
        ``i`` of each list describes the same dataset.
    method : str
        Name of the metric function, looked up in this module's namespace.
        The name also selects the call signature:

        * ``mean_center_dist``, ``max_center_dist`` -> ``f(x, labs, centers)``
        * ``max_diam``, ``mean_max_diam``, ``mean_all``, ``scatter``,
          ``cvnn_sep``, ``IGP``, ``sillhouette_euclidean`` -> ``f(x, labs)``
        * ``dataset_midpoint_dist``, ``dataset_meancenter_dist``
          -> ``f(x, centers)``
        * anything else (e.g. ``radial_density``) -> ``f(x, centers, addit)``
    addit : optional
        Extra argument forwarded as the third positional argument in the
        fallback signature (for example the ``radial_density`` mode).

    Returns
    -------
    list
        One metric value per dataset, in input order.

    Raises
    ------
    NameError
        If ``method`` does not name anything in the module namespace.
    """
    # Resolve the callable once instead of eval()-ing a code string per dataset.
    try:
        metric = globals()[method]
    except KeyError:
        raise NameError("name '%s' is not defined" % method)

    res_list = []
    for i, x in enumerate(res_dicts['x']):
        labs = res_dicts['labs'][i]
        centers = res_dicts['centers'][i]
        if method in ['mean_center_dist','max_center_dist']:
            res = metric(x,labs,centers)
        elif method in ['max_diam','mean_max_diam','mean_all','scatter','cvnn_sep','IGP', 'sillhouette_euclidean']:
            res = metric(x,labs)
        elif method in ['dataset_midpoint_dist', 'dataset_meancenter_dist']:
            res = metric(x,centers)
        else:
            # Forward the caller's extra argument; the metric's own name was
            # being passed here before, so `addit` never reached the metric.
            res = metric(x,centers,addit)
        res_list.append(res)
    return res_list

## Parameters

In [5]:
# Parameter grid swept by the data-generation cells below.
clusters = [2, 3, 4]     # number of clusters (passed as `k`)
dim = [5, 10, 20]        # number of features per dataset
sep = [0.5, 1, 3]        # values forwarded as the generators' `sep` argument
noise = [0, 0.2, 0.5]    # values forwarded as the generators' `noise` argument

## Uniform Distribution

In [11]:
# Step 1: for every dimensionality, draw 100 uniform datasets of 300 rows each.
uni_dis = {
    'uni_dis_dim_' + str(n_dim): [np.random.rand(300, n_dim) for _ in range(100)]
    for n_dim in dim
}

# Step 2: cluster every dataset once per requested cluster count; the key
# gains a '_k_<count>' suffix and 'labs' temporarily holds the raw k_means output.
uni_dis = {
    name + '_k_' + str(n_clust): {
        'x': samples,
        'labs': [k_means(sample, n_clust) for sample in samples],
    }
    for name, samples in uni_dis.items()
    for n_clust in clusters
}

# Step 3: split each k_means result into its first element ('labs') and its
# second element ('centers').
uni_dis = {
    name: {
        'x': entry['x'],
        'labs': [fit[0] for fit in entry['labs']],
        'centers': [fit[1] for fit in entry['labs']],
    }
    for name, entry in uni_dis.items()
}

## Gaussian Distribution

In [None]:
# Step 1: for every dimensionality, draw 100 Gaussian datasets of 300 rows each.
# This cell used np.random.rand, which samples a *uniform* distribution and made
# the section a duplicate of the uniform one. The distribution parameters mirror
# gouss_dist: mean 1 and a standard deviation drawn per dataset from U(0.1, 0.3).
gauss_dis = {
    'gauss_dis_dim_' + str(n_dim): [
        np.random.normal(1, np.random.uniform(0.1, 0.3), (300, n_dim))
        for _ in range(100)
    ]
    for n_dim in dim
}

# Step 2: cluster every dataset once per requested cluster count; the key
# gains a '_k_<count>' suffix and 'labs' temporarily holds the raw k_means output.
gauss_dis = {
    name + '_k_' + str(n_clust): {
        'x': samples,
        'labs': [k_means(sample, n_clust) for sample in samples],
    }
    for name, samples in gauss_dis.items()
    for n_clust in clusters
}

# Step 3: split each k_means result into its first element ('labs') and its
# second element ('centers').
gauss_dis = {
    name: {
        'x': entry['x'],
        'labs': [fit[0] for fit in entry['labs']],
        'centers': [fit[1] for fit in entry['labs']],
    }
    for name, entry in gauss_dis.items()
}

## Circle Clusters

In [None]:
# 100 generated datasets for every (cluster count, dimensionality, separation,
# noise) combination, with a single spread value `sd` per dataset.
# The key now puts the dimensionality after 'dim_' and the cluster count after
# 'k_' (they were swapped), matching the key layout of the other dataset families.
circle_clust_equal = {
    'circ_clust_equal_dim_' + str(n_dim) + '_k_' + str(n_clust)
    + '_sep_' + str(sep_val) + '_noise_' + str(noise_val): [
        generate_data(
            k = n_clust,
            dimx = n_dim,
            sep = sep_val,
            # 300 divided by the cluster count (closing parenthesis was missing).
            # NOTE(review): presumably a per-cluster sample size — confirm
            # against generate_data.
            n = int(round(300/n_clust)),
            sd = np.random.uniform(0.5,3),
            se = seed_val,
            noise = noise_val
        )
        for seed_val in range(100)
    ]
    for n_clust in clusters
    for n_dim in dim
    for sep_val in sep
    for noise_val in noise
}

# Regroup each list of generate_data results into parallel lists taken from
# positions 0, 1 and 2 of every result.
circle_clust_equal = {
    name: {
        'x': [run[0] for run in runs],
        'labs': [run[1] for run in runs],
        'centers': [run[2] for run in runs],
    }
    for name, runs in circle_clust_equal.items()
}

In [None]:
# 100 generated datasets for every (cluster count, dimensionality, separation,
# noise) combination, with one independently drawn spread value per cluster.
# The key now puts the dimensionality after 'dim_' and the cluster count after
# 'k_' (they were swapped), matching the key layout of the other dataset families.
circle_clust_unequal = {
    'circ_clust_unequal_dim_' + str(n_dim) + '_k_' + str(n_clust)
    + '_sep_' + str(sep_val) + '_noise_' + str(noise_val): [
        generate_data(
            k = n_clust,
            dimx = n_dim,
            sep = sep_val,
            # 300 divided by the cluster count (closing parenthesis was missing).
            # NOTE(review): presumably a per-cluster sample size — confirm
            # against generate_data.
            n = int(round(300/n_clust)),
            # One spread value per cluster; `_` avoids shadowing the cluster count.
            sd = [np.random.uniform(0.5,3) for _ in range(n_clust)],
            se = seed_val,
            noise = noise_val
        )
        for seed_val in range(100)
    ]
    for n_clust in clusters
    for n_dim in dim
    for sep_val in sep
    for noise_val in noise
}

# Regroup each list of generate_data results into parallel lists taken from
# positions 0, 1 and 2 of every result.
circle_clust_unequal = {
    name: {
        'x': [run[0] for run in runs],
        'labs': [run[1] for run in runs],
        'centers': [run[2] for run in runs],
    }
    for name, runs in circle_clust_unequal.items()
}

## Gaussian Clusters

In [38]:
# 100 seeded datasets (300 samples each) for every (cluster count,
# dimensionality, separation, noise) combination.
# The key now puts the dimensionality after 'dim_' and the cluster count after
# 'k_' (they were swapped), matching the key layout of the other dataset families.
gauss_clust = {
    'gauss_clust_dim_' + str(n_dim) + '_k_' + str(n_clust)
    + '_sep_' + str(sep_val) + '_noise_' + str(noise_val): [
        gauss_clust_generator(
            k = n_clust,
            dimx = n_dim,
            sep = sep_val,
            n = 300,
            se = seed_val,
            noise = noise_val
        )
        for seed_val in range(100)
    ]
    for n_clust in clusters
    for n_dim in dim
    for sep_val in sep
    for noise_val in noise
}

# Regroup each list of (x, labs, centers) tuples into three parallel lists.
gauss_clust = {
    name: {
        'x': [run[0] for run in runs],
        'labs': [run[1] for run in runs],
        'centers': [run[2] for run in runs],
    }
    for name, runs in gauss_clust.items()
}

## Combine Dicts

In [None]:
# Merge every dataset family into one mapping keyed by dataset name; if a name
# occurred in more than one family, the later family's entry would win.
full_dict = {
    name: data
    for family in (uni_dis, gauss_dis, circle_clust_equal, circle_clust_unequal, gauss_clust)
    for name, data in family.items()
}

## Compactness

In [None]:
compact_list = ['mean_center_dist','max_center_dist','max_diam','mean_max_diam','mean_all']
# metric name -> dataset name -> list with one metric value per generated sample.
# The comprehension used to reference undefined names (k, j, v) and never
# iterated full_dict; each metric is now evaluated on every dataset family entry.
comp_dict = {
    metric: {
        name: get_metrics_results(data, metric)
        for name, data in full_dict.items()
    }
    for metric in compact_list
}

In [None]:
radial_list = ['single_cluster_max','single_cluster_mean','ratio']
# 'radial_density<mode>' -> dataset name -> list with one value per generated sample.
# The comprehension used to reference undefined names (k, j, v) and never
# iterated full_dict; the mode is handed to get_metrics_results as `addit`.
radial_dict = {
    'radial_density' + mode: {
        name: get_metrics_results(data, 'radial_density', mode)
        for name, data in full_dict.items()
    }
    for mode in radial_list
}

In [None]:
# Fold the radial-density results into the compactness results. A copy is
# extended, so the dict previously bound to comp_dict is left untouched.
comp_dict = dict(comp_dict)
comp_dict.update(radial_dict)

## Separation

In [None]:
seperation_list = ['cvnn_sep', 'scatter' ,'dataset_midpoint_dist', 'dataset_meancenter_dist']
# metric name -> dataset name -> list with one metric value per generated sample.
# The comprehension used to reference undefined names (k, j, v) and never
# iterated full_dict; each metric is now evaluated on every dataset family entry.
sep_dict = {
    metric: {
        name: get_metrics_results(data, metric)
        for name, data in full_dict.items()
    }
    for metric in seperation_list
}

## Other

In [None]:
other_list = ['IGP', 'sillhouette_euclidean']
# metric name -> dataset name -> list with one metric value per generated sample.
# The comprehension used to reference undefined names (k, j, v) and never
# iterated full_dict; each metric is now evaluated on every dataset family entry.
other_dict = {
    metric: {
        name: get_metrics_results(data, metric)
        for name, data in full_dict.items()
    }
    for metric in other_list
}