In [29]:
import pandas as pd
import numpy as np
import os
import re
from sklearn.datasets import make_classification
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [2]:
os.chdir('../')

In [3]:
from mc_hammer.cluster_measures import cvnn_sep, scatter, radial_density,dataset_midpoint_dist, dataset_meancenter_dist
from mc_hammer.cluster_measures import mean_center_dist,max_center_dist,max_diam, mean_max_diam,mean_all
from mc_hammer.similarity_functions import IGP, sillhouette_euclidean
from mc_hammer.circular_cluster_generator import generate_data
from mc_hammer.clustering_algorithms import k_means

## Functions

In [4]:
def gouss_dist(dim,seed_n):
    seed(seed_n)
    std = uniform(0.1,0.3)
    null_arr = np.array([np.random.normal(1,std,100) for i in range(dim)])
    return null_arr.T

In [5]:
def get_centers(x,labs):
    centers_list = []
    for k in range(max(labs)+1):
        one_clust = x[labs==k,:]
        center = one_clust.mean(axis = 0)
        centers_list.append(center)
    cent_array = np.array(centers_list)
    return cent_array

In [6]:
def gauss_clust_generator(k,dimx,sep,n,se,noise):
    noise_dim = int(round(dimx*noise))
    inf_dim = dimx - noise_dim
    x,labs = make_classification(
        n_samples = n,
        n_features = dimx,
        n_informative = inf_dim,
        n_redundant = noise_dim,
        n_classes = k,
        n_clusters_per_class = 1,
        class_sep = sep,
        random_state = se
    ) 
    x = x/x.max()
    centers = get_centers(x,labs)
    return x,labs,centers

In [7]:
def get_metrics_results(res_dicts,method,addit = None):
    res_list = [] 
    for i in range(len(res_dicts['x'])):
        x = res_dicts['x'][i]
        labs = res_dicts['labs'][i]
        centers = res_dicts['centers'][i]
        if method in ['mean_all','mean_max_diam','max_diam','mean_center_dist','max_center_dist']:
            res = eval(method + '(x,labs,centers)')
        elif method in ['scatter','cvnn_sep','IGP', 'sillhouette_euclidean']:
            res = eval(method + '(x,labs)')
        elif method in ['dataset_midpoint_dist', 'dataset_meancenter_dist']:
            res = eval(method + '(x,centers)')
        else:
            res = eval(method + '(x,centers,labs,addit)')
        res_list.append(res)
    return(res_list)

In [8]:
def get_k(test_string):
    test_2 = re.sub('.*k_','',test_string,)
    k = int(re.sub('_.*','',test_2))
    return k

In [9]:
def list_sorter(metric_list,method):
    if method == 'max':
        new_list = [max(i) for i in metric_list]
    elif method == 'min':
        new_list = [min(i) for i in metric_list]
    else:
        new_list = [np.mean(i) for i in metric_list]
    return new_list

In [10]:
def name_delete(x):
    x = re.sub('_dim_.*_k_[0-9]{1,2}','',x)
    x = re.sub('km_','',x)
    x = re.sub('unequal_','',x)
    x = re.sub('equal_','',x)
    x = re.sub('__','_',x)
    return(x)

In [11]:
def get_grouped(metric_dict,keys_dict):
    new_dict = {k:[] for k in keys_dict.keys()}
    for k,v in keys_dict.items():
        for i in v:
            new_dict[k].append(metric_dict[i])
    new_dict = {k:[j for i in v for j in i] for k,v in new_dict.items()}
    return new_dict

In [12]:
def get_colours(clust_var):
    if ('gauss_dis' in clust_var) or ('uni_dis' in clust_var):
        return 'gray'
    elif 'circ_clust' in clust_var:
        return 'darkviolet'
    else:
        return 'lightseagreen'

In [13]:
def make_df(dist_dict,metric):
    df_dict = {k:pd.DataFrame({
        'clust type':[k for i in range(len(v))],
        'values':v,
        'metric':[metric for i in range(len(v))]}) for k,v in dist_dict.items()}
    test_pd = pd.concat(df_dict.values())
    return test_pd

In [35]:
def std_mean(x,full_v):
    v_list = [j for i in full_v.values() for j in i]
    min_v = min(v_list)
    max_v = max(v_list)
    new_mean = np.mean([(i-min_v)/(max_v-min_v) for i in x])
    return new_mean

In [104]:
def std_mean2(x,full_v):
    v_list = [j for i in full_v.values() for j in i]
    min_v = min(v_list)
    max_v = max(v_list)
    new_mean = [(i-min_v)/(max_v-min_v) for i in x]
    return new_mean

In [50]:
def name_shorten2(x):
    x = re.sub('.*_dim','dim',x)
    x = re.sub('_sep.*','',x)
    return x

In [116]:
def mean_dif(dist_dict):
    null_list = ['uni_dis','gauss_dis']
    new_dict = {}
    for i in null_list:
        idict = {k + '_' + i:v-dist_dict[i + '_' + name_shorten2(k)] for k,v in dist_dict.items() if 'clust' in k}
        new_dict.update(idict)
    return new_dict

In [117]:
def mean_dif2(dist_dict):
    null_list = ['uni_dis','gauss_dis']
    new_dict = {}
    for i in null_list:
        idict = {k + '_' + i:[v2-np.mean(dist_dict[i + '_' + name_shorten2(k)]) for v2 in v] for k,v in dist_dict.items() if 'clust' in k}
        new_dict.update(idict)
    return new_dict

In [150]:
def one_sided_count(test_dict):
    min_count = len([i for i in test_dict if i > 0])/len(test_dict)
    return max([min_count,1-min_count])

In [160]:
def res_dict(x_list):
    return{
        'min':min(x_list),
        'max':max(x_list),
        'range':max(x_list)-min(x_list),
        'mean':abs(np.mean(x_list)),
        'one_side':one_sided_count(x_list)
    }

## Parameters

In [14]:
clusters = [2,3,4]
dim = [5,10,20]
sep = [0.5,1,3]
noise = [0,0.2,0.5]

## Uniform Distribution

In [17]:
uni_dis = {'uni_dis_dim_' +str(i):[np.random.rand(300,i) for j in range(100)] for i in dim}
uni_dis = {k+ '_k_'+str(i):{
    'x':v,
    'labs':[k_means(j,i) for j in v]} for k,v in uni_dis.items() for i in clusters}
uni_dis = {k:{
    'x':v['x'],
    'labs':[i[0] for i in v['labs']],
    'centers':[j[1] for j in v['labs']]} for k,v in uni_dis.items()
}

## Gaussian Distribution

In [18]:
gauss_dis = {'gauss_dis_dim_' +str(i):[np.random.rand(300,i) for j in range(100)] for i in dim}
gauss_dis = {k+ '_k_'+str(i):{
    'x':v,
    'labs':[k_means(j,i) for j in v]} for k,v in gauss_dis.items() for i in clusters}
gauss_dis = {k:{
    'x':v['x'],
    'labs':[i[0] for i in v['labs']],
    'centers':[j[1] for j in v['labs']]} for k,v in gauss_dis.items()
}

## Circle Clusters

In [19]:
circle_clust_equal = {'circ_clust_equal_dim_'+ str(j) + '_k_' +str(i) + '_sep_' + str(m) + '_noise_' +str(n):[generate_data(
    k = i,
    dimx = j,
    sep = m,
    n = int(round(300/i)),
    sd =np.random.uniform(0.5,3),
    se = y,
    noise = n
) for y in range(100)] for i in clusters for j in dim for m in sep for n in noise}

circle_clust_equal = {k:{
    'x':[i[0] for i in v],
    'labs':[i[1] for i in v],
    'centers':[i[2] for i in v]
} for k,v in circle_clust_equal.items()}

circle_clust_equal_km = {'km_' +k:{
    'x':v['x'],
    'labs':[k_means(i,get_k(k)) for i in v['x']]} for k,v in circle_clust_equal.items()
 }

circle_clust_equal_km = {k:{
    'x':v['x'],
    'labs':[i[0] for i in v['labs']],
    'centers':[j[1] for j in v['labs']]} for k,v in circle_clust_equal_km.items()
}

In [20]:
circle_clust_unequal = {'circ_clust_unequal_dim_'+ str(j) + '_k_' +str(i) + '_sep_' + str(m) + '_noise_' +str(n):[generate_data(
    k = i,
    dimx = j,
    sep = m,
    n = int(round(300/i)),
    sd =[np.random.uniform(0.5,3)for i in range(i)],
    se = y,
    noise = n
) for y in range(100)] for i in clusters for j in dim for m in sep for n in noise}

circle_clust_unequal = {k:{
    'x':[i[0] for i in v],
    'labs':[i[1] for i in v],
    'centers':[i[2] for i in v]
} for k,v in circle_clust_unequal.items()}
    
circle_clust_unequal_km = {'km_' +k:{
    'x':v['x'],
    'labs':[k_means(i,get_k(k)) for i in v['x']]} for k,v in circle_clust_unequal.items()
 }
circle_clust_unequal_km = {k:{
    'x':v['x'],
    'labs':[i[0] for i in v['labs']],
    'centers':[j[1] for j in v['labs']]} for k,v in circle_clust_unequal_km.items()
}

## Gaussian Clusters

In [21]:
gauss_clust = {'gauss_clust_dim_'+ str(j) + '_k_' +str(i) + '_sep_' + str(m) + '_noise_' +str(n):[gauss_clust_generator(
    k = i,
    dimx = j,
    sep = m,
    n = 300,
    se = y,
    noise = n
) for y in range(100)] for i in clusters for j in dim for m in sep for n in noise}
gauss_clust = {k:{
    'x':[i[0] for i in v],
    'labs':[i[1] for i in v],
    'centers':[i[2] for i in v]
} for k,v in gauss_clust.items()}

## Combine Dicts

In [22]:
full_dict = {**uni_dis,**gauss_dis,**circle_clust_equal,**circle_clust_equal_km,**circle_clust_unequal,**circle_clust_unequal_km,**gauss_clust}

## Compactness

In [23]:
compact_list = ['mean_center_dist','max_center_dist','max_diam','mean_max_diam','mean_all']
comp_dict = {i:{k:get_metrics_results(v,i) for k,v in full_dict.items()} for i in compact_list}

In [24]:
radial_list = ['single_cluster_max','single_cluster_mean','ratio']
radial_dict = {'radial_density' +i:{k:get_metrics_results(v,'radial_density',i) for k,v in full_dict.items()} for i in radial_list}

In [25]:
comp_dict = {**comp_dict,**radial_dict}

## Seperation  

In [None]:
seperation_list = ['cvnn_sep', 'scatter' ,'dataset_midpoint_dist', 'dataset_meancenter_dist']
sep_dict = {i:{k:get_metrics_results(v,i) for k,v in full_dict.items()} for i in seperation_list}

## Other

In [None]:
other_list = ['IGP', 'sillhouette_euclidean']
other_dict = {i:{k:get_metrics_results(v,i) for k,v in full_dict.items()} for i in other_list}

## Cleaning

In [None]:
res_dicts = {'comp':comp_dict,'sep':sep_dict,'other':other_dict}
for k,v in res_dicts.items():
    with open('data/processed/cluster_measures/'+k+'.json','w') as f:
        json.dump(v,f)

In [None]:
comp2 = comp_dict.copy()
sep2 = sep_dict.copy()
other2 = other_dict.copy()

In [None]:
dict_list = [comp2,sep2,other2]
for i in dict_list:
        test_dict = {k + '_' + j:{k2:list_sorter(v2,j) for k2,v2 in v.items() if isinstance(v2[0],list)}for k,v in i.items()for j in ['max','min','mean']}
        i.update(test_dict)

In [None]:
comp2 = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in comp2.items() }
sep2 = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in sep2.items() }
other2 = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in other2.items() }

In [None]:
comp2 = {k:v for k,v in comp2.items() if len(v) > 0}
sep2 = {k:v for k,v in sep2.items() if len(v) > 0}
other2 = {k:v for k,v in other2.items() if len(v) > 0 }

In [None]:
keys_list = list(full_dict.keys())
keys_list = set([name_delete(i) for i in keys_list])
keys_dict = {i:[] for i in keys_list}
for i in full_dict.keys():
    keys_dict[name_delete(i)].append(i)

In [None]:
comp2 = {k:get_grouped(v,keys_dict) for k,v in comp2.items()}
sep2 = {k:get_grouped(v,keys_dict) for k,v in sep2.items()}
other2 = {k:get_grouped(v,keys_dict) for k,v in other2.items()}

In [None]:
compdf = {k:make_df(v,k) for k,v in comp2.items()}
sepdf = {k:make_df(v,k) for k,v in sep2.items()}
otherdf = {k:make_df(v,k) for k,v in other2.items()}

In [None]:
compdf = pd.concat(compdf.values())
sepdf = pd.concat(sepdf.values())
otherdf = pd.concat(otherdf.values())

In [None]:
compdf

## Plotting

In [None]:
palette = {i:get_colours(i) for i in compdf['clust type'].unique()}


In [None]:
comp_plot = sns.displot(compdf,x = 'values',col = 'metric',hue = 'clust type',kind = 'kde',col_wrap =4,palette=palette)
sep_plot = sns.displot(sepdf,x = 'values',col = 'metric',hue = 'clust type',kind = 'kde',col_wrap =4,palette=palette)
other_plot = sns.displot(otherdf,x = 'values',col = 'metric',hue = 'clust type',kind = 'kde',col_wrap =4,palette=palette)

## Means

In [171]:
comp_means = comp_dict.copy()
sep_means = sep_dict.copy()
other_means = other_dict.copy()

In [170]:
dict_list = [comp_means,sep_means,other_means]
for i in dict_list:
    test_dict = {k + '_' + j:{k2:list_sorter(v2,j) for k2,v2 in v.items() if isinstance(v2[0],list)}for k,v in i.items()for j in ['max','min','mean']}
    i.update(test_dict)
comp_means = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in comp_means.items() }
sep_means = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in sep_means.items() }
other_means = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in other_means.items() }
comp_means = {k:v for k,v in comp_means.items() if len(v) > 0}
sep_means = {k:v for k,v in sep_means.items() if len(v) > 0}
other_means = {k:v for k,v in other_means.items() if len(v) > 0 }

In [138]:
comp_means = {k:{k2:std_mean(v2,v)for k2,v2 in v.items()}  for k,v in comp_means.items()}
sep_means = {k:{k2:std_mean(v2,v) for k2,v2 in v.items()} for k,v in sep_means.items()}
other_means = {k:{k2:std_mean(v2,v) for k2,v2 in v.items()} for k,v in other_means.items()}

comp_dif = {k:mean_dif(v) for k,v in comp_means.items()}
sep_dif = {k:mean_dif(v) for k,v in sep_means.items()}
other_dif = {k:mean_dif(v) for k,v in other_means.items()}
comp_dif = {k:[i for i in v.values()] for k,v in comp_dif.items()}
sep_dif = {k:[i for i in v.values()]  for k,v in sep_dif.items()}
other_dif = {k:[i for i in v.values()] for k,v in other_dif.items()}
comp_dif2 = {k:v for k,v in comp_dif.items() if all(i > 0 for i in v) or all(i < 0 for i in v)} 
sep_dif2 = {k:v for k,v in sep_dif.items() if all(i > 0 for i in v) or all(i < 0 for i in v)} 
other_dif2 = {k:v for k,v in other_dif.items() if all(i > 0 for i in v) or all(i < 0 for i in v)} 
comp_dif2 = {k:min(v) if v[0] > 0 else max(v)for k,v in comp_dif2.items()}
sep_dif = {k:min(v) if v[0] > 0 else max(v) for k,v in sep_dif.items()}

In [139]:
comp_dif = {k:mean_dif(v) for k,v in comp_means.items()}
sep_dif = {k:mean_dif(v) for k,v in sep_means.items()}
other_dif = {k:mean_dif(v) for k,v in other_means.items()}
comp_dif = {k:[i for i in v.values()] for k,v in comp_dif.items()}
sep_dif = {k:[i for i in v.values()]  for k,v in sep_dif.items()}
other_dif = {k:[i for i in v.values()] for k,v in other_dif.items()}

In [162]:
comp_df = {k:res_dict(v) for k,v in comp_dif.items()}
comp_df = pd.DataFrame(comp_df.values())
comp_df['types'] = list(comp_dif.keys())

In [163]:
print(comp_df.sort_values(by = 'one_side',ascending = False).head())
print(comp_df.sort_values(by = 'range').head())
print(comp_df.sort_values(by = 'mean',ascending = False).head())

         min       max     range      mean  one_side                  types
2  -0.746404  0.083183  0.829587  0.201874  0.960494   mean_center_dist_min
14 -0.741620  0.081834  0.823454  0.198764  0.958025           mean_all_min
3  -0.658746  0.087379  0.746126  0.179321  0.944444  mean_center_dist_mean
15 -0.655769  0.086496  0.742266  0.176684  0.940741          mean_all_mean
1  -0.537456  0.094224  0.631680  0.123282  0.886420   mean_center_dist_max
         min       max     range      mean  one_side  \
0  -0.031016  0.041564  0.072580  0.002337  0.669136   
18 -0.015108  0.078542  0.093650  0.001859  0.669136   
17 -0.015108  0.078542  0.093650  0.001859  0.669136   
16 -0.015108  0.078542  0.093650  0.001859  0.669136   
19 -0.015013  0.120578  0.135591  0.002911  0.656790   

                                    types  
0                     radial_densityratio  
18  radial_densitysingle_cluster_max_mean  
17   radial_densitysingle_cluster_max_min  
16   radial_densitysingle_clust

In [164]:
sep_df = {k:res_dict(v) for k,v in sep_dif.items()}
sep_df = pd.DataFrame(sep_df.values())
sep_df['types'] = list(sep_dif.keys())

In [165]:
print(sep_df.sort_values(by = 'one_side',ascending = False).head())
print(sep_df.sort_values(by = 'range').head())
print(sep_df.sort_values(by = 'mean',ascending = False).head())

        min       max     range      mean  one_side  \
0 -0.614775  0.168067  0.782842       NaN  0.918519   
2 -0.070634  0.434558  0.505192  0.098438  0.885185   
4 -0.135353  0.510702  0.646055  0.094372  0.802469   
1 -0.638009  0.131146  0.769155  0.112451  0.775309   
5 -0.159644  0.504987  0.664631  0.085062  0.740741   

                         types  
0                     cvnn_sep  
2    dataset_midpoint_dist_max  
4   dataset_midpoint_dist_mean  
1                      scatter  
5  dataset_meancenter_dist_max  
        min       max     range      mean  one_side  \
2 -0.070634  0.434558  0.505192  0.098438  0.885185   
4 -0.135353  0.510702  0.646055  0.094372  0.802469   
5 -0.159644  0.504987  0.664631  0.085062  0.740741   
7 -0.197401  0.538106  0.735507  0.066839  0.619753   
6 -0.197775  0.559021  0.756796  0.028025  0.533333   

                          types  
2     dataset_midpoint_dist_max  
4    dataset_midpoint_dist_mean  
5   dataset_meancenter_dist_max  
7  d

In [166]:
other_df = {k:res_dict(v) for k,v in other_dif.items()}
other_df = pd.DataFrame(other_df.values())
other_df['types'] = list(other_dif.keys())

In [167]:
print(other_df.sort_values(by = 'one_side',ascending = False).head())
print(other_df.sort_values(by = 'range').head())
print(other_df.sort_values(by = 'mean',ascending = False).head())

        min       max     range      mean  one_side                  types
1 -0.207846  0.671837  0.879683  0.122230  0.746914  sillhouette_euclidean
0 -0.734678  0.480647  1.215324  0.065952  0.714815                    IGP
        min       max     range      mean  one_side                  types
1 -0.207846  0.671837  0.879683  0.122230  0.746914  sillhouette_euclidean
0 -0.734678  0.480647  1.215324  0.065952  0.714815                    IGP
        min       max     range      mean  one_side                  types
1 -0.207846  0.671837  0.879683  0.122230  0.746914  sillhouette_euclidean
0 -0.734678  0.480647  1.215324  0.065952  0.714815                    IGP


In [172]:
dict_list = [comp_means,sep_means,other_means]
for i in dict_list:
    test_dict = {k + '_' + j:{k2:list_sorter(v2,j) for k2,v2 in v.items() if isinstance(v2[0],list)}for k,v in i.items()for j in ['max','min','mean']}
    i.update(test_dict)
comp_means = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in comp_means.items() }
sep_means = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in sep_means.items() }
other_means = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in other_means.items() }

In [194]:
comp = comp_means['mean_center_dist_min']
sep = sep_means['dataset_midpoint_dist_max']
divcomp_dict = {i:[comp[i][j]/sep[i][j] for j in range(len(comp[i]))] for i in comp.keys()}
divsep_dict = {i:[sep[i][j]/comp[i][j] for j in range(len(comp[i]))] for i in comp.keys()}
mult_dict = {i:[comp[i][j]*sep[i][j] for j in range(len(comp[i]))] for i in comp.keys()}
plus_dict = {i:[comp[i][j]+sep[i][j] for j in range(len(comp[i]))] for i in comp.keys()}
full_dict = {'comp/sep':divcomp_dict,'sep/comp':divsep_dict,'mult':mult_dict,'plus':plus_dict}

In [190]:
full_dict = {k:{k2:np.mean(v2) for k2,v2 in v.items()} for k,v in full_dict.items()} 
full_dict = {k:mean_dif(v) for k,v in full_dict.items()}
full_dict = {k:[i for i in v.values()] for k,v in full_dict.items()}

In [196]:
mult_dict

{'uni_dis_dim_5_k_2': [0.1671131205807805,
  0.1541666425127624,
  0.14941718574910634,
  0.15446689772489686,
  0.15189306575762004,
  0.15406611786504304,
  0.1581154474732962,
  0.16446171522698583,
  0.15875240603546986,
  0.14099398021520076,
  0.16413428179377,
  0.1507437343708236,
  0.1628139954152904,
  0.15547501657706436,
  0.17377641903467594,
  0.15922756240409025,
  0.16381599295319427,
  0.15250500235837702,
  0.1549105662144344,
  0.16179237971569302,
  0.1523629520667685,
  0.15831175918967114,
  0.15759441549676192,
  0.16712637952940043,
  0.15866763829823086,
  0.1546955087266498,
  0.15058646919013952,
  0.15366210687561646,
  0.17805996013375044,
  0.16187607900409537,
  0.16785341556866284,
  0.15004246404832572,
  0.16413948688020802,
  0.15942452087698814,
  0.1692789023836558,
  0.1603713459062089,
  0.16384464607861443,
  0.16300494345467822,
  0.15747241525362188,
  0.1517811727613431,
  0.15222663685159254,
  0.1529465885276213,
  0.15749420366009328,
  0.1

## Individual Mean

In [112]:
comp_ind = comp_dict.copy()
sep_ind = sep_dict.copy()
other_ind = other_dict.copy()

In [113]:
dict_list = [comp_ind,sep_ind,other_ind]
for i in dict_list:
    test_dict = {k + '_' + j:{k2:list_sorter(v2,j) for k2,v2 in v.items() if isinstance(v2[0],list)}for k,v in i.items()for j in ['max','min','mean']}
    i.update(test_dict)
comp_ind = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in comp_ind.items() }
sep_ind = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in sep_ind.items() }
other_ind = {k:{k2:v2 for k2,v2 in v.items() if isinstance(v2[0],list) == False} for k,v in other_ind.items() }
comp_ind = {k:v for k,v in comp_ind.items() if len(v) > 0}
sep_ind = {k:v for k,v in sep_ind.items() if len(v) > 0}
other_ind = {k:v for k,v in other_ind.items() if len(v) > 0 }

In [114]:
comp_ind = {k:{k2:std_mean2(v2,v)for k2,v2 in v.items()}  for k,v in comp_ind.items()}
sep_ind = {k:{k2:std_mean2(v2,v) for k2,v2 in v.items()} for k,v in sep_ind.items()}
other_ind = {k:{k2:std_mean2(v2,v) for k2,v2 in v.items()} for k,v in other_ind.items()}

In [115]:
other_ind

{'IGP': {'uni_dis_dim_5_k_2': [0.887596836538306,
   0.9217002818443395,
   0.9033750940615654,
   0.8916834976215393,
   0.8820888507107084,
   0.9356713709455119,
   0.9349754649990216,
   0.9262908626654124,
   0.8823051948051949,
   0.9037892030105602,
   0.9035686535686537,
   0.8779790364743119,
   0.8756636481721166,
   0.9304926936505884,
   0.8730983302411874,
   0.9220640668009089,
   0.9168281776977429,
   0.9304267161410018,
   0.873744614812459,
   0.9087860929966193,
   0.9047619047619049,
   0.9220167360973619,
   0.9194847020933977,
   0.9221653767108312,
   0.9078801916092015,
   0.9087946867058513,
   0.9049448882596826,
   0.887898530835127,
   0.8996985157699443,
   0.9390376031087934,
   0.9399905649905651,
   0.9084249084249084,
   0.860450000955822,
   0.943793948964211,
   0.9219012908510219,
   0.9121651295564338,
   0.8961570009172125,
   0.8622904872904873,
   0.8642139782742366,
   0.9341954897012887,
   0.9374231965696745,
   0.8917748917748919,
   0.892044

In [111]:
comp_ind = {k:{k2:mean_dif2(v2)for k2,v2 in v.items()} for k,v in comp_ind.items()}
sep_ind = {k:{k2:mean_dif2(v2)for k2,v2 in v.items()} for k,v in sep_ind.items()}
other_ind = {k:{k2:mean_dif2(v2)for k2,v2 in v.items()} for k,v in other_ind.items()}

AttributeError: 'list' object has no attribute 'items'

## testing

In [143]:
test_dict = comp_dif['radial_densityratio']

In [147]:
new_dict = {'zero_count':}

0.6691358024691358

In [26]:
with open('data/processed/cluster_measures/sep.json') as f:
    sep_dict = json.load(f)
with open('data/processed/cluster_measures/other.json') as f:
    other_dict = json.load(f)

In [32]:
full_v = other_means['IGP']

In [34]:
min([j for i in full_v.values() for j in i])

0.23