In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import pandas as pd

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = ""
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure below ``IMAGES_PATH``.

    Parameters
    ----------
    fig_id : str
        Base name of the image file, without extension.
    tight_layout : bool, default True
        If true, ``plt.tight_layout()`` is called before saving.
    fig_extension : str, default "png"
        File extension; also passed to ``plt.savefig`` as ``format``.
    resolution : int, default 300
        Passed to ``plt.savefig`` as ``dpi``.
    """
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    # savefig does not create missing folders, so make sure the target
    # directory exists first. The isdir guard (rather than exist_ok=True)
    # keeps this working on Python 2, which this notebook still supports.
    target_dir = os.path.dirname(path)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [2]:
# Read the two BIRCH tuning result tables (CH score first, then stability).
birch_ch, birch_stability = (
    pd.read_csv(csv_path)
    for csv_path in (
        './birch_tunning/birch_performance_Click_Weighted_L2_CH_branching.csv',
        './birch_tunning/birch_performance_Click_Weighted_L2_Stability_branching.csv',
    )
)

In [3]:
# Show which columns the CH-score results table provides.
birch_ch.columns

Index(['branching_factor', 'threshold', 'n_subclusters', 'n_clusters',
       'CH score'],
      dtype='object')

In [4]:
# Copy 'CH score' and 'n_subclusters' from `birch_ch` onto every row of
# `birch_stability` that has the same (branching_factor, threshold, n_clusters).
# Rows without a counterpart in `birch_ch` get 0.0 / 0.
config_keys = ['branching_factor', 'threshold', 'n_clusters']
copied_cols = ['CH score', 'n_subclusters']

# Keep a single CH row per configuration so the left merge below returns
# exactly one row per stability row. (The former float(x) / int(b) on the
# filtered Series raised as soon as a configuration matched twice, and calling
# float()/int() on a Series is deprecated in recent pandas.)
ch_lookup = birch_ch.drop_duplicates(subset=config_keys)[config_keys + copied_cols]

# how='left' keeps the row order of `birch_stability`; the '_merge' indicator
# column tells matched rows ('both') apart from unmatched ones ('left_only').
matched = birch_stability[config_keys].merge(
    ch_lookup, on=config_keys, how='left', indicator=True)
has_match = matched['_merge'] == 'both'

# Assign by position (.values) so the existing index of `birch_stability` is
# preserved, and only overwrite with the default where no match was found.
birch_stability['CH score'] = (
    matched['CH score'].where(has_match, 0.0).astype(float).values)
birch_stability['n_subclusters'] = (
    matched['n_subclusters'].where(has_match, 0).astype('int64').values)

In [5]:
# Keep only the columns used for reporting, in the order they should be shown.
report_columns = [
    'branching_factor',
    'threshold',
    'n_subclusters',
    'n_clusters',
    'CH score',
    'stability_mean',
    'stability_std',
]
birch_stability = birch_stability.loc[:, report_columns]

In [13]:
# Rank the 6-cluster configurations: highest mean stability first, ties
# ordered by highest CH score.
(
    birch_stability
    .loc[birch_stability['n_clusters'] == 6]
    .sort_values(by=['stability_mean', 'CH score'], ascending=False)
)

Unnamed: 0,branching_factor,threshold,n_subclusters,n_clusters,CH score,stability_mean,stability_std
11,50,0.44,9,6,5739.61981,0.786462,0.007684
35,100,0.44,9,6,5739.61981,0.786462,0.007684
59,150,0.44,9,6,5739.61981,0.786462,0.007684
83,200,0.44,9,6,5739.61981,0.786462,0.007684
107,250,0.44,9,6,5739.61981,0.786462,0.007684
19,50,0.45,9,6,5791.388025,0.697611,0.069994
43,100,0.45,9,6,5791.388025,0.697611,0.069994
67,150,0.45,9,6,5791.388025,0.697611,0.069994
91,200,0.45,9,6,5791.388025,0.697611,0.069994
115,250,0.45,9,6,5791.388025,0.697611,0.069994
