In [27]:
import pandas as pd
import numpy as np

def view_tests(initializations, algorithms, file_path, style=True):
    """
    Summarise the stored test runs of every algorithm, grouped by initialization.

    For each algorithm the file ``tests/{name}-{algorithm}.csv`` is read, where
    ``name`` is derived from ``file_path`` by dropping its first path component,
    concatenating the remaining components without a separator and cutting at
    the first dot (``'data/A-Sets/a1.txt'`` -> ``'A-Setsa1'``,
    ``'data/bridge.txt'`` -> ``'bridge'``).

    Parameters
    ----------
    initializations : list
        Initialization methods to keep; rows whose ``initialization`` value is
        not in this list are ignored.
    algorithms : list
        Algorithm methods; one CSV file is read per entry.
    file_path : str
        Path of the data set the tests were run on (only used to derive the
        name of the result files, the data set itself is not opened).
    style : bool, default True
        If True, return a pandas ``Styler`` with the key columns in bold and
        vertical separators between column groups; if False, return the plain
        DataFrame.

    Returns
    -------
    pd.DataFrame or pandas Styler
        One row per (Algorithm, Initialization) pair, in the order the
        initializations first appear in each file. Columns: run count,
        min/max/mean/std of the centroid index, mean execution time,
        min/mean/max/std of the final cost, mean iteration counts (per kind and
        total) and mean norm calculations (initialization, algorithm, total).
        Standard deviations are population values (``np.std``, ddof=0).

    Notes
    -----
    The centroid-index statistics are always computed. When a file has no
    centroid index (NaN values) the statistics are NaN, because the NumPy
    reducers propagate NaN.
    """
    # Per-run columns that are reduced to summary statistics and then removed.
    raw_columns = [
        'CENTROID_INDEX', 'TIME_EXECUTION', 'FINAL_COST', 'SAFE_ITERATIONS',
        'UNSAFE_ITERATIONS', 'LLYOID_ITERATIONS', 'HARTIGAN_ITERATIONS',
        'BINARY_ITERATIONS', 'INIT_NORM_CALCULATIONS', 'NORM_CALCULATIONS',
    ]

    def summarise(runs):
        """Reduce a frame whose cells are per-run lists to one row of statistics per initialization."""
        def reduce(column, func):
            return runs[column].map(func)

        # Any column that is not a known per-run measurement is kept untouched, in front.
        summary = runs.drop(columns=raw_columns)

        summary['count'] = reduce('TIME_EXECUTION', len)

        # No NaN guard here: the cells are lists, so an element-wise isna() on
        # the column can never be true. NaN simply propagates through np.min & co.
        summary['min_centroid_index'] = reduce('CENTROID_INDEX', np.min)
        summary['max_centroid_index'] = reduce('CENTROID_INDEX', np.max)
        summary['mean_centroid_index'] = reduce('CENTROID_INDEX', np.mean)
        summary['std_centroid_index'] = reduce('CENTROID_INDEX', np.std)

        summary['mean_time_execution'] = reduce('TIME_EXECUTION', np.mean)

        summary['min_final_cost'] = reduce('FINAL_COST', np.min)
        summary['mean_final_cost'] = reduce('FINAL_COST', np.mean)
        summary['max_final_cost'] = reduce('FINAL_COST', np.max)
        summary['std_final_cost'] = reduce('FINAL_COST', np.std)

        summary['mean_safe_iterations'] = reduce('SAFE_ITERATIONS', np.mean)
        summary['mean_unsafe_iterations'] = reduce('UNSAFE_ITERATIONS', np.mean)
        summary['mean_lloyd_iterations'] = reduce('LLYOID_ITERATIONS', np.mean)
        summary['mean_hartigan_iterations'] = reduce('HARTIGAN_ITERATIONS', np.mean)
        summary['mean_binary_iterations'] = reduce('BINARY_ITERATIONS', np.mean)
        # Total = sum of the per-kind means computed just above (same addition order as before).
        summary['mean_tot_iterations'] = (
            summary['mean_safe_iterations']
            + summary['mean_unsafe_iterations']
            + summary['mean_lloyd_iterations']
            + summary['mean_hartigan_iterations']
            + summary['mean_binary_iterations']
        )

        summary['mean_init_norm_calculations'] = reduce('INIT_NORM_CALCULATIONS', np.mean)
        summary['mean_norm_calculations'] = reduce('NORM_CALCULATIONS', np.mean)
        summary['mean_tot_norm_calculations'] = (
            summary['mean_init_norm_calculations'] + summary['mean_norm_calculations']
        )
        return summary

    # name of the result files, derived from the data set path
    name = ''.join(file_path.split('/')[1:]).split('.')[0]

    data_frames = {}
    for algorithm in algorithms:
        test_data = pd.read_csv(f'tests/{name}-{algorithm}.csv', sep=',', engine='python')
        test_data = test_data[test_data["initialization"].isin(initializations)]

        # one row per initialization, every cell holding the list of per-run values
        runs = test_data.groupby("initialization", sort=False).agg(list)
        data_frames[algorithm] = summarise(runs)

    result_df = pd.concat(data_frames, axis=0, keys=algorithms, names=["Algorithm", "Initialization"])

    def color_specific_columns(x):
        # Bold the headline metric of each column group.
        df_styled = pd.DataFrame('', index=x.index, columns=x.columns)
        df_styled.loc[:, ['min_final_cost', 'mean_tot_iterations', 'mean_tot_norm_calculations', 'mean_centroid_index']] = 'font-weight: bold'
        return df_styled

    def add_vertical_lines(x):
        # Draw a left border on the first column of each column group.
        df_styled = pd.DataFrame('', index=x.index, columns=x.columns)
        df_styled.loc[:, ['count', 'min_centroid_index', 'mean_time_execution', 'min_final_cost', 'mean_safe_iterations', 'mean_init_norm_calculations']] = 'border-left: 1px solid gray'
        return df_styled

    if style:
        # Apply the styles to the DataFrame
        result_df = result_df.style.apply(color_specific_columns, axis=None).apply(add_vertical_lines, axis=None)

    return result_df

def min_found(initializations, algorithms, file_path, treshold):
    """
    Return, for each algorithm and initialization, the fraction of runs whose
    final cost is at most a given threshold.

    For each algorithm the file ``tests/{name}-{algorithm}.csv`` is read, where
    ``name`` is derived from ``file_path`` by dropping its first path component,
    concatenating the remaining components without a separator and cutting at
    the first dot (``'data/bridge.txt'`` -> ``'bridge'``).

    Parameters
    ----------
    initializations : list
        Initialization methods; they become the columns of the result.
    algorithms : list
        Algorithm methods; they become the rows of the result.
    file_path : str
        Path of the data set the tests were run on (only used to derive the
        name of the result files).
    treshold : float
        Threshold on ``FINAL_COST``; a run counts as a hit when its final cost
        is less than or equal to this value.

    Returns
    -------
    pd.DataFrame
        Rows are algorithms, columns are initializations. Each cell is the
        fraction of runs (between 0 and 1) that reached the threshold,
        formatted as a string with three decimals. A cell is ``'nan'`` when the
        algorithm's file contains no run for that initialization.
    """
    # name of the result files, derived from the data set path
    name = ''.join(file_path.split('/')[1:]).split('.')[0]

    rows = []
    for algorithm in algorithms:
        data = pd.read_csv(f'tests/{name}-{algorithm}.csv', sep=',', engine='python')
        data = data[data["initialization"].isin(initializations)]

        # Mean of a boolean mask per group == hits / number of runs.
        reached = data['FINAL_COST'] <= treshold
        hit_rate = reached.groupby(data['initialization'], sort=False).mean()

        # .get() yields NaN for an initialization with no recorded runs instead
        # of raising KeyError.
        rows.append([
            f'{hit_rate.get(initialization, np.nan):.3f}'
            for initialization in initializations
        ])

    # all entries already carry exactly 3 digits after the decimal point
    return pd.DataFrame(rows, index=algorithms, columns=initializations)

## A SETS

### A1

In [29]:
view_tests(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a1.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,0,3,1.014,0.577758,0.0171,12146257522.258896,14841946964.405014,21841814398.116207,1657080428.488605,0.0,0.0,18.937,0.0,0.0,18.937,10.5,18.937,29.437
lloyd,k-means++,1000,0,4,1.532,0.764837,0.016652,12146257522.258896,16417649238.3536,25163192245.000164,2316354400.425482,0.0,0.0,16.501,0.0,0.0,16.501,10.5,16.501,27.001
lloyd,greedy-k-means++,1000,0,2,0.628,0.579324,0.061193,12146257522.258896,13717924072.308132,18879378855.630257,1499700904.516583,0.0,0.0,10.18,0.0,0.0,10.18,90.3,10.18,100.48
hartigan,maximin,1000,0,3,1.051,0.595314,1.241783,12146257522.258894,14916636982.657885,21606378507.168976,1701562833.030088,0.0,0.0,0.0,11.67,0.0,11.67,10.5,45.2951,55.7951
hartigan,k-means++,1000,0,4,1.51,0.768049,1.142173,12146257522.258896,16295308066.882206,26169761656.456,2317829227.62859,0.0,0.0,0.0,10.755,0.0,10.755,10.5,40.27775,50.77775
hartigan,greedy-k-means++,1000,0,2,0.639,0.569806,0.719298,12146257522.258894,13742493183.545908,19084660192.08032,1482971666.062825,0.0,0.0,0.0,7.196,0.0,7.196,90.3,19.9594,110.2594
extended-hartigan,maximin,1000,0,3,1.005,0.548612,1.484507,12146257522.258894,14767844303.212078,21606075084.30271,1557628467.297744,0.02,20.218,0.0,0.0,0.0,20.238,10.5,22.1618,32.6618
extended-hartigan,k-means++,1000,0,4,1.455,0.752313,1.397253,12146257522.258896,16162358519.17918,25306232141.39209,2277324643.338295,0.062,19.278,0.0,0.0,0.0,19.34,10.5,21.174,31.674
extended-hartigan,greedy-k-means++,1000,0,2,0.646,0.587098,0.925653,12146257522.258894,13737587933.751692,19084323233.64444,1519376173.215931,0.034,12.127,0.0,0.0,0.0,12.161,90.3,13.2771,103.5771
mixed-hartigan,maximin,1000,0,2,1.019,0.544646,0.382909,12146257522.258894,14805167169.49054,19033383097.766228,1518771315.064257,0.0,0.0,19.858,4.881,0.0,24.739,10.5,26.25405,36.75405


In [3]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a1.txt', 12146257522.258896)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.006,0.002,0.017
hartigan,0.012,0.004,0.045
extended-hartigan,0.018,0.003,0.029
mixed-hartigan,0.016,0.004,0.042
mixed-extended-hartigan,0.013,0.01,0.044


### A2

In [5]:
view_tests(['random', 'maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a2.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,0,5,2.597,0.931982,0.042052,20286736641.652187,26843756945.622456,35414539297.90866,2464843591.951113,0.0,0.0,17.186,0.0,0.0,17.186,18.0,17.186,35.186
lloyd,k-means++,1000,0,6,2.905,1.034396,0.0471,20286817823.75128,27966703202.7594,38493380306.01308,3004942091.734379,0.0,0.0,19.434,0.0,0.0,19.434,18.0,19.434,37.434
lloyd,greedy-k-means++,1000,0,3,1.147,0.699565,0.251885,20286736641.652184,22953883768.901768,30052880407.23616,1695268897.218208,0.0,0.0,12.169,0.0,0.0,12.169,192.857143,12.169,205.026143
hartigan,maximin,1000,0,5,2.716,0.954643,2.676538,20286736641.65218,27208756265.97428,35410827796.57879,2544490310.177399,0.0,0.0,0.0,12.117,0.0,12.117,18.0,43.416229,61.416229
hartigan,k-means++,1000,0,6,2.885,1.04488,2.548427,20286736641.652187,27984463659.314816,38546834166.08014,3178070475.933357,0.0,0.0,0.0,11.663,0.0,11.663,18.0,40.924771,58.924771
hartigan,greedy-k-means++,1000,0,4,1.157,0.707355,1.576575,20286736641.65218,22935564915.09805,30471804871.576126,1682961032.380597,0.0,0.0,0.0,7.639,0.0,7.639,192.857143,18.830857,211.688
extended-hartigan,maximin,1000,0,5,2.613,0.935538,2.586507,20286736641.652184,26858530523.04812,33849993499.323875,2442177550.299162,0.07,21.204,0.0,0.0,0.0,21.274,18.0,22.432514,40.432514
extended-hartigan,k-means++,1000,0,6,2.925,1.005671,2.550372,20286736641.652184,28013447199.545464,38249025036.74405,2983727665.410038,0.08,20.955,0.0,0.0,0.0,21.035,18.0,22.179856,40.179856
extended-hartigan,greedy-k-means++,1000,0,4,1.152,0.689127,1.893244,20286736641.65218,22953918313.35801,28896676854.01431,1641912124.019049,0.049,13.7,0.0,0.0,0.0,13.749,192.857143,14.477514,207.334657
mixed-hartigan,maximin,1000,0,5,2.596,0.956443,0.884733,20286736641.65218,26809230626.549976,33761932752.446327,2507135288.406207,0.0,0.0,17.053,6.553,0.0,23.606,18.0,25.246486,43.246486


In [5]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a2.txt', 20286736641.652190)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.001,0.0,0.045
hartigan,0.008,0.002,0.156
extended-hartigan,0.006,0.003,0.147
mixed-hartigan,0.009,0.001,0.15
mixed-extended-hartigan,0.007,0.003,0.136


### A3

In [6]:
view_tests(['random', 'maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a3.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,8,12,9.035,0.809799,0.046199,164585240119.08307,175851968870.9568,196778358020.06458,5695546516.435332,0.0,0.0,29.724,0.0,0.0,29.724,10.5,29.724,40.224
lloyd,k-means++,1000,6,12,9.35,0.991716,0.050354,164585237049.64572,180559176610.9741,211957316123.2877,7908215216.789349,0.0,0.0,33.091,0.0,0.0,33.091,10.5,33.091,43.591
lloyd,greedy-k-means++,1000,7,12,9.205,0.852628,0.133168,164582037167.3196,175698889771.94388,200809591260.65173,6225166647.956117,0.0,0.0,27.543,0.0,0.0,27.543,90.3,27.543,117.843
hartigan,maximin,1000,8,12,9.057,0.823256,5.303919,164577196497.5241,175887388928.6993,191879710583.69537,6152988650.011116,0.0,0.0,0.0,16.359,0.0,16.359,10.5,149.17445,159.67445
hartigan,k-means++,1000,7,12,9.317,1.022013,5.426811,164585137087.47238,179992630553.45337,219954135317.10205,7530812271.813953,0.0,0.0,0.0,17.714,0.0,17.714,10.5,145.3515,155.8515
hartigan,greedy-k-means++,1000,7,12,9.129,0.865077,4.296868,164576921621.3024,175545698775.40994,205323767865.7364,6650822837.906878,0.0,0.0,0.0,14.848,0.0,14.848,90.3,103.57595,193.87595
extended-hartigan,maximin,1000,8,12,8.988,0.801159,5.121286,164578137294.4276,175582838992.36612,196755883367.68976,5848637598.538556,0.03,29.675,0.0,0.0,0.0,29.705,10.5,32.5755,43.0755
extended-hartigan,k-means++,1000,7,12,9.307,1.019191,5.612993,164577225870.32077,180064903523.1966,211415777039.77344,8012556932.862264,0.043,32.601,0.0,0.0,0.0,32.644,10.5,35.8084,46.3084
extended-hartigan,greedy-k-means++,1000,7,12,9.062,0.849798,4.894619,164578166667.22424,175516295997.757,201759795165.71863,6191232050.592412,0.046,27.85,0.0,0.0,0.0,27.896,90.3,30.5856,120.8856
mixed-hartigan,maximin,1000,7,12,9.007,0.850265,0.751901,164578137294.4276,175840629579.6136,192530187476.57217,5701431051.910794,0.0,0.0,29.588,3.981,0.0,33.569,10.5,34.8856,45.3856


In [6]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a3.txt', 164577200000)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.0
hartigan,0.001,0.0,0.002
extended-hartigan,0.0,0.0,0.0
mixed-hartigan,0.0,0.0,0.002
mixed-extended-hartigan,0.0,0.001,0.002


In [7]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/A-Sets/a3.txt', 164588000000)


Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.012,0.003,0.009
hartigan,0.011,0.003,0.006
extended-hartigan,0.015,0.001,0.005
mixed-hartigan,0.01,0.002,0.007
mixed-extended-hartigan,0.008,0.003,0.009


## BRIDGE

### $k = 5$

In [7]:
view_tests(['random', 'k-means++', 'maximin', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge5.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,,,,,0.028214,37450555.033932,37453268.670207,37459577.740492,4023.292673,0.0,0.0,35.875,0.0,0.0,35.875,3.0,35.875,38.875
lloyd,k-means++,1000,,,,,0.025432,37450555.033932,37454942.018448,37459577.740492,4395.479542,0.0,0.0,32.809,0.0,0.0,32.809,3.0,32.809,35.809
lloyd,greedy-k-means++,1000,,,,,0.033547,37450555.033932,37454799.548028,37459577.740492,4389.563704,0.0,0.0,30.857,0.0,0.0,30.857,17.4,30.857,48.257
hartigan,maximin,1000,,,,,3.776096,37450552.016165,37450697.770185,37451188.496601,267.442233,0.0,0.0,0.0,21.956,0.0,21.956,3.0,738.0368,741.0368
hartigan,k-means++,1000,,,,,2.953359,37450552.016165,37450849.252528,37451188.496601,317.546334,0.0,0.0,0.0,20.302,0.0,20.302,3.0,491.9212,494.9212
hartigan,greedy-k-means++,1000,,,,,2.53046,37450552.016165,37450815.592782,37451262.213444,313.593815,0.0,0.0,0.0,18.693,0.0,18.693,17.4,371.5724,388.9724
extended-hartigan,maximin,1000,,,,,3.397551,37450552.016165,37450749.96158,37451188.496601,294.628574,0.0,39.52,0.0,0.0,0.0,39.52,3.0,54.928,57.928
extended-hartigan,k-means++,1000,,,,,3.341998,37450552.016165,37450895.7156,37451188.496601,317.220215,0.0,38.847,0.0,0.0,0.0,38.847,3.0,53.9858,56.9858
extended-hartigan,greedy-k-means++,1000,,,,,3.03895,37450552.016165,37450858.799735,37451188.496601,318.033931,0.002,34.914,0.0,0.0,0.0,34.916,17.4,48.4824,65.8824
mixed-hartigan,maximin,1000,,,,,0.520348,37450552.016165,37450736.595491,37451188.496601,288.810669,0.0,0.0,36.318,5.424,0.0,41.742,3.0,48.2196,51.2196


In [8]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge5.txt', 37450552.1)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.0
hartigan,0.771,0.533,0.586
extended-hartigan,0.689,0.46,0.518
mixed-hartigan,0.71,0.493,0.53
mixed-extended-hartigan,0.695,0.481,0.504


In [9]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge5.txt', 37450555.1)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.001,0.003,0.003
hartigan,0.771,0.533,0.586
extended-hartigan,0.689,0.46,0.518
mixed-hartigan,0.71,0.493,0.53
mixed-extended-hartigan,0.695,0.481,0.504


### $k=10$

In [8]:
view_tests(['random', 'k-means++', 'maximin', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,,,,,0.087036,29225968.220905,29531987.93478,30178156.723077,224223.172988,0.0,0.0,77.787,0.0,0.0,77.787,5.5,77.787,83.287
lloyd,k-means++,1000,,,,,0.069326,29225517.831373,29423289.949534,30510623.159897,201326.256425,0.0,0.0,59.219,0.0,0.0,59.219,5.5,59.219,64.719
lloyd,greedy-k-means++,1000,,,,,0.101128,29225342.919201,29411300.847063,30051638.981498,174425.267391,0.0,0.0,53.947,0.0,0.0,53.947,45.1,53.947,99.047
hartigan,maximin,1000,,,,,6.97077,29225354.03101,29466744.544103,30508125.546515,254432.618091,0.0,0.0,0.0,43.054,0.0,43.054,5.5,571.1634,576.6634
hartigan,k-means++,1000,,,,,4.896726,29224830.737353,29377827.453603,30031714.582518,166925.584188,0.0,0.0,0.0,32.019,0.0,32.019,5.5,381.3793,386.8793
hartigan,greedy-k-means++,1000,,,,,4.163406,29224840.566727,29372348.494121,30080292.338818,153329.252328,0.0,0.0,0.0,28.767,0.0,28.767,45.1,301.8495,346.9495
extended-hartigan,maximin,1000,,,,,6.838203,29225272.132593,29447786.726762,30036350.440556,207296.960477,0.117,79.45,0.0,0.0,0.0,79.567,5.5,95.2804,100.7804
extended-hartigan,k-means++,1000,,,,,5.060571,29224830.737353,29384879.138948,30441145.688745,167009.198763,0.146,58.318,0.0,0.0,0.0,58.464,5.5,69.956799,75.456799
extended-hartigan,greedy-k-means++,1000,,,,,4.839962,29224840.566727,29365669.732266,30033309.678663,152888.976124,0.13,55.57,0.0,0.0,0.0,55.7,45.1,66.64,111.74
mixed-hartigan,maximin,1000,,,,,1.459823,29225210.340865,29455132.684482,30464815.930057,216009.374226,0.0,0.0,79.689,14.009,0.0,93.698,5.5,121.5725,127.0725


In [10]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge.txt', 29225400)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.001
hartigan,0.002,0.061,0.045
extended-hartigan,0.008,0.058,0.045
mixed-hartigan,0.005,0.025,0.034
mixed-extended-hartigan,0.008,0.041,0.029


### $k = 100$

In [9]:
view_tests(['random', 'k-means++', 'maximin', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge100.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,,,,,1.826454,15169044.143402,15413526.054082,15686285.11746,72752.984775,0.0,0.0,36.112,0.0,0.0,36.112,50.5,36.112,86.612
lloyd,k-means++,1000,,,,,1.81428,15006885.207483,15261330.634641,15637575.306265,93823.7056,0.0,0.0,31.943,0.0,0.0,31.943,50.5,31.943,82.443
lloyd,greedy-k-means++,1000,,,,,5.46633,14888158.119967,15080490.512997,15270653.332074,62057.236196,0.0,0.0,29.249,0.0,0.0,29.249,650.44,29.249,679.689
hartigan,maximin,1000,,,,,17.133349,14790342.159888,14950466.975632,15160982.251727,56336.430898,0.0,0.0,0.0,28.417,0.0,28.417,50.5,74.41258,124.91258
hartigan,k-means++,1000,,,,,15.168771,14715851.007302,14866516.266135,15142030.234913,61845.890402,0.0,0.0,0.0,26.137,0.0,26.137,50.5,65.579562,116.079562
hartigan,greedy-k-means++,1000,,,,,16.723263,14688170.172373,14804686.358514,14965883.122493,44555.312414,0.0,0.0,0.0,25.378,0.0,25.378,650.44,57.090485,707.530485
extended-hartigan,maximin,1000,,,,,8.090345,14734885.809208,14909871.736426,15130673.611199,54121.611623,2.492,54.952,0.0,0.0,0.0,57.444,50.5,58.550307,109.050307
extended-hartigan,k-means++,1000,,,,,7.812182,14720083.494351,14863172.313783,15136376.454066,67804.969647,2.494,52.163,0.0,0.0,0.0,54.657,50.5,55.72881,106.22881
extended-hartigan,greedy-k-means++,1000,,,,,11.356657,14660559.967231,14795875.915432,14954653.49646,42421.681991,2.619,51.183,0.0,0.0,0.0,53.802,650.44,54.857492,705.297492
mixed-hartigan,maximin,1000,,,,,8.200985,14790186.811389,14971580.971431,15171001.384794,56890.040181,0.0,0.0,36.65,24.421,0.0,61.071,50.5,74.423139,124.923139


In [11]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge100.txt', 14700000)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.0
hartigan,0.0,0.0,0.002
extended-hartigan,0.0,0.0,0.008
mixed-hartigan,0.0,0.0,0.0
mixed-extended-hartigan,0.0,0.0,0.007


In [12]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge100.txt', 14770000)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.0
hartigan,0.0,0.04,0.215
extended-hartigan,0.001,0.058,0.275
mixed-hartigan,0.0,0.02,0.21
mixed-extended-hartigan,0.0,0.041,0.244


In [13]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/bridge100.txt', 14910000)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.003
hartigan,0.238,0.782,0.984
extended-hartigan,0.523,0.779,0.992
mixed-hartigan,0.134,0.693,0.987
mixed-extended-hartigan,0.232,0.773,0.997


## HOUSE

### $k=50$

In [10]:
view_tests(['random', 'k-means++', 'maximin', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/housec5.txt')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min_centroid_index,max_centroid_index,mean_centroid_index,std_centroid_index,mean_time_execution,min_final_cost,mean_final_cost,max_final_cost,std_final_cost,mean_safe_iterations,mean_unsafe_iterations,mean_lloyd_iterations,mean_hartigan_iterations,mean_binary_iterations,mean_tot_iterations,mean_init_norm_calculations,mean_norm_calculations,mean_tot_norm_calculations
Algorithm,Initialization,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
lloyd,maximin,1000,,,,,0.04983,371278.303206,382715.960453,395631.437444,4118.852818,0.0,0.0,22.487,0.0,0.0,22.487,25.5,22.487,47.987
lloyd,k-means++,1000,,,,,0.050114,370417.853799,380862.655932,402226.464774,4453.177247,0.0,0.0,25.127,0.0,0.0,25.127,25.5,25.127,50.627
lloyd,greedy-k-means++,1000,,,,,0.305142,370071.31556,378008.734612,390312.404085,3180.683966,0.0,0.0,22.077,0.0,0.0,22.077,275.4,22.077,297.477
hartigan,maximin,1000,,,,,2.120665,366520.755445,373468.045605,385246.417067,2978.59339,0.0,0.0,0.0,18.052,0.0,18.052,25.5,38.93813,64.43813
hartigan,k-means++,1000,,,,,2.278674,366595.165011,374296.035692,387971.630651,3389.335372,0.0,0.0,0.0,19.178,0.0,19.178,25.5,42.599092,68.099092
hartigan,greedy-k-means++,1000,,,,,2.520386,365974.942987,372578.933177,380593.031789,2535.719573,0.0,0.0,0.0,17.326,0.0,17.326,275.4,35.292574,310.692574
extended-hartigan,maximin,1000,,,,,2.298797,366674.505745,373218.323535,383225.645717,2731.395745,0.792,33.885,0.0,0.0,0.0,34.677,25.5,36.023144,61.523144
extended-hartigan,k-means++,1000,,,,,2.411476,366107.320671,373996.104347,391138.410258,3458.082248,0.834,36.615,0.0,0.0,0.0,37.449,25.5,38.906857,64.406857
extended-hartigan,greedy-k-means++,1000,,,,,2.580764,366018.516758,372496.688959,385531.234817,2525.460599,0.844,32.349,0.0,0.0,0.0,33.193,275.4,34.480703,309.880703
mixed-hartigan,maximin,1000,,,,,1.356694,366040.94744,373758.809011,385127.58505,3060.351235,0.0,0.0,22.368,15.137,0.0,37.505,25.5,43.534909,69.034909


In [14]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/housec5.txt', 370000)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.0,0.0
hartigan,0.125,0.071,0.149
extended-hartigan,0.108,0.107,0.165
mixed-hartigan,0.103,0.079,0.138
mixed-extended-hartigan,0.105,0.085,0.167


In [15]:
min_found(['maximin', 'k-means++', 'greedy-k-means++'], ['lloyd', 'hartigan', 'extended-hartigan', 'mixed-hartigan', 'mixed-extended-hartigan'], 'data/housec5.txt', 371000)

Unnamed: 0,maximin,k-means++,greedy-k-means++
lloyd,0.0,0.001,0.006
hartigan,0.207,0.152,0.291
extended-hartigan,0.224,0.182,0.298
mixed-hartigan,0.198,0.147,0.268
mixed-extended-hartigan,0.195,0.156,0.308
