## Imports

In [1]:
import sys
# Install the notebook's third-party dependencies with the interpreter that runs
# this kernel (sys.executable), so the packages land in the kernel's own environment.
# NOTE(review): no versions are pinned here, so results may differ between environments.
!{sys.executable} -m pip install numpy matplotlib pandas scikit-learn tabulate

You should consider upgrading via the '/home/patrick/git/google-review-analysis-proj/venv/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from tabulate import tabulate

## Data Processing

In [3]:
# Readable names for the 25 columns kept after the "Unnamed: 25" column is dropped:
# the user identifier followed by the 24 rating categories.
COLUMN_NAMES = [
    "ID",
    "churches", "resorts", "beaches", "parks", "theatres", "museums",
    "malls", "zoo", "restaurants", "pubs/bars", "local services",
    "burger/pizza shops", "hotels/other lodgings", "juice bars",
    "art galleries", "dance clubs", "swimming pools", "gyms", "bakeries",
    "beauty & spas", "cafes", "view points", "monuments", "gardens",
]

# Load the ratings, discard the "Unnamed: 25" column, and apply the readable names.
raw_frame = pd.read_csv("google_review_ratings.csv")
rdata = raw_frame.drop(columns=["Unnamed: 25"])
rdata.columns = COLUMN_NAMES

# Feature matrix used for clustering: every column except the user ID.
pdata = rdata.drop(columns=["ID"])

In [4]:
# Display the renamed frame as a sanity check on the load and column renaming.
rdata

Unnamed: 0,ID,churches,resorts,beaches,parks,theatres,museums,malls,zoo,restaurants,...,art galleries,dance clubs,swimming pools,gyms,bakeries,beauty & spas,cafes,view points,monuments,gardens
0,User 1,0.00,0.00,3.63,3.65,5.00,2.92,5.00,2.35,2.33,...,1.74,0.59,0.50,0.00,0.50,0.00,0.00,0.0,0.0,0.00
1,User 2,0.00,0.00,3.63,3.65,5.00,2.92,5.00,2.64,2.33,...,1.74,0.59,0.50,0.00,0.50,0.00,0.00,0.0,0.0,0.00
2,User 3,0.00,0.00,3.63,3.63,5.00,2.92,5.00,2.64,2.33,...,1.74,0.59,0.50,0.00,0.50,0.00,0.00,0.0,0.0,0.00
3,User 4,0.00,0.50,3.63,3.63,5.00,2.92,5.00,2.35,2.33,...,1.74,0.59,0.50,0.00,0.50,0.00,0.00,0.0,0.0,0.00
4,User 5,0.00,0.00,3.63,3.63,5.00,2.92,5.00,2.64,2.33,...,1.74,0.59,0.50,0.00,0.50,0.00,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5451,User 5452,0.91,5.00,4.00,2.79,2.77,2.57,2.43,1.09,1.77,...,5.00,0.66,0.65,0.66,0.69,5.00,1.05,5.0,5.0,1.56
5452,User 5453,0.93,5.00,4.02,2.79,2.78,2.57,1.77,1.07,1.76,...,0.89,0.65,0.64,0.65,1.59,1.62,1.06,5.0,5.0,1.09
5453,User 5454,0.94,5.00,4.03,2.80,2.78,2.57,1.75,1.05,1.75,...,0.87,0.65,0.63,0.64,0.74,5.00,1.07,5.0,5.0,1.11
5454,User 5455,0.95,4.05,4.05,2.81,2.79,2.44,1.76,1.03,1.74,...,5.00,0.64,0.63,0.64,0.75,5.00,1.08,5.0,5.0,1.12


## Algorithm Construction

In [5]:
# Palette taken from http://godsnotwheregodsnot.blogspot.com/2013/11/kmeans-color-quantization-seeding.html
# Colour set specifically designed for displaying K-means clusters on a graph;
# maps an integer cluster label (0-268) to a hex colour string.
LABEL_COLOR_MAP={0: "#000000", 1: "#FFFF00", 2: "#1CE6FF", 3: "#FF34FF", 4: "#FF4A46", 5: "#008941", 6: "#006FA6", 7: "#A30059", 8: "#FFDBE5", 9: "#7A4900", 10: "#0000A6", 11: "#63FFAC", 12: "#B79762", 13: "#004D43", 14: "#8FB0FF", 15: "#997D87", 16: "#5A0007", 17: "#809693", 18: "#FEFFE6", 19: "#1B4400", 20: "#4FC601", 21: "#3B5DFF", 22: "#4A3B53", 23: "#FF2F80", 24: "#61615A", 25: "#BA0900", 26: "#6B7900", 27: "#00C2A0", 28: "#FFAA92", 29: "#FF90C9", 30: "#B903AA", 31: "#D16100", 32: "#DDEFFF", 33: "#000035", 34: "#7B4F4B", 35: "#A1C299", 36: "#300018", 37: "#0AA6D8", 38: "#013349", 39: "#00846F", 40: "#372101", 41: "#FFB500", 42: "#C2FFED", 43: "#A079BF", 44: "#CC0744", 45: "#C0B9B2", 46: "#C2FF99", 47: "#001E09", 48: "#00489C", 49: "#6F0062", 50: "#0CBD66", 51: "#EEC3FF", 52: "#456D75", 53: "#B77B68", 54: "#7A87A1", 55: "#788D66", 56: "#885578", 57: "#FAD09F", 58: "#FF8A9A", 59: "#D157A0", 60: "#BEC459", 61: "#456648", 62: "#0086ED", 63: "#886F4C", 64: "#34362D", 65: "#B4A8BD", 66: "#00A6AA", 67: "#452C2C", 68: "#636375", 69: "#A3C8C9", 70: "#FF913F", 71: "#938A81", 72: "#575329", 73: "#00FECF", 74: "#B05B6F", 75: "#8CD0FF", 76: "#3B9700", 77: "#04F757", 78: "#C8A1A1", 79: "#1E6E00", 80: "#7900D7", 81: "#A77500", 82: "#6367A9", 83: "#A05837", 84: "#6B002C", 85: "#772600", 86: "#D790FF", 87: "#9B9700", 88: "#549E79", 89: "#FFF69F", 90: "#201625", 91: "#72418F", 92: "#BC23FF", 93: "#99ADC0", 94: "#3A2465", 95: "#922329", 96: "#5B4534", 97: "#FDE8DC", 98: "#404E55", 99: "#0089A3", 100: "#CB7E98", 101: "#A4E804", 102: "#324E72", 103: "#6A3A4C", 104: "#83AB58", 105: "#001C1E", 106: "#D1F7CE", 107: "#004B28", 108: "#C8D0F6", 109: "#A3A489", 110: "#806C66", 111: "#222800", 112: "#BF5650", 113: "#E83000", 114: "#66796D", 115: "#DA007C", 116: "#FF1A59", 117: "#8ADBB4", 118: "#1E0200", 119: "#5B4E51", 120: "#C895C5", 121: "#320033", 122: "#FF6832", 123: "#66E1D3", 124: "#CFCDAC", 125: "#D0AC94", 126: "#7ED379", 127: "#012C58", 128: "#7A7BFF", 129: "#D68E01", 130: 
"#353339", 131: "#78AFA1", 132: "#FEB2C6", 133: "#75797C", 134: "#837393", 135: "#943A4D", 136: "#B5F4FF", 137: "#D2DCD5", 138: "#9556BD", 139: "#6A714A", 140: "#001325", 141: "#02525F", 142: "#0AA3F7", 143: "#E98176", 144: "#DBD5DD", 145: "#5EBCD1", 146: "#3D4F44", 147: "#7E6405", 148: "#02684E", 149: "#962B75", 150: "#8D8546", 151: "#9695C5", 152: "#E773CE", 153: "#D86A78", 154: "#3E89BE", 155: "#CA834E", 156: "#518A87", 157: "#5B113C", 158: "#55813B", 159: "#E704C4", 160: "#00005F", 161: "#A97399", 162: "#4B8160", 163: "#59738A", 164: "#FF5DA7", 165: "#F7C9BF", 166: "#643127", 167: "#513A01", 168: "#6B94AA", 169: "#51A058", 170: "#A45B02", 171: "#1D1702", 172: "#E20027", 173: "#E7AB63", 174: "#4C6001", 175: "#9C6966", 176: "#64547B", 177: "#97979E", 178: "#006A66", 179: "#391406", 180: "#F4D749", 181: "#0045D2", 182: "#006C31", 183: "#DDB6D0", 184: "#7C6571", 185: "#9FB2A4", 186: "#00D891", 187: "#15A08A", 188: "#BC65E9", 189: "#FFFFFE", 190: "#C6DC99", 191: "#203B3C", 192: "#671190", 193: "#6B3A64", 194: "#F5E1FF", 195: "#FFA0F2", 196: "#CCAA35", 197: "#374527", 198: "#8BB400", 199: "#797868", 200: "#C6005A", 201: "#3B000A", 202: "#C86240", 203: "#29607C", 204: "#402334", 205: "#7D5A44", 206: "#CCB87C", 207: "#B88183", 208: "#AA5199", 209: "#B5D6C3", 210: "#A38469", 211: "#9F94F0", 212: "#A74571", 213: "#B894A6", 214: "#71BB8C", 215: "#00B433", 216: "#789EC9", 217: "#6D80BA", 218: "#953F00", 219: "#5EFF03", 220: "#E4FFFC", 221: "#1BE177", 222: "#BCB1E5", 223: "#76912F", 224: "#003109", 225: "#0060CD", 226: "#D20096", 227: "#895563", 228: "#29201D", 229: "#5B3213", 230: "#A76F42", 231: "#89412E", 232: "#1A3A2A", 233: "#494B5A", 234: "#A88C85", 235: "#F4ABAA", 236: "#A3F3AB", 237: "#00C6C8", 238: "#EA8B66", 239: "#958A9F", 240: "#BDC9D2", 241: "#9FA064", 242: "#BE4700", 243: "#658188", 244: "#83A485", 245: "#453C23", 246: "#47675D", 247: "#3A3F00", 248: "#061203", 249: "#DFFB71", 250: "#868E7E", 251: "#98D058", 252: "#6C8F7D", 253: "#D7BFC2", 254: "#3C3E6E", 255: 
"#D83D66", 256: "#2F5D9B", 257: "#6C5E46", 258: "#D25B88", 259: "#5B656C", 260: "#00B57F", 261: "#545C46", 262: "#866097", 263: "#365D25", 264: "#252F99", 265: "#00CCFF", 266: "#674E60", 267: "#FC009C", 268: "#92896B"}

In [6]:
# method from https://stackoverflow.com/questions/36195457/python-sklearn-kmeans-how-to-get-the-samples-points-in-each-clusters#43696871
def ClusterIndices(clustNum, labels_array):
    """Return the positions of the samples assigned to cluster ``clustNum``.

    Parameters
    ----------
    clustNum : int
        Cluster label to look up.
    labels_array : array-like
        Per-sample cluster labels (for example ``KMeans.labels_``). Plain
        lists and tuples are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        Indices along the first axis where the label equals ``clustNum``;
        empty when no sample carries that label.
    """
    # Coerce first: comparing a plain list to a scalar yields a single bool
    # rather than an element-wise mask, so no matches would ever be found.
    labels = np.asarray(labels_array)
    return np.where(labels == clustNum)[0]


# Settings for the K-Means sweep.
K_VALUES = range(3, 26)   # cluster counts to evaluate (3..25 inclusive)
RANDOM_SEED = 42          # fixed seed so a fresh Restart & Run All gives the same clusters
IMG_DIR = "./img"         # directory that receives one scatter plot per cluster count

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(IMG_DIR, exist_ok=True)

# The 2-D PCA projection is only used for plotting and depends on pdata alone,
# not on the number of clusters, so it is fitted once instead of once per iteration.
pca = PCA(n_components=2)
pca_fit = pca.fit(pdata)
principalComponents = pca_fit.transform(pdata)
principalDf = pd.DataFrame(data=principalComponents, columns=['x', 'y'])

# Maps cluster count -> mean over clusters of (mean per-feature variance inside the cluster).
# NOTE(review): this is an unweighted average across clusters and tends to shrink as the
# cluster count grows, so the smallest value alone does not identify the best count.
best_var_dict = {}

# fig_num numbers the saved figures 1, 2, 3, ... in sweep order.
for fig_num, i in enumerate(K_VALUES, start=1):

    print("## Wild K-Means Analysis with c="+str(i))

    kmeans = KMeans(n_clusters=i, random_state=RANDOM_SEED).fit(pdata)
    # Use the fitted labels for both the colouring and the statistics so the two always agree.
    labels = kmeans.labels_
    C = kmeans.cluster_centers_

    label_color = [LABEL_COLOR_MAP[l] for l in labels]

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.scatter(principalDf['x'], principalDf['y'], c=label_color, s=50, alpha=0.5)

    # Project the centroids with the same PCA model; wrapping them in a DataFrame keeps
    # the feature names the model was fitted with.
    C_transformed = pca_fit.transform(pd.DataFrame(C, columns=pdata.columns))
    ax.scatter(C_transformed[:, 0], C_transformed[:, 1], s=200, marker='x')

    img_path = IMG_DIR+"/"+str(fig_num)+".png"
    fig.savefig(img_path)
    plt.close(fig)  # release the figure; one is created per iteration
    print("![fig."+str(fig_num)+" Greedy K-Mean Clustering Graph with c="+str(i)+"]("+img_path+")")
    print()

    # Mean per-feature variance of each cluster found for this cluster count.
    t_var = []

    for j in range(len(C)):
        print()
        print("### Cluster "+str(j+1))
        print()
        print("Centroid: "+ str(C[j]))
        print()
        # ClusterIndices returns row positions, so select positionally with iloc.
        members = pdata.iloc[ClusterIndices(j, labels)]
        var = members.var()
        t_var.append(var.mean())
        # One row per feature, with the cluster's mean and variance side by side.
        d = pd.DataFrame({"mean": members.mean(), "variance": var})
        print(tabulate(d, headers='keys', tablefmt='github'))

    best_var_dict[i] = np.mean(t_var)

## Wild K-Means Analysis with c=3
![fig.1 Greedy K-Mean Clustering Graph with c=3](./img/1.png)


### Cluster 1

Centroid: [2.25493519 2.71833333 2.56810185 2.36301852 2.0894537  1.91106481
 2.00037963 1.58012963 1.89081481 1.61237037 1.55577778 1.42199074
 1.50976852 1.84366667 2.35897222 1.68923148 1.78537963 1.81214815
 2.29741667 2.13661111 1.81018519 2.73103704 2.37507407 2.46707407]

|                       |    mean |   variance |
|-----------------------|---------|------------|
| churches              | 2.25269 |   0.76832  |
| resorts               | 2.717   |   1.00223  |
| beaches               | 2.57172 |   0.915354 |
| parks                 | 2.36591 |   0.778003 |
| theatres              | 2.08972 |   0.309992 |
| museums               | 1.91156 |   0.244622 |
| malls                 | 2.00047 |   0.723042 |
| zoo                   | 1.58018 |   0.167467 |
| restaurants           | 1.89335 |   0.82452  |
| pubs/bars             | 1.61539 |   0.2902   |
| local services   

![fig.3 Greedy K-Mean Clustering Graph with c=5](./img/3.png)


### Cluster 1

Centroid: [1.63314094 2.22304698 3.11257718 4.29531544 4.07797315 3.1866443
 2.72312752 2.19012081 2.65692617 2.71104698 2.684      1.59542282
 1.84455034 1.23002685 1.16048322 1.20908725 0.86826846 0.67261745
 0.61868456 0.97072483 1.03426846 4.72649664 2.54640268 1.78332886]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.63314  |  0.359983  |
| resorts               | 2.22305  |  1.0057    |
| beaches               | 3.11258  |  2.24504   |
| parks                 | 4.29532  |  1.27734   |
| theatres              | 4.07797  |  1.29874   |
| museums               | 3.18664  |  1.23538   |
| malls                 | 2.72313  |  1.00843   |
| zoo                   | 2.19012  |  0.432645  |
| restaurants           | 2.65693  |  1.43464   |
| pubs/bars             | 2.71105  |  1.42583   |
| local services        | 2.684    |  2.38

![fig.5 Greedy K-Mean Clustering Graph with c=7](./img/5.png)


### Cluster 1

Centroid: [2.33036055 2.62969247 2.42846235 2.19038176 2.01664899 1.85097561
 1.95147402 1.58772004 1.75697773 1.60793213 1.59622481 1.4659597
 1.57019088 1.9252386  2.41932131 1.73763521 1.9164263  1.94865323
 2.46387063 2.08283139 1.87937434 2.64239661 2.45450689 2.60149523]

|                       |    mean |   variance |
|-----------------------|---------|------------|
| churches              | 2.33036 |   0.658046 |
| resorts               | 2.62969 |   0.848005 |
| beaches               | 2.42846 |   0.607171 |
| parks                 | 2.19038 |   0.440731 |
| theatres              | 2.01665 |   0.242848 |
| museums               | 1.85098 |   0.213481 |
| malls                 | 1.95147 |   0.725194 |
| zoo                   | 1.58772 |   0.192462 |
| restaurants           | 1.75698 |   0.437765 |
| pubs/bars             | 1.60793 |   0.152331 |
| local services        | 1.59622 |   0.23751  |
| bur

![fig.6 Greedy K-Mean Clustering Graph with c=8](./img/6.png)


### Cluster 1

Centroid: [1.35530612 2.7227708  3.15417582 3.693281   4.42747253 3.88744113
 3.71854788 2.47459969 2.60443485 2.48897959 2.10736264 2.09750392
 2.0072135  1.69167975 1.31472527 1.03866562 0.66320251 0.4822449
 0.54271586 0.60580848 0.74300628 0.84321036 1.50761381 1.69383046]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.35531  |   0.457025 |
| resorts               | 2.72277  |   2.39707  |
| beaches               | 3.15418  |   1.75731  |
| parks                 | 3.69328  |   1.34543  |
| theatres              | 4.42747  |   0.725566 |
| museums               | 3.88744  |   1.29948  |
| malls                 | 3.71855  |   1.71086  |
| zoo                   | 2.4746   |   0.788501 |
| restaurants           | 2.60443  |   1.03047  |
| pubs/bars             | 2.48898  |   0.993337 |
| local services        | 2.10736  |   1.0

![fig.7 Greedy K-Mean Clustering Graph with c=9](./img/7.png)


### Cluster 1

Centroid: [0.64173278 0.90244259 1.58471816 1.56693111 1.58269311 1.60204593
 2.9077453  1.94160752 2.73519833 2.94484342 3.29757829 4.31845511
 4.58617954 4.97455115 3.90427975 0.77465553 0.80167015 0.92776618
 1.38133612 1.10104384 0.58427975 0.55929019 0.54085595 0.65329854]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 0.641733 |  0.286421  |
| resorts               | 0.902443 |  0.3423    |
| beaches               | 1.58472  |  0.135725  |
| parks                 | 1.56693  |  0.0342757 |
| theatres              | 1.58269  |  0.101927  |
| museums               | 1.60205  |  0.0941038 |
| malls                 | 2.90775  |  2.5943    |
| zoo                   | 1.94161  |  0.292478  |
| restaurants           | 2.7352   |  0.134855  |
| pubs/bars             | 2.94484  |  0.146978  |
| local services        | 3.29758  |  0.6

![fig.8 Greedy K-Mean Clustering Graph with c=10](./img/8.png)


### Cluster 1

Centroid: [1.64686695 2.23038627 3.16549356 4.34967096 4.11319027 3.21042918
 2.69469242 2.1648927  2.63739628 2.68811159 2.66164521 1.58082976
 1.82866953 1.27310443 1.18397711 1.20559371 0.87783977 0.66715308
 0.62081545 0.97       1.02160229 4.80904149 2.46839771 1.7358083 ]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.64687  |  0.397754  |
| resorts               | 2.23039  |  0.997135  |
| beaches               | 3.16549  |  2.27284   |
| parks                 | 4.34967  |  1.19311   |
| theatres              | 4.11319  |  1.2461    |
| museums               | 3.21043  |  1.23738   |
| malls                 | 2.69469  |  0.991796  |
| zoo                   | 2.16489  |  0.393526  |
| restaurants           | 2.6374   |  1.40914   |
| pubs/bars             | 2.68811  |  1.40676   |
| local services        | 2.66165  |  2.

![fig.9 Greedy K-Mean Clustering Graph with c=11](./img/9.png)


### Cluster 1

Centroid: [1.60555076 2.20507559 2.9375162  4.49701944 4.15082073 3.31149028
 2.5612095  2.05414687 2.5938013  2.63362851 2.45019438 1.64097192
 1.69431965 1.31926566 1.21658747 1.33365011 0.81546436 0.62488121
 0.57816415 1.10220302 0.98568035 4.84781857 1.11330454 1.63330454]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.60555  |  0.432771  |
| resorts               | 2.20508  |  1.21121   |
| beaches               | 2.93752  |  2.43474   |
| parks                 | 4.49702  |  0.938452  |
| theatres              | 4.15082  |  1.16791   |
| museums               | 3.31149  |  1.41672   |
| malls                 | 2.56121  |  1.01292   |
| zoo                   | 2.05415  |  0.374179  |
| restaurants           | 2.5938   |  1.56562   |
| pubs/bars             | 2.63363  |  1.52183   |
| local services        | 2.45019  |  2.

![fig.10 Greedy K-Mean Clustering Graph with c=12](./img/10.png)


### Cluster 1

Centroid: [1.42975728 1.64893204 1.97004854 2.12757282 2.11213592 2.0218932
 3.49145631 2.23208738 2.70223301 2.54208738 2.61907767 2.38995146
 2.48       3.29713592 4.15296117 4.79194175 3.17936893 1.89417476
 0.6426699  0.62033981 1.15325243 1.35679612 1.32436893 1.36228155]

|                       |    mean |   variance |
|-----------------------|---------|------------|
| churches              | 1.42976 |   0.079978 |
| resorts               | 1.64893 |   0.140345 |
| beaches               | 1.97005 |   0.544552 |
| parks                 | 2.12757 |   0.438915 |
| theatres              | 2.11214 |   0.465813 |
| museums               | 2.02189 |   0.139101 |
| malls                 | 3.49146 |   2.35069  |
| zoo                   | 2.23209 |   0.32532  |
| restaurants           | 2.70223 |   0.335875 |
| pubs/bars             | 2.54209 |   0.3128   |
| local services        | 2.61908 |   0.918476 |
| 

![fig.11 Greedy K-Mean Clustering Graph with c=13](./img/11.png)


### Cluster 1

Centroid: [1.43970443 1.65384236 1.97576355 2.13014778 2.09403941 2.01778325
 3.50655172 2.22591133 2.67546798 2.50576355 2.58389163 2.3829064
 2.4555665  3.33078818 4.1679803  4.80147783 3.17394089 1.9164532
 0.64901478 0.6262069  1.16748768 1.35044335 1.33596059 1.37389163]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.4397   |  0.0729346 |
| resorts               | 1.65384  |  0.140694  |
| beaches               | 1.97576  |  0.549215  |
| parks                 | 2.13015  |  0.440497  |
| theatres              | 2.09404  |  0.418568  |
| museums               | 2.01778  |  0.134474  |
| malls                 | 3.50655  |  2.3691    |
| zoo                   | 2.22591  |  0.32352   |
| restaurants           | 2.67547  |  0.284394  |
| pubs/bars             | 2.50576  |  0.226397  |
| local services        | 2.58389  |  0.

![fig.12 Greedy K-Mean Clustering Graph with c=14](./img/12.png)


### Cluster 1

Centroid: [1.29772321 1.70026786 1.76004464 2.77709821 2.72848214 3.22080357
 3.91303571 3.75316964 4.87366071 4.99410714 1.36946429 1.32370536
 1.24147321 1.245625   0.92276786 1.133125   0.4115625  0.27629464
 0.34790179 1.32611607 1.15589286 1.71325893 1.33433036 1.28383929]

|                       |     mean |    variance |
|-----------------------|----------|-------------|
| churches              | 1.29772  | 0.0195612   |
| resorts               | 1.70027  | 0.953534    |
| beaches               | 1.76004  | 0.622386    |
| parks                 | 2.7771   | 1.82547     |
| theatres              | 2.72848  | 0.933841    |
| museums               | 3.2208   | 0.383577    |
| malls                 | 3.91304  | 1.14315     |
| zoo                   | 3.75317  | 1.14141     |
| restaurants           | 4.87366  | 0.383338    |
| pubs/bars             | 4.99411  | 0.000283504 |
| local services        | 

![fig.13 Greedy K-Mean Clustering Graph with c=15](./img/13.png)


### Cluster 1

Centroid: [1.29468944 4.90804348 2.65673913 2.46686335 2.59487578 3.04304348
 4.36459627 2.9607764  3.94913043 3.04090062 2.6218323  2.36326087
 1.975      3.8318323  3.13639752 0.8368323  0.71767081 0.72509317
 0.62785714 0.90012422 0.73223602 0.77136646 0.83732919 1.02652174]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.29469  |  1.13173   |
| resorts               | 4.90804  |  0.212739  |
| beaches               | 2.65674  |  1.52292   |
| parks                 | 2.46686  |  0.788987  |
| theatres              | 2.59488  |  0.812258  |
| museums               | 3.04304  |  1.61362   |
| malls                 | 4.3646   |  1.33171   |
| zoo                   | 2.96078  |  1.26355   |
| restaurants           | 3.94913  |  0.903108  |
| pubs/bars             | 3.0409   |  0.962435  |
| local services        | 2.62183  |  

![fig.14 Greedy K-Mean Clustering Graph with c=16](./img/14.png)


### Cluster 1

Centroid: [2.48935897 2.45429487 2.21160256 1.74923077 1.69       1.36205128
 1.31903846 1.28762821 1.50826923 1.54923077 1.5724359  0.94410256
 0.935      1.33115385 4.13955128 1.97705128 1.40826923 1.29980769
 1.63551282 2.62660256 3.68       4.02955128 3.50339744 2.8424359 ]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 2.48936  | 0.114542   |
| resorts               | 2.45429  | 0.127217   |
| beaches               | 2.2116   | 0.116069   |
| parks                 | 1.74923  | 0.155716   |
| theatres              | 1.69     | 0.287071   |
| museums               | 1.36205  | 0.152498   |
| malls                 | 1.31904  | 0.121286   |
| zoo                   | 1.28763  | 0.00588789 |
| restaurants           | 1.50827  | 0.185746   |
| pubs/bars             | 1.54923  | 0.0251117  |
| local services        | 1.57244  | 0

![fig.15 Greedy K-Mean Clustering Graph with c=17](./img/15.png)


### Cluster 1

Centroid: [0.47032172 0.75506702 1.49316354 1.51050938 1.52380697 1.55434316
 2.69546917 1.91986595 2.72892761 3.00616622 3.48613941 4.80262735
 4.79351206 4.91190349 3.58378016 0.78707775 0.89072386 1.06316354
 1.53297587 1.03155496 0.41790885 0.39364611 0.38369973 0.49892761]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 0.470322 |  0.17206   |
| resorts               | 0.755067 |  0.289187  |
| beaches               | 1.49316  |  0.0909109 |
| parks                 | 1.51051  |  0.0215919 |
| theatres              | 1.52381  |  0.075007  |
| museums               | 1.55434  |  0.0999607 |
| malls                 | 2.69547  |  2.42527   |
| zoo                   | 1.91987  |  0.298359  |
| restaurants           | 2.72893  |  0.093723  |
| pubs/bars             | 3.00617  |  0.131584  |
| local services        | 3.48614  |  

![fig.16 Greedy K-Mean Clustering Graph with c=18](./img/16.png)


### Cluster 1

Centroid: [0.35952703 0.90858108 1.47804054 1.47439189 1.61608108 1.63851351
 2.96432432 2.35378378 3.02290541 3.22959459 4.25418919 4.71337838
 4.88054054 4.28810811 0.86506757 0.92493243 1.20527027 1.36864865
 1.66709459 0.76121622 0.21932432 0.21297297 0.22959459 0.45972973]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 0.359527 |  0.161147  |
| resorts               | 0.908581 |  0.305966  |
| beaches               | 1.47804  |  0.0629179 |
| parks                 | 1.47439  |  0.0370112 |
| theatres              | 1.61608  |  0.436352  |
| museums               | 1.63851  |  0.382519  |
| malls                 | 2.96432  |  2.12267   |
| zoo                   | 2.35378  |  0.325356  |
| restaurants           | 3.02291  |  0.303225  |
| pubs/bars             | 3.22959  |  0.315747  |
| local services        | 4.25419  |  

![fig.17 Greedy K-Mean Clustering Graph with c=19](./img/17.png)


### Cluster 1

Centroid: [1.87074866 2.11101604 2.58       3.7359893  3.74229947 2.76251337
 3.26149733 2.9857754  3.7484492  3.48775401 4.36026738 1.8715508
 4.86631016 2.20101604 1.45016043 1.0426738  0.89508021 0.76695187
 0.68962567 0.67807487 0.69171123 4.38588235 3.01925134 1.70743316]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.87075  | 0.179885   |
| resorts               | 2.11102  | 0.0479457  |
| beaches               | 2.58     | 1.1708     |
| parks                 | 3.73599  | 2.13708    |
| theatres              | 3.7423   | 1.45783    |
| museums               | 2.76251  | 0.407037   |
| malls                 | 3.2615   | 0.181447   |
| zoo                   | 2.98578  | 0.0245923  |
| restaurants           | 3.74845  | 0.601263   |
| pubs/bars             | 3.48775  | 0.38687    |
| local services        | 4.36027  | 0.

![fig.18 Greedy K-Mean Clustering Graph with c=20](./img/18.png)


### Cluster 1

Centroid: [1.05918182 1.27939394 2.33839394 3.02290909 4.17527273 4.32984848
 4.44075758 2.44312121 2.16912121 2.14630303 1.95739394 1.7670303
 1.88006061 2.08354545 1.40778788 0.79912121 0.62142424 0.45924242
 0.56257576 0.55015152 0.56078788 0.76821212 0.89790909 0.84639394]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.05918  |   0.500324 |
| resorts               | 1.27939  |   0.441043 |
| beaches               | 2.33839  |   1.43157  |
| parks                 | 3.02291  |   0.910974 |
| theatres              | 4.17527  |   0.871573 |
| museums               | 4.32985  |   0.926584 |
| malls                 | 4.44076  |   1.00856  |
| zoo                   | 2.44312  |   0.950784 |
| restaurants           | 2.16912  |   0.479326 |
| pubs/bars             | 2.1463   |   0.410199 |
| local services        | 1.95739  |   

![fig.19 Greedy K-Mean Clustering Graph with c=21](./img/19.png)


### Cluster 1

Centroid: [2.09470046 2.36539171 2.07230415 1.96456221 1.92585253 1.81036866
 1.99082949 1.63129032 1.76875576 1.59589862 1.58009217 1.63640553
 2.07396313 3.00737327 1.9418894  1.62857143 2.7140553  3.48658986
 3.86718894 1.86829493 1.86612903 2.32285714 2.17373272 2.40797235]

|                       |    mean |   variance |
|-----------------------|---------|------------|
| churches              | 2.0947  |   0.129492 |
| resorts               | 2.36539 |   0.808248 |
| beaches               | 2.0723  |   0.223432 |
| parks                 | 1.96456 |   0.191998 |
| theatres              | 1.92585 |   0.308024 |
| museums               | 1.81037 |   0.116058 |
| malls                 | 1.99083 |   0.801319 |
| zoo                   | 1.63129 |   0.167549 |
| restaurants           | 1.76876 |   0.313979 |
| pubs/bars             | 1.5959  |   0.21626  |
| local services        | 1.58009 |   0.183408 |
|

![fig.20 Greedy K-Mean Clustering Graph with c=22](./img/20.png)


### Cluster 1

Centroid: [1.21834123 1.51293839 2.06559242 2.45492891 2.91720379 4.21805687
 4.85668246 2.89469194 4.57753555 2.20104265 1.61540284 1.607109
 2.6921327  3.52478673 4.8036019  0.64270142 0.62478673 0.63369668
 0.71236967 0.70383886 0.93061611 0.98004739 0.98990521 1.08729858]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.21834  |  0.647242  |
| resorts               | 1.51294  |  0.746303  |
| beaches               | 2.06559  |  0.56883   |
| parks                 | 2.45493  |  0.192356  |
| theatres              | 2.9172   |  0.214698  |
| museums               | 4.21806  |  1.01185   |
| malls                 | 4.85668  |  0.329321  |
| zoo                   | 2.89469  |  2.38733   |
| restaurants           | 4.57754  |  0.760824  |
| pubs/bars             | 2.20104  |  1.65371   |
| local services        | 1.6154   |  0.

![fig.21 Greedy K-Mean Clustering Graph with c=23](./img/21.png)


### Cluster 1

Centroid: [1.68230769 2.46840659 2.36978022 3.91445055 4.5782967  3.52956044
 3.66802198 2.7510989  2.64423077 2.52285714 2.50653846 3.0543956
 2.08505495 1.22175824 1.09032967 0.84505495 0.82258242 0.82076923
 0.76483516 0.73406593 0.76664835 0.94576923 4.88016484 2.77450549]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.68231  |  0.233746  |
| resorts               | 2.46841  |  1.10278   |
| beaches               | 2.36978  |  0.483676  |
| parks                 | 3.91445  |  1.79903   |
| theatres              | 4.5783   |  0.696371  |
| museums               | 3.52956  |  1.1958    |
| malls                 | 3.66802  |  0.919205  |
| zoo                   | 2.7511   |  0.370003  |
| restaurants           | 2.64423  |  0.411822  |
| pubs/bars             | 2.52286  |  0.392961  |
| local services        | 2.50654  |  1

![fig.22 Greedy K-Mean Clustering Graph with c=24](./img/22.png)


### Cluster 1

Centroid: [1.50466667 2.27779487 2.51015385 3.41087179 4.92430769 4.05005128
 3.28866667 2.26933333 2.00307692 1.95184615 2.0374359  2.60128205
 2.63733333 1.86502564 0.874      0.89328205 0.81353846 0.89676923
 0.96297436 0.79005128 0.71158974 0.74148718 0.79558974 0.86548718]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.50467  |  0.888887  |
| resorts               | 2.27779  |  0.785108  |
| beaches               | 2.51015  |  0.275559  |
| parks                 | 3.41087  |  1.67355   |
| theatres              | 4.92431  |  0.102169  |
| museums               | 4.05005  |  1.44806   |
| malls                 | 3.28867  |  2.217     |
| zoo                   | 2.26933  |  1.05238   |
| restaurants           | 2.00308  |  0.422961  |
| pubs/bars             | 1.95185  |  0.337867  |
| local services        | 2.03744  |  

![fig.23 Greedy K-Mean Clustering Graph with c=25](./img/23.png)


### Cluster 1

Centroid: [1.49781065 1.74940828 1.74591716 4.5704142  4.51278107 3.81029586
 2.33923077 2.01994083 2.0439645  2.25715976 2.38207101 2.3435503
 1.53420118 1.19502959 1.2539645  1.24177515 0.97431953 0.61650888
 0.52609467 0.49597633 0.53236686 4.77142012 0.9504142  1.47745562]

|                       |     mean |   variance |
|-----------------------|----------|------------|
| churches              | 1.49781  |  0.209417  |
| resorts               | 1.74941  |  0.357265  |
| beaches               | 1.74592  |  0.260454  |
| parks                 | 4.57041  |  0.869262  |
| theatres              | 4.51278  |  0.553686  |
| museums               | 3.8103   |  1.46693   |
| malls                 | 2.33923  |  1.15809   |
| zoo                   | 2.01994  |  0.280789  |
| restaurants           | 2.04396  |  0.31018   |
| pubs/bars             | 2.25716  |  0.388891  |
| local services        | 2.38207  |  1

In [7]:
# Rank the cluster counts by their stored score, lowest first, and print the
# ranking as a GitHub-flavoured markdown table.
ranked_scores = sorted(best_var_dict.items(), key=lambda entry: entry[1])
print(tabulate(ranked_scores, headers=['c', 'optimal variance'], tablefmt='github'))

|   c |   optimal variance |
|-----|--------------------|
|  25 |           0.697503 |
|  24 |           0.712905 |
|  23 |           0.714411 |
|  22 |           0.726475 |
|  21 |           0.727228 |
|  20 |           0.741221 |
|  19 |           0.753468 |
|  18 |           0.772525 |
|  17 |           0.802054 |
|  16 |           0.809744 |
|  14 |           0.816135 |
|  15 |           0.820285 |
|  13 |           0.843161 |
|  12 |           0.878095 |
|  11 |           0.886975 |
|  10 |           0.905164 |
|   9 |           0.915949 |
|   8 |           0.950119 |
|   7 |           0.967824 |
|   6 |           1.02608  |
|   5 |           1.0913   |
|   4 |           1.15933  |
|   3 |           1.27029  |
