# Main Function

In [1]:
from data_prep import load_rating_data
from cluster import kmeans_clustering
from evaluate import evaluate
from IPython.display import clear_output

In [2]:
def run_test_kmeans(n_clusters, rating_path='dataset_5_min_100/', item_path='comic_genre.csv', n_folds=10):
    """Cross-validate the K-Means pipeline and report the MAE of every fold.

    For each fold number ``1..n_folds`` the train and test splits are loaded
    with ``load_rating_data``, the training data is clustered with
    ``kmeans_clustering`` and the test split is scored with ``evaluate``.
    Progress messages are printed while a fold runs; once all folds are done
    the output area is cleared and a per-fold summary plus the average MAE
    is printed.

    Parameters
    ----------
    n_clusters : int
        Number of clusters (k) handed to ``kmeans_clustering``.
    rating_path : str
        Forwarded to ``load_rating_data`` as ``rating_path``.
    item_path : str
        Forwarded to ``load_rating_data`` as ``item_path``.
    n_folds : int, default 10
        Number of folds to run; folds are numbered from 1 and that number is
        forwarded to ``load_rating_data`` as ``number``.

    Returns
    -------
    tuple
        ``(avg_mae, total_mae)`` where ``total_mae`` is the list of per-fold
        MAE values in fold order and ``avg_mae`` is their mean.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    total_mae = []

    # k-fold cross-validation; fold numbers are 1-based
    for fold in range(1, n_folds + 1):
        # load dataset
        print('K-Means (k={}) | Fold = {}'.format(n_clusters, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster, cluster_centroids = kmeans_clustering(n_clusters, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        mae = evaluate(test_data, ratings_cluster, cluster_centroids)
        total_mae.append(mae)
        print('Predicting and Evaluating Finished')

        # replace the progress log of this fold with a one-line result
        clear_output(wait=True)
        print('K = {} | Fold = {} | MAE = {}\n'.format(n_clusters, fold, mae))

    # final summary: one line per fold, then the average
    clear_output(wait=True)
    for fold, fold_mae in enumerate(total_mae, start=1):
        print('K = {} | Fold = {} | MAE = {}'.format(n_clusters, fold, fold_mae))
    # divide by the number of folds actually evaluated instead of a literal 10
    avg_mae = sum(total_mae) / len(total_mae)
    print('K = {} | Average MAE = {}'.format(n_clusters, avg_mae))

    return avg_mae, total_mae

# No Clustering (K=1 Baseline)

In [42]:
# Baseline: k = 1 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=1)

K = 1 | Fold = 1 | MAE = 0.2711706846105243
K = 1 | Fold = 2 | MAE = 0.2436201682113505
K = 1 | Fold = 3 | MAE = 0.24793435049546214
K = 1 | Fold = 4 | MAE = 0.2478566843362659
K = 1 | Fold = 5 | MAE = 0.2171740422178029
K = 1 | Fold = 6 | MAE = 0.2522172240932635
K = 1 | Fold = 7 | MAE = 0.27439575329373034
K = 1 | Fold = 8 | MAE = 0.24072770463853455
K = 1 | Fold = 9 | MAE = 0.23272546173621997
K = 1 | Fold = 10 | MAE = 0.23472213696823346
K = 1 | Average MAE = 0.24625442106013873


(0.24625442106013873,
 [0.2711706846105243,
  0.2436201682113505,
  0.24793435049546214,
  0.2478566843362659,
  0.2171740422178029,
  0.2522172240932635,
  0.27439575329373034,
  0.24072770463853455,
  0.23272546173621997,
  0.23472213696823346])

In [43]:
# Baseline: k = 1 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=1, rating_path='dataset_5_min_75/')

K = 1 | Fold = 1 | MAE = 0.3194271715388204
K = 1 | Fold = 2 | MAE = 0.3126145112122271
K = 1 | Fold = 3 | MAE = 0.3371930533738793
K = 1 | Fold = 4 | MAE = 0.3243633678289324
K = 1 | Fold = 5 | MAE = 0.3158410096007955
K = 1 | Fold = 6 | MAE = 0.33816343440311175
K = 1 | Fold = 7 | MAE = 0.312900461434167
K = 1 | Fold = 8 | MAE = 0.3071062341684496
K = 1 | Fold = 9 | MAE = 0.3363108550447118
K = 1 | Fold = 10 | MAE = 0.3194980483575356
K = 1 | Average MAE = 0.32234181469626305


(0.32234181469626305,
 [0.3194271715388204,
  0.3126145112122271,
  0.3371930533738793,
  0.3243633678289324,
  0.3158410096007955,
  0.33816343440311175,
  0.312900461434167,
  0.3071062341684496,
  0.3363108550447118,
  0.3194980483575356])

# K-Means K=2 - K=20 DATASET_MIN_100 (RATING 1-5)

In [3]:
# k = 2 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=2)

K = 2 | Fold = 1 | MAE = 0.2873209800770044
K = 2 | Fold = 2 | MAE = 0.26603306237109586
K = 2 | Fold = 3 | MAE = 0.26041808732284577
K = 2 | Fold = 4 | MAE = 0.26482222755403306
K = 2 | Fold = 5 | MAE = 0.23188836024476253
K = 2 | Fold = 6 | MAE = 0.27079361118627326
K = 2 | Fold = 7 | MAE = 0.28588416834960223
K = 2 | Fold = 8 | MAE = 0.25504346879431883
K = 2 | Fold = 9 | MAE = 0.2447583425032367
K = 2 | Fold = 10 | MAE = 0.2578478453190397
K = 2 | Average MAE = 0.2624810153722212


(0.2624810153722212,
 [0.2873209800770044,
  0.26603306237109586,
  0.26041808732284577,
  0.26482222755403306,
  0.23188836024476253,
  0.27079361118627326,
  0.28588416834960223,
  0.25504346879431883,
  0.2447583425032367,
  0.2578478453190397])

In [4]:
# k = 3 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=3)

K = 3 | Fold = 1 | MAE = 0.34798853047890044
K = 3 | Fold = 2 | MAE = 0.3261538995070379
K = 3 | Fold = 3 | MAE = 0.3398124310596404
K = 3 | Fold = 4 | MAE = 0.3555459624549784
K = 3 | Fold = 5 | MAE = 0.28824506847956466
K = 3 | Fold = 6 | MAE = 0.3397255872412453
K = 3 | Fold = 7 | MAE = 0.3689580523762031
K = 3 | Fold = 8 | MAE = 0.3396811227804064
K = 3 | Fold = 9 | MAE = 0.3311430503575267
K = 3 | Fold = 10 | MAE = 0.3429137728056443
K = 3 | Average MAE = 0.3380167477541147


(0.3380167477541147,
 [0.34798853047890044,
  0.3261538995070379,
  0.3398124310596404,
  0.3555459624549784,
  0.28824506847956466,
  0.3397255872412453,
  0.3689580523762031,
  0.3396811227804064,
  0.3311430503575267,
  0.3429137728056443])

In [5]:
# k = 4 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=4)

K = 4 | Fold = 1 | MAE = 0.35124850456736234
K = 4 | Fold = 2 | MAE = 0.32860018286011383
K = 4 | Fold = 3 | MAE = 0.33723463757191313
K = 4 | Fold = 4 | MAE = 0.36851826864285103
K = 4 | Fold = 5 | MAE = 0.3068606098713134
K = 4 | Fold = 6 | MAE = 0.34315163525731546
K = 4 | Fold = 7 | MAE = 0.36674155891349036
K = 4 | Fold = 8 | MAE = 0.35143811444863465
K = 4 | Fold = 9 | MAE = 0.3311734704809455
K = 4 | Fold = 10 | MAE = 0.33312283908742013
K = 4 | Average MAE = 0.34180898217013594


(0.34180898217013594,
 [0.35124850456736234,
  0.32860018286011383,
  0.33723463757191313,
  0.36851826864285103,
  0.3068606098713134,
  0.34315163525731546,
  0.36674155891349036,
  0.35143811444863465,
  0.3311734704809455,
  0.33312283908742013])

In [6]:
# k = 5 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=5)

K = 5 | Fold = 1 | MAE = 0.373106092810928
K = 5 | Fold = 2 | MAE = 0.31540959214300907
K = 5 | Fold = 3 | MAE = 0.3295672859174648
K = 5 | Fold = 4 | MAE = 0.33815615854883685
K = 5 | Fold = 5 | MAE = 0.327613408741389
K = 5 | Fold = 6 | MAE = 0.362858242544526
K = 5 | Fold = 7 | MAE = 0.3618109529348461
K = 5 | Fold = 8 | MAE = 0.3592257464374811
K = 5 | Fold = 9 | MAE = 0.3361078793210253
K = 5 | Fold = 10 | MAE = 0.3266987640120026
K = 5 | Average MAE = 0.3430554123411509


(0.3430554123411509,
 [0.373106092810928,
  0.31540959214300907,
  0.3295672859174648,
  0.33815615854883685,
  0.327613408741389,
  0.362858242544526,
  0.3618109529348461,
  0.3592257464374811,
  0.3361078793210253,
  0.3266987640120026])

In [7]:
# k = 6 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=6)

K = 6 | Fold = 1 | MAE = 0.37056769646836396
K = 6 | Fold = 2 | MAE = 0.35956780261186
K = 6 | Fold = 3 | MAE = 0.33887053997064587
K = 6 | Fold = 4 | MAE = 0.3778011137691808
K = 6 | Fold = 5 | MAE = 0.3239997705113885
K = 6 | Fold = 6 | MAE = 0.3754160262233176
K = 6 | Fold = 7 | MAE = 0.3716064851751875
K = 6 | Fold = 8 | MAE = 0.37355213976148816
K = 6 | Fold = 9 | MAE = 0.33545379026754585
K = 6 | Fold = 10 | MAE = 0.3222689363394727
K = 6 | Average MAE = 0.35491043010984513


(0.35491043010984513,
 [0.37056769646836396,
  0.35956780261186,
  0.33887053997064587,
  0.3778011137691808,
  0.3239997705113885,
  0.3754160262233176,
  0.3716064851751875,
  0.37355213976148816,
  0.33545379026754585,
  0.3222689363394727])

In [8]:
# k = 7 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=7)

K = 7 | Fold = 1 | MAE = 0.3817285260529814
K = 7 | Fold = 2 | MAE = 0.34355178929139096
K = 7 | Fold = 3 | MAE = 0.3569651834027499
K = 7 | Fold = 4 | MAE = 0.3689909837133892
K = 7 | Fold = 5 | MAE = 0.352881391464582
K = 7 | Fold = 6 | MAE = 0.39742347647130566
K = 7 | Fold = 7 | MAE = 0.3751022622017403
K = 7 | Fold = 8 | MAE = 0.389095620425185
K = 7 | Fold = 9 | MAE = 0.36800725158122294
K = 7 | Fold = 10 | MAE = 0.3487383069533741
K = 7 | Average MAE = 0.3682484791557922


(0.3682484791557922,
 [0.3817285260529814,
  0.34355178929139096,
  0.3569651834027499,
  0.3689909837133892,
  0.352881391464582,
  0.39742347647130566,
  0.3751022622017403,
  0.389095620425185,
  0.36800725158122294,
  0.3487383069533741])

In [9]:
# k = 8 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=8)

K = 8 | Fold = 1 | MAE = 0.39924381063749126
K = 8 | Fold = 2 | MAE = 0.3805642937103147
K = 8 | Fold = 3 | MAE = 0.3313140055791499
K = 8 | Fold = 4 | MAE = 0.39428315016468723
K = 8 | Fold = 5 | MAE = 0.35935163013305466
K = 8 | Fold = 6 | MAE = 0.39527491156407746
K = 8 | Fold = 7 | MAE = 0.41043995957874835
K = 8 | Fold = 8 | MAE = 0.3992856835977131
K = 8 | Fold = 9 | MAE = 0.35478448197184487
K = 8 | Fold = 10 | MAE = 0.3839715919455477
K = 8 | Average MAE = 0.3808513518882629


(0.3808513518882629,
 [0.39924381063749126,
  0.3805642937103147,
  0.3313140055791499,
  0.39428315016468723,
  0.35935163013305466,
  0.39527491156407746,
  0.41043995957874835,
  0.3992856835977131,
  0.35478448197184487,
  0.3839715919455477])

In [10]:
# k = 9 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=9)

K = 9 | Fold = 1 | MAE = 0.3954976607217739
K = 9 | Fold = 2 | MAE = 0.4010419116813777
K = 9 | Fold = 3 | MAE = 0.39290648776018366
K = 9 | Fold = 4 | MAE = 0.39347073482836215
K = 9 | Fold = 5 | MAE = 0.34570866617864704
K = 9 | Fold = 6 | MAE = 0.3923353580235377
K = 9 | Fold = 7 | MAE = 0.3880160705549012
K = 9 | Fold = 8 | MAE = 0.39594168025173393
K = 9 | Fold = 9 | MAE = 0.3992902015362486
K = 9 | Fold = 10 | MAE = 0.37448871013264523
K = 9 | Average MAE = 0.38786974816694114


(0.38786974816694114,
 [0.3954976607217739,
  0.4010419116813777,
  0.39290648776018366,
  0.39347073482836215,
  0.34570866617864704,
  0.3923353580235377,
  0.3880160705549012,
  0.39594168025173393,
  0.3992902015362486,
  0.37448871013264523])

In [11]:
# k = 10 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=10)

K = 10 | Fold = 1 | MAE = 0.407175797183838
K = 10 | Fold = 2 | MAE = 0.403881499689244
K = 10 | Fold = 3 | MAE = 0.3685962865654824
K = 10 | Fold = 4 | MAE = 0.39382547072549196
K = 10 | Fold = 5 | MAE = 0.3895603609598704
K = 10 | Fold = 6 | MAE = 0.39631788424504905
K = 10 | Fold = 7 | MAE = 0.4328462828448401
K = 10 | Fold = 8 | MAE = 0.3905902982106709
K = 10 | Fold = 9 | MAE = 0.3888046526090611
K = 10 | Fold = 10 | MAE = 0.40802279747526415
K = 10 | Average MAE = 0.39796213305088124


(0.39796213305088124,
 [0.407175797183838,
  0.403881499689244,
  0.3685962865654824,
  0.39382547072549196,
  0.3895603609598704,
  0.39631788424504905,
  0.4328462828448401,
  0.3905902982106709,
  0.3888046526090611,
  0.40802279747526415])

In [12]:
# k = 11 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=11)

K = 11 | Fold = 1 | MAE = 0.40250669188677723
K = 11 | Fold = 2 | MAE = 0.4047654328409819
K = 11 | Fold = 3 | MAE = 0.3838743285009442
K = 11 | Fold = 4 | MAE = 0.3947554374606811
K = 11 | Fold = 5 | MAE = 0.38295878390453447
K = 11 | Fold = 6 | MAE = 0.4062246766528812
K = 11 | Fold = 7 | MAE = 0.4314209664994534
K = 11 | Fold = 8 | MAE = 0.4030182218572983
K = 11 | Fold = 9 | MAE = 0.39394690176237607
K = 11 | Fold = 10 | MAE = 0.41789523022099345
K = 11 | Average MAE = 0.40213666715869206


(0.40213666715869206,
 [0.40250669188677723,
  0.4047654328409819,
  0.3838743285009442,
  0.3947554374606811,
  0.38295878390453447,
  0.4062246766528812,
  0.4314209664994534,
  0.4030182218572983,
  0.39394690176237607,
  0.41789523022099345])

In [13]:
# k = 12 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=12)

K = 12 | Fold = 1 | MAE = 0.4379607206793346
K = 12 | Fold = 2 | MAE = 0.40581625670751315
K = 12 | Fold = 3 | MAE = 0.37862380689475783
K = 12 | Fold = 4 | MAE = 0.4434536198224261
K = 12 | Fold = 5 | MAE = 0.3870897937785764
K = 12 | Fold = 6 | MAE = 0.4362092754891861
K = 12 | Fold = 7 | MAE = 0.4250397059482612
K = 12 | Fold = 8 | MAE = 0.4262563958128036
K = 12 | Fold = 9 | MAE = 0.4087976645619142
K = 12 | Fold = 10 | MAE = 0.4354766866317032
K = 12 | Average MAE = 0.4184723926326477


(0.4184723926326477,
 [0.4379607206793346,
  0.40581625670751315,
  0.37862380689475783,
  0.4434536198224261,
  0.3870897937785764,
  0.4362092754891861,
  0.4250397059482612,
  0.4262563958128036,
  0.4087976645619142,
  0.4354766866317032])

In [14]:
# k = 13 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=13)

K = 13 | Fold = 1 | MAE = 0.42911975996678486
K = 13 | Fold = 2 | MAE = 0.39731372773513396
K = 13 | Fold = 3 | MAE = 0.3926160525206323
K = 13 | Fold = 4 | MAE = 0.39051223077219127
K = 13 | Fold = 5 | MAE = 0.3915553785922829
K = 13 | Fold = 6 | MAE = 0.41158545352205755
K = 13 | Fold = 7 | MAE = 0.45887769532997175
K = 13 | Fold = 8 | MAE = 0.4185478604414493
K = 13 | Fold = 9 | MAE = 0.43481918141767595
K = 13 | Fold = 10 | MAE = 0.4201524914918192
K = 13 | Average MAE = 0.41450998317899995


(0.41450998317899995,
 [0.42911975996678486,
  0.39731372773513396,
  0.3926160525206323,
  0.39051223077219127,
  0.3915553785922829,
  0.41158545352205755,
  0.45887769532997175,
  0.4185478604414493,
  0.43481918141767595,
  0.4201524914918192])

In [15]:
# k = 14 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=14)

K = 14 | Fold = 1 | MAE = 0.4270470512051668
K = 14 | Fold = 2 | MAE = 0.4139786589151332
K = 14 | Fold = 3 | MAE = 0.4132416201997493
K = 14 | Fold = 4 | MAE = 0.46062227387982146
K = 14 | Fold = 5 | MAE = 0.4196024107274679
K = 14 | Fold = 6 | MAE = 0.4310329603872896
K = 14 | Fold = 7 | MAE = 0.42761675673605815
K = 14 | Fold = 8 | MAE = 0.45063462416513533
K = 14 | Fold = 9 | MAE = 0.4184644625596841
K = 14 | Fold = 10 | MAE = 0.3967682324585508
K = 14 | Average MAE = 0.42590090512340567


(0.42590090512340567,
 [0.4270470512051668,
  0.4139786589151332,
  0.4132416201997493,
  0.46062227387982146,
  0.4196024107274679,
  0.4310329603872896,
  0.42761675673605815,
  0.45063462416513533,
  0.4184644625596841,
  0.3967682324585508])

In [16]:
# k = 15 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=15)

K = 15 | Fold = 1 | MAE = 0.4732320878028201
K = 15 | Fold = 2 | MAE = 0.44833565059658037
K = 15 | Fold = 3 | MAE = 0.4167822144243299
K = 15 | Fold = 4 | MAE = 0.41467910741794073
K = 15 | Fold = 5 | MAE = 0.38105250519353545
K = 15 | Fold = 6 | MAE = 0.44748959634915586
K = 15 | Fold = 7 | MAE = 0.44802559466849856
K = 15 | Fold = 8 | MAE = 0.4188101128956369
K = 15 | Fold = 9 | MAE = 0.41773873508372095
K = 15 | Fold = 10 | MAE = 0.4334454959558419
K = 15 | Average MAE = 0.4299591100388061


(0.4299591100388061,
 [0.4732320878028201,
  0.44833565059658037,
  0.4167822144243299,
  0.41467910741794073,
  0.38105250519353545,
  0.44748959634915586,
  0.44802559466849856,
  0.4188101128956369,
  0.41773873508372095,
  0.4334454959558419])

In [17]:
# k = 16 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=16)


K = 16 | Fold = 1 | MAE = 0.4361731949300879
K = 16 | Fold = 2 | MAE = 0.4160336989795681
K = 16 | Fold = 3 | MAE = 0.44120302597172467
K = 16 | Fold = 4 | MAE = 0.4429255377047962
K = 16 | Fold = 5 | MAE = 0.39436144875750573
K = 16 | Fold = 6 | MAE = 0.4553554839872888
K = 16 | Fold = 7 | MAE = 0.46633295743062403
K = 16 | Fold = 8 | MAE = 0.4412968159849209
K = 16 | Fold = 9 | MAE = 0.4227127803518394
K = 16 | Fold = 10 | MAE = 0.4129061969631709
K = 16 | Average MAE = 0.4329301141061527


(0.4329301141061527,
 [0.4361731949300879,
  0.4160336989795681,
  0.44120302597172467,
  0.4429255377047962,
  0.39436144875750573,
  0.4553554839872888,
  0.46633295743062403,
  0.4412968159849209,
  0.4227127803518394,
  0.4129061969631709])

In [18]:
# k = 17 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=17)


K = 17 | Fold = 1 | MAE = 0.4334235655703928
K = 17 | Fold = 2 | MAE = 0.4483502268281737
K = 17 | Fold = 3 | MAE = 0.42750797015500747
K = 17 | Fold = 4 | MAE = 0.40966544795183235
K = 17 | Fold = 5 | MAE = 0.39173956793596393
K = 17 | Fold = 6 | MAE = 0.4540756966421831
K = 17 | Fold = 7 | MAE = 0.47871525569867446
K = 17 | Fold = 8 | MAE = 0.42173892223820425
K = 17 | Fold = 9 | MAE = 0.4115043690628026
K = 17 | Fold = 10 | MAE = 0.41782332874202543
K = 17 | Average MAE = 0.42945443508252606


(0.42945443508252606,
 [0.4334235655703928,
  0.4483502268281737,
  0.42750797015500747,
  0.40966544795183235,
  0.39173956793596393,
  0.4540756966421831,
  0.47871525569867446,
  0.42173892223820425,
  0.4115043690628026,
  0.41782332874202543])

In [19]:
# k = 18 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=18)


K = 18 | Fold = 1 | MAE = 0.4670191285276326
K = 18 | Fold = 2 | MAE = 0.41609657878598943
K = 18 | Fold = 3 | MAE = 0.41173631717988374
K = 18 | Fold = 4 | MAE = 0.44690849825172985
K = 18 | Fold = 5 | MAE = 0.36076615848967986
K = 18 | Fold = 6 | MAE = 0.4469310443594707
K = 18 | Fold = 7 | MAE = 0.4635907185406279
K = 18 | Fold = 8 | MAE = 0.4389204291023312
K = 18 | Fold = 9 | MAE = 0.42308124824241866
K = 18 | Fold = 10 | MAE = 0.40557423916930374
K = 18 | Average MAE = 0.42806243606490674


(0.42806243606490674,
 [0.4670191285276326,
  0.41609657878598943,
  0.41173631717988374,
  0.44690849825172985,
  0.36076615848967986,
  0.4469310443594707,
  0.4635907185406279,
  0.4389204291023312,
  0.42308124824241866,
  0.40557423916930374])

In [20]:
# k = 19 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=19)


K = 19 | Fold = 1 | MAE = 0.4641285405634272
K = 19 | Fold = 2 | MAE = 0.42563861072071596
K = 19 | Fold = 3 | MAE = 0.4349678215021687
K = 19 | Fold = 4 | MAE = 0.48346914867745755
K = 19 | Fold = 5 | MAE = 0.4115618053340786
K = 19 | Fold = 6 | MAE = 0.4378048203671502
K = 19 | Fold = 7 | MAE = 0.4780087428622007
K = 19 | Fold = 8 | MAE = 0.46227703031762124
K = 19 | Fold = 9 | MAE = 0.43548014125335427
K = 19 | Fold = 10 | MAE = 0.430748445368754
K = 19 | Average MAE = 0.44640851069669274


(0.44640851069669274,
 [0.4641285405634272,
  0.42563861072071596,
  0.4349678215021687,
  0.48346914867745755,
  0.4115618053340786,
  0.4378048203671502,
  0.4780087428622007,
  0.46227703031762124,
  0.43548014125335427,
  0.430748445368754])

In [21]:
# k = 20 with the default rating_path ('dataset_5_min_100/').
run_test_kmeans(n_clusters=20)


K = 20 | Fold = 1 | MAE = 0.48130803673290307
K = 20 | Fold = 2 | MAE = 0.4553798366679371
K = 20 | Fold = 3 | MAE = 0.455965572096049
K = 20 | Fold = 4 | MAE = 0.4373405629746447
K = 20 | Fold = 5 | MAE = 0.42163183364989404
K = 20 | Fold = 6 | MAE = 0.47401105435495444
K = 20 | Fold = 7 | MAE = 0.4696158967423548
K = 20 | Fold = 8 | MAE = 0.4411986843432948
K = 20 | Fold = 9 | MAE = 0.44910704356209413
K = 20 | Fold = 10 | MAE = 0.441307325479819
K = 20 | Average MAE = 0.4526865846603945


(0.4526865846603945,
 [0.48130803673290307,
  0.4553798366679371,
  0.455965572096049,
  0.4373405629746447,
  0.42163183364989404,
  0.47401105435495444,
  0.4696158967423548,
  0.4411986843432948,
  0.44910704356209413,
  0.441307325479819])

# K-Means K=2 - K=20 DATASET_MIN_75 (RATING 1-5)

In [22]:
# k = 2 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=2, rating_path='dataset_5_min_75/')

K = 2 | Fold = 1 | MAE = 0.4396851364321037
K = 2 | Fold = 2 | MAE = 0.4292427148803832
K = 2 | Fold = 3 | MAE = 0.43530153262090104
K = 2 | Fold = 4 | MAE = 0.4487263132897809
K = 2 | Fold = 5 | MAE = 0.41364654574544085
K = 2 | Fold = 6 | MAE = 0.44086966701277336
K = 2 | Fold = 7 | MAE = 0.42402515724817086
K = 2 | Fold = 8 | MAE = 0.42074765876685066
K = 2 | Fold = 9 | MAE = 0.43527756463204054
K = 2 | Fold = 10 | MAE = 0.4329875509880375
K = 2 | Average MAE = 0.4320509841616483


(0.4320509841616483,
 [0.4396851364321037,
  0.4292427148803832,
  0.43530153262090104,
  0.4487263132897809,
  0.41364654574544085,
  0.44086966701277336,
  0.42402515724817086,
  0.42074765876685066,
  0.43527756463204054,
  0.4329875509880375])

In [23]:
# k = 3 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=3, rating_path='dataset_5_min_75/')

K = 3 | Fold = 1 | MAE = 0.4381391970398593
K = 3 | Fold = 2 | MAE = 0.4357269918591443
K = 3 | Fold = 3 | MAE = 0.4386880572379504
K = 3 | Fold = 4 | MAE = 0.4533323039763089
K = 3 | Fold = 5 | MAE = 0.4305074514228763
K = 3 | Fold = 6 | MAE = 0.4459978177964431
K = 3 | Fold = 7 | MAE = 0.43597314436989953
K = 3 | Fold = 8 | MAE = 0.40721362639578323
K = 3 | Fold = 9 | MAE = 0.4393386661188274
K = 3 | Fold = 10 | MAE = 0.4339471127193858
K = 3 | Average MAE = 0.4358864368936478


(0.4358864368936478,
 [0.4381391970398593,
  0.4357269918591443,
  0.4386880572379504,
  0.4533323039763089,
  0.4305074514228763,
  0.4459978177964431,
  0.43597314436989953,
  0.40721362639578323,
  0.4393386661188274,
  0.4339471127193858])

In [24]:
# k = 4 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=4, rating_path='dataset_5_min_75/')

K = 4 | Fold = 1 | MAE = 0.4400598128284627
K = 4 | Fold = 2 | MAE = 0.4464362634318924
K = 4 | Fold = 3 | MAE = 0.44011512379750867
K = 4 | Fold = 4 | MAE = 0.44689046192439286
K = 4 | Fold = 5 | MAE = 0.4287115642594075
K = 4 | Fold = 6 | MAE = 0.4521033368352809
K = 4 | Fold = 7 | MAE = 0.43255691049819006
K = 4 | Fold = 8 | MAE = 0.41247715431551635
K = 4 | Fold = 9 | MAE = 0.4413018961475906
K = 4 | Fold = 10 | MAE = 0.44546030782372853
K = 4 | Average MAE = 0.438611283186197


(0.438611283186197,
 [0.4400598128284627,
  0.4464362634318924,
  0.44011512379750867,
  0.44689046192439286,
  0.4287115642594075,
  0.4521033368352809,
  0.43255691049819006,
  0.41247715431551635,
  0.4413018961475906,
  0.44546030782372853])

In [25]:
# k = 5 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=5, rating_path='dataset_5_min_75/')

K = 5 | Fold = 1 | MAE = 0.4434084196868154
K = 5 | Fold = 2 | MAE = 0.45084238794522086
K = 5 | Fold = 3 | MAE = 0.44248844759652733
K = 5 | Fold = 4 | MAE = 0.4779166596523829
K = 5 | Fold = 5 | MAE = 0.455101006661046
K = 5 | Fold = 6 | MAE = 0.47242538689755764
K = 5 | Fold = 7 | MAE = 0.45969884762690516
K = 5 | Fold = 8 | MAE = 0.4422462425242046
K = 5 | Fold = 9 | MAE = 0.4346146081066997
K = 5 | Fold = 10 | MAE = 0.4515672932159219
K = 5 | Average MAE = 0.4530309299913281


(0.4530309299913281,
 [0.4434084196868154,
  0.45084238794522086,
  0.44248844759652733,
  0.4779166596523829,
  0.455101006661046,
  0.47242538689755764,
  0.45969884762690516,
  0.4422462425242046,
  0.4346146081066997,
  0.4515672932159219])

In [26]:
# k = 6 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=6, rating_path='dataset_5_min_75/')

K = 6 | Fold = 1 | MAE = 0.4612213162917317
K = 6 | Fold = 2 | MAE = 0.4462733419602551
K = 6 | Fold = 3 | MAE = 0.47842446852139997
K = 6 | Fold = 4 | MAE = 0.47103588338739966
K = 6 | Fold = 5 | MAE = 0.466378723592095
K = 6 | Fold = 6 | MAE = 0.4816758650245841
K = 6 | Fold = 7 | MAE = 0.47136731128680964
K = 6 | Fold = 8 | MAE = 0.45884262689644884
K = 6 | Fold = 9 | MAE = 0.49153474327155655
K = 6 | Fold = 10 | MAE = 0.461252606555281
K = 6 | Average MAE = 0.46880068867875613


(0.46880068867875613,
 [0.4612213162917317,
  0.4462733419602551,
  0.47842446852139997,
  0.47103588338739966,
  0.466378723592095,
  0.4816758650245841,
  0.47136731128680964,
  0.45884262689644884,
  0.49153474327155655,
  0.461252606555281])

In [27]:
# k = 7 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=7, rating_path='dataset_5_min_75/')

K = 7 | Fold = 1 | MAE = 0.488126839814199
K = 7 | Fold = 2 | MAE = 0.4505681710043391
K = 7 | Fold = 3 | MAE = 0.4943855913056742
K = 7 | Fold = 4 | MAE = 0.48478848015765974
K = 7 | Fold = 5 | MAE = 0.4780917461007546
K = 7 | Fold = 6 | MAE = 0.4867321794662039
K = 7 | Fold = 7 | MAE = 0.4783612451771053
K = 7 | Fold = 8 | MAE = 0.45812472678626043
K = 7 | Fold = 9 | MAE = 0.4884610076860928
K = 7 | Fold = 10 | MAE = 0.4701197463127728
K = 7 | Average MAE = 0.4777759733811062


(0.4777759733811062,
 [0.488126839814199,
  0.4505681710043391,
  0.4943855913056742,
  0.48478848015765974,
  0.4780917461007546,
  0.4867321794662039,
  0.4783612451771053,
  0.45812472678626043,
  0.4884610076860928,
  0.4701197463127728])

In [28]:
# k = 8 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=8, rating_path='dataset_5_min_75/')

K = 8 | Fold = 1 | MAE = 0.4918597070621065
K = 8 | Fold = 2 | MAE = 0.4684236119395122
K = 8 | Fold = 3 | MAE = 0.5158036703651998
K = 8 | Fold = 4 | MAE = 0.5152044932080644
K = 8 | Fold = 5 | MAE = 0.47005802089748644
K = 8 | Fold = 6 | MAE = 0.49518558632185194
K = 8 | Fold = 7 | MAE = 0.4827791773283008
K = 8 | Fold = 8 | MAE = 0.4572252090765064
K = 8 | Fold = 9 | MAE = 0.5194987979580723
K = 8 | Fold = 10 | MAE = 0.47116028047635333
K = 8 | Average MAE = 0.48871985546334545


(0.48871985546334545,
 [0.4918597070621065,
  0.4684236119395122,
  0.5158036703651998,
  0.5152044932080644,
  0.47005802089748644,
  0.49518558632185194,
  0.4827791773283008,
  0.4572252090765064,
  0.5194987979580723,
  0.47116028047635333])

In [29]:
# k = 9 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=9, rating_path='dataset_5_min_75/')

K = 9 | Fold = 1 | MAE = 0.518024169216164
K = 9 | Fold = 2 | MAE = 0.4964958578029401
K = 9 | Fold = 3 | MAE = 0.530408856044543
K = 9 | Fold = 4 | MAE = 0.5065185958570884
K = 9 | Fold = 5 | MAE = 0.5015141571741563
K = 9 | Fold = 6 | MAE = 0.5415018262863482
K = 9 | Fold = 7 | MAE = 0.4867985777717323
K = 9 | Fold = 8 | MAE = 0.48434165294867676
K = 9 | Fold = 9 | MAE = 0.4774559607404197
K = 9 | Fold = 10 | MAE = 0.49506026982379747
K = 9 | Average MAE = 0.5038119923665867


(0.5038119923665867,
 [0.518024169216164,
  0.4964958578029401,
  0.530408856044543,
  0.5065185958570884,
  0.5015141571741563,
  0.5415018262863482,
  0.4867985777717323,
  0.48434165294867676,
  0.4774559607404197,
  0.49506026982379747])

In [30]:
# k = 10 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=10, rating_path='dataset_5_min_75/')

K = 10 | Fold = 1 | MAE = 0.5069768022577482
K = 10 | Fold = 2 | MAE = 0.4889491002909593
K = 10 | Fold = 3 | MAE = 0.5338154395572631
K = 10 | Fold = 4 | MAE = 0.5081040502140202
K = 10 | Fold = 5 | MAE = 0.459713499734259
K = 10 | Fold = 6 | MAE = 0.5300336332661921
K = 10 | Fold = 7 | MAE = 0.5239399026581644
K = 10 | Fold = 8 | MAE = 0.5059106253357734
K = 10 | Fold = 9 | MAE = 0.5270579109055988
K = 10 | Fold = 10 | MAE = 0.4936685839717453
K = 10 | Average MAE = 0.5078169548191724


(0.5078169548191724,
 [0.5069768022577482,
  0.4889491002909593,
  0.5338154395572631,
  0.5081040502140202,
  0.459713499734259,
  0.5300336332661921,
  0.5239399026581644,
  0.5059106253357734,
  0.5270579109055988,
  0.4936685839717453])

In [31]:
# k = 11 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=11, rating_path='dataset_5_min_75/')

K = 11 | Fold = 1 | MAE = 0.5294658404805365
K = 11 | Fold = 2 | MAE = 0.47923647361391514
K = 11 | Fold = 3 | MAE = 0.5266182601251199
K = 11 | Fold = 4 | MAE = 0.52956172683791
K = 11 | Fold = 5 | MAE = 0.49338260423469193
K = 11 | Fold = 6 | MAE = 0.5359405386404994
K = 11 | Fold = 7 | MAE = 0.5138672658446524
K = 11 | Fold = 8 | MAE = 0.4994054654993989
K = 11 | Fold = 9 | MAE = 0.519305419607184
K = 11 | Fold = 10 | MAE = 0.5178744088775965
K = 11 | Average MAE = 0.5144658003761504


(0.5144658003761504,
 [0.5294658404805365,
  0.47923647361391514,
  0.5266182601251199,
  0.52956172683791,
  0.49338260423469193,
  0.5359405386404994,
  0.5138672658446524,
  0.4994054654993989,
  0.519305419607184,
  0.5178744088775965])

In [32]:
# k = 12 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=12, rating_path='dataset_5_min_75/')

K = 12 | Fold = 1 | MAE = 0.517862370415351
K = 12 | Fold = 2 | MAE = 0.530065071019886
K = 12 | Fold = 3 | MAE = 0.5257243320674492
K = 12 | Fold = 4 | MAE = 0.550115640201473
K = 12 | Fold = 5 | MAE = 0.5143554170489746
K = 12 | Fold = 6 | MAE = 0.5491322689365143
K = 12 | Fold = 7 | MAE = 0.5325366025345745
K = 12 | Fold = 8 | MAE = 0.5079669346983278
K = 12 | Fold = 9 | MAE = 0.5116740778888442
K = 12 | Fold = 10 | MAE = 0.5165805991664782
K = 12 | Average MAE = 0.5256013313977872


(0.5256013313977872,
 [0.517862370415351,
  0.530065071019886,
  0.5257243320674492,
  0.550115640201473,
  0.5143554170489746,
  0.5491322689365143,
  0.5325366025345745,
  0.5079669346983278,
  0.5116740778888442,
  0.5165805991664782])

In [33]:
# k = 13 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=13, rating_path='dataset_5_min_75/')

K = 13 | Fold = 1 | MAE = 0.5247143767996653
K = 13 | Fold = 2 | MAE = 0.5007951680645956
K = 13 | Fold = 3 | MAE = 0.546449247182198
K = 13 | Fold = 4 | MAE = 0.5245940877860118
K = 13 | Fold = 5 | MAE = 0.5109370236907443
K = 13 | Fold = 6 | MAE = 0.5646958256092788
K = 13 | Fold = 7 | MAE = 0.5141276992994482
K = 13 | Fold = 8 | MAE = 0.5315710612191842
K = 13 | Fold = 9 | MAE = 0.5237816958554998
K = 13 | Fold = 10 | MAE = 0.5058047449920725
K = 13 | Average MAE = 0.5247470930498699


(0.5247470930498699,
 [0.5247143767996653,
  0.5007951680645956,
  0.546449247182198,
  0.5245940877860118,
  0.5109370236907443,
  0.5646958256092788,
  0.5141276992994482,
  0.5315710612191842,
  0.5237816958554998,
  0.5058047449920725])

In [34]:
# k = 14 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=14, rating_path='dataset_5_min_75/')

K = 14 | Fold = 1 | MAE = 0.5483064855624753
K = 14 | Fold = 2 | MAE = 0.5225449595533048
K = 14 | Fold = 3 | MAE = 0.548298542582218
K = 14 | Fold = 4 | MAE = 0.5410259587831767
K = 14 | Fold = 5 | MAE = 0.5321012647211115
K = 14 | Fold = 6 | MAE = 0.5390129380856704
K = 14 | Fold = 7 | MAE = 0.5370195459742159
K = 14 | Fold = 8 | MAE = 0.5465749206736471
K = 14 | Fold = 9 | MAE = 0.5379188579858523
K = 14 | Fold = 10 | MAE = 0.5530471818385472
K = 14 | Average MAE = 0.5405850655760219


(0.5405850655760219,
 [0.5483064855624753,
  0.5225449595533048,
  0.548298542582218,
  0.5410259587831767,
  0.5321012647211115,
  0.5390129380856704,
  0.5370195459742159,
  0.5465749206736471,
  0.5379188579858523,
  0.5530471818385472])

In [35]:
# k = 15 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=15, rating_path='dataset_5_min_75/')

K = 15 | Fold = 1 | MAE = 0.5230441147463418
K = 15 | Fold = 2 | MAE = 0.5379234105146571
K = 15 | Fold = 3 | MAE = 0.5403629611769499
K = 15 | Fold = 4 | MAE = 0.5384754119593412
K = 15 | Fold = 5 | MAE = 0.5296092962637492
K = 15 | Fold = 6 | MAE = 0.5409685261882339
K = 15 | Fold = 7 | MAE = 0.5334269614765749
K = 15 | Fold = 8 | MAE = 0.5048830052472344
K = 15 | Fold = 9 | MAE = 0.5335247520796743
K = 15 | Fold = 10 | MAE = 0.513731180309726
K = 15 | Average MAE = 0.5295949619962482


(0.5295949619962482,
 [0.5230441147463418,
  0.5379234105146571,
  0.5403629611769499,
  0.5384754119593412,
  0.5296092962637492,
  0.5409685261882339,
  0.5334269614765749,
  0.5048830052472344,
  0.5335247520796743,
  0.513731180309726])

In [36]:
# k = 16 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=16, rating_path='dataset_5_min_75/')

K = 16 | Fold = 1 | MAE = 0.5392714388971228
K = 16 | Fold = 2 | MAE = 0.5256242530774029
K = 16 | Fold = 3 | MAE = 0.5535114951785042
K = 16 | Fold = 4 | MAE = 0.532863798543893
K = 16 | Fold = 5 | MAE = 0.5318914416281016
K = 16 | Fold = 6 | MAE = 0.5535022334377493
K = 16 | Fold = 7 | MAE = 0.5417899656891763
K = 16 | Fold = 8 | MAE = 0.5277425925394343
K = 16 | Fold = 9 | MAE = 0.5550180303994802
K = 16 | Fold = 10 | MAE = 0.5291043316089961
K = 16 | Average MAE = 0.5390319580999859


(0.5390319580999859,
 [0.5392714388971228,
  0.5256242530774029,
  0.5535114951785042,
  0.532863798543893,
  0.5318914416281016,
  0.5535022334377493,
  0.5417899656891763,
  0.5277425925394343,
  0.5550180303994802,
  0.5291043316089961])

In [37]:
# k = 17 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=17, rating_path='dataset_5_min_75/')

K = 17 | Fold = 1 | MAE = 0.5527316131125778
K = 17 | Fold = 2 | MAE = 0.5394824381543252
K = 17 | Fold = 3 | MAE = 0.5627157923962955
K = 17 | Fold = 4 | MAE = 0.5653650254170405
K = 17 | Fold = 5 | MAE = 0.5235132813790987
K = 17 | Fold = 6 | MAE = 0.5643734982302033
K = 17 | Fold = 7 | MAE = 0.5564219214824719
K = 17 | Fold = 8 | MAE = 0.5258778172871961
K = 17 | Fold = 9 | MAE = 0.5608451538327273
K = 17 | Fold = 10 | MAE = 0.5178669221545517
K = 17 | Average MAE = 0.5469193463446488


(0.5469193463446488,
 [0.5527316131125778,
  0.5394824381543252,
  0.5627157923962955,
  0.5653650254170405,
  0.5235132813790987,
  0.5643734982302033,
  0.5564219214824719,
  0.5258778172871961,
  0.5608451538327273,
  0.5178669221545517])

In [38]:
# k = 18 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=18, rating_path='dataset_5_min_75/')

K = 18 | Fold = 1 | MAE = 0.5568388955400155
K = 18 | Fold = 2 | MAE = 0.5475718553809551
K = 18 | Fold = 3 | MAE = 0.5530702872406094
K = 18 | Fold = 4 | MAE = 0.5530802288482657
K = 18 | Fold = 5 | MAE = 0.5454080209234478
K = 18 | Fold = 6 | MAE = 0.5561041653388764
K = 18 | Fold = 7 | MAE = 0.5572444183281089
K = 18 | Fold = 8 | MAE = 0.5482444279324776
K = 18 | Fold = 9 | MAE = 0.5597296254508153
K = 18 | Fold = 10 | MAE = 0.543344849581046
K = 18 | Average MAE = 0.5520636774564618


(0.5520636774564618,
 [0.5568388955400155,
  0.5475718553809551,
  0.5530702872406094,
  0.5530802288482657,
  0.5454080209234478,
  0.5561041653388764,
  0.5572444183281089,
  0.5482444279324776,
  0.5597296254508153,
  0.543344849581046])

In [39]:
# k = 19 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=19, rating_path='dataset_5_min_75/')

K = 19 | Fold = 1 | MAE = 0.5369402956243594
K = 19 | Fold = 2 | MAE = 0.5295697752152191
K = 19 | Fold = 3 | MAE = 0.5695092936982309
K = 19 | Fold = 4 | MAE = 0.5575303610132736
K = 19 | Fold = 5 | MAE = 0.5291421369950762
K = 19 | Fold = 6 | MAE = 0.5858301898133796
K = 19 | Fold = 7 | MAE = 0.5580769555729703
K = 19 | Fold = 8 | MAE = 0.5639878497057698
K = 19 | Fold = 9 | MAE = 0.5552993210403772
K = 19 | Fold = 10 | MAE = 0.5194006127323831
K = 19 | Average MAE = 0.5505286791411039


(0.5505286791411039,
 [0.5369402956243594,
  0.5295697752152191,
  0.5695092936982309,
  0.5575303610132736,
  0.5291421369950762,
  0.5858301898133796,
  0.5580769555729703,
  0.5639878497057698,
  0.5552993210403772,
  0.5194006127323831])

In [40]:
# k = 20 with rating_path 'dataset_5_min_75/'.
run_test_kmeans(n_clusters=20, rating_path='dataset_5_min_75/')

K = 20 | Fold = 1 | MAE = 0.5421944564887871
K = 20 | Fold = 2 | MAE = 0.5436573541736316
K = 20 | Fold = 3 | MAE = 0.5893892782976768
K = 20 | Fold = 4 | MAE = 0.5781461537255062
K = 20 | Fold = 5 | MAE = 0.5452472097693645
K = 20 | Fold = 6 | MAE = 0.5887272761320671
K = 20 | Fold = 7 | MAE = 0.5521924286847029
K = 20 | Fold = 8 | MAE = 0.5511924669607274
K = 20 | Fold = 9 | MAE = 0.5531886205144056
K = 20 | Fold = 10 | MAE = 0.5497676773647759
K = 20 | Average MAE = 0.5593702922111644


(0.5593702922111644,
 [0.5421944564887871,
  0.5436573541736316,
  0.5893892782976768,
  0.5781461537255062,
  0.5452472097693645,
  0.5887272761320671,
  0.5521924286847029,
  0.5511924669607274,
  0.5531886205144056,
  0.5497676773647759])