# Main Function

In [3]:
from data_prep import load_rating_data
from cluster import kmeans_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [4]:
def run_test_kmeans(n_clusters, rating_path = 'dataset_5_min_100/', item_path = 'comic_genre.csv', n_folds = 10):
    """Cross-validate the K-Means pipeline and print per-fold and average scores.

    For every fold number in ``1..n_folds`` the train and test splits are
    loaded with ``load_rating_data``, the training data is clustered with
    ``kmeans_clustering`` and the result is scored with ``time_evaluate``.
    Progress messages are printed while a fold runs; the cell output is then
    cleared and replaced by a compact summary (one line per fold plus the
    averages).

    Parameters
    ----------
    n_clusters
        Forwarded to ``kmeans_clustering`` as its first argument and echoed
        as ``K`` in every printed line.
    rating_path
        Forwarded unchanged to ``load_rating_data`` for both splits.
    item_path
        Forwarded unchanged to ``load_rating_data`` for both splits.
    n_folds
        Number of folds to run; folds are numbered starting at 1. The default
        of 10 reproduces the previously hard-coded 10-fold behaviour.

    Returns
    -------
    None
        Results are only printed, so calling this as the last expression of a
        cell produces no ``Out[...]`` value.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    fold_mae = []
    fold_time = []

    # k-fold cross-validation; fold numbers are 1-based because they are
    # passed to load_rating_data as `number`.
    for fold in range(1, n_folds + 1):
        # load dataset
        print('K-Means (k={}) | Fold = {}'.format(n_clusters, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster, cluster_centroids = kmeans_clustering(n_clusters, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # `elapsed` rather than `time`, to avoid shadowing the stdlib module name.
        mae, elapsed = time_evaluate(test_data, ratings_cluster, cluster_centroids)
        fold_mae.append(mae)
        fold_time.append(elapsed)
        print('Predicting and Evaluating Finished')

        # Replace the progress chatter of this fold with a single result line.
        clear_output(wait=True)
        print('K = {} | Fold = {} | MAE = {} | Time Elapsed = {}\n'.format(n_clusters, fold, mae, elapsed))

    # Final summary: wipe the last per-fold line and print every fold at once.
    clear_output(wait=True)
    for fold, (mae, elapsed) in enumerate(zip(fold_mae, fold_time), start=1):
        print('K = {} | Fold = {} | MAE = {} | Time Elapsed = {}'.format(n_clusters, fold, mae, elapsed))

    # Average over the folds actually run instead of a hard-coded 10.
    avg_mae = sum(fold_mae) / len(fold_mae)
    avg_time = sum(fold_time) / len(fold_time)
    print('K = {} | Average MAE = {} | Average Time = {}'.format(n_clusters, avg_mae, avg_time))

# No Clustering (K=1 Baseline)

In [3]:
run_test_kmeans(1)

K = 1 | Fold = 1 | MAE = 0.27117068461052446 | Time Elapsed = 0.02652138757077761
K = 1 | Fold = 2 | MAE = 0.24362016821135046 | Time Elapsed = 0.02177645889901778
K = 1 | Fold = 3 | MAE = 0.24793435049546259 | Time Elapsed = 0.026263751746634208
K = 1 | Fold = 4 | MAE = 0.24785668433626568 | Time Elapsed = 0.021590590549837405
K = 1 | Fold = 5 | MAE = 0.21717404221780298 | Time Elapsed = 0.021723206828241477
K = 1 | Fold = 6 | MAE = 0.25221722409326347 | Time Elapsed = 0.021692419603195883
K = 1 | Fold = 7 | MAE = 0.2743957532937302 | Time Elapsed = 0.026220317852930945
K = 1 | Fold = 8 | MAE = 0.24072770463853435 | Time Elapsed = 0.0216292358670004
K = 1 | Fold = 9 | MAE = 0.23272546173622036 | Time Elapsed = 0.025533237941221528
K = 1 | Fold = 10 | MAE = 0.23472213696823366 | Time Elapsed = 0.02163223467625145
K = 1 | Average MAE = 0.2462544210601388 | Average Time = 0.02345828415351087


In [23]:
run_test_kmeans(1, rating_path='dataset_5_min_75/')

K = 1 | Fold = 1 | MAE = 0.319427171538821 | Time Elapsed = 0.05370250975591779
K = 1 | Fold = 2 | MAE = 0.3126145112122268 | Time Elapsed = 0.04499888897615417
K = 1 | Fold = 3 | MAE = 0.33719305337387945 | Time Elapsed = 0.054495495782973864
K = 1 | Fold = 4 | MAE = 0.32436336782893227 | Time Elapsed = 0.04473383218283313
K = 1 | Fold = 5 | MAE = 0.31584100960079514 | Time Elapsed = 0.04472876518471333
K = 1 | Fold = 6 | MAE = 0.33816343440311186 | Time Elapsed = 0.053936394655973025
K = 1 | Fold = 7 | MAE = 0.3129004614341672 | Time Elapsed = 0.04461234232480975
K = 1 | Fold = 8 | MAE = 0.30710623416844957 | Time Elapsed = 0.054174588738077796
K = 1 | Fold = 9 | MAE = 0.336310855044712 | Time Elapsed = 0.04467793597898042
K = 1 | Fold = 10 | MAE = 0.31949804835753526 | Time Elapsed = 0.0541342783733902
K = 1 | Average MAE = 0.32234181469626305 | Average Time = 0.04941950319538234


# K-Means K=2 - K=20 DATASET_MIN_100 (RATING 1-5)

In [4]:
run_test_kmeans(2)

K = 2 | Fold = 1 | MAE = 0.2873209800770044 | Time Elapsed = 0.02251476711222346
K = 2 | Fold = 2 | MAE = 0.26603306237109564 | Time Elapsed = 0.018174899963708333
K = 2 | Fold = 3 | MAE = 0.26041808732284605 | Time Elapsed = 0.01805056831935867
K = 2 | Fold = 4 | MAE = 0.2648222275540328 | Time Elapsed = 0.018024305989283132
K = 2 | Fold = 5 | MAE = 0.23188836024476234 | Time Elapsed = 0.018152793441902165
K = 2 | Fold = 6 | MAE = 0.2707936111862734 | Time Elapsed = 0.01803150833497313
K = 2 | Fold = 7 | MAE = 0.2858841683496023 | Time Elapsed = 0.01787707093207831
K = 2 | Fold = 8 | MAE = 0.2550434687943186 | Time Elapsed = 0.017971916585879898
K = 2 | Fold = 9 | MAE = 0.24475834250323705 | Time Elapsed = 0.017995220939938935
K = 2 | Fold = 10 | MAE = 0.2578478453190399 | Time Elapsed = 0.018044268288079597
K = 2 | Average MAE = 0.2624810153722213 | Average Time = 0.018483731990742564


In [5]:
run_test_kmeans(3)

K = 3 | Fold = 1 | MAE = 0.3479885304789007 | Time Elapsed = 0.01678464632219451
K = 3 | Fold = 2 | MAE = 0.32615389950703777 | Time Elapsed = 0.016964706178584046
K = 3 | Fold = 3 | MAE = 0.3398124310596401 | Time Elapsed = 0.01691216470311474
K = 3 | Fold = 4 | MAE = 0.3555459624549781 | Time Elapsed = 0.016948383231390016
K = 3 | Fold = 5 | MAE = 0.2882450684795647 | Time Elapsed = 0.016888138456746332
K = 3 | Fold = 6 | MAE = 0.3397255872412454 | Time Elapsed = 0.016922035788087302
K = 3 | Fold = 7 | MAE = 0.368958052376203 | Time Elapsed = 0.016703742679375876
K = 3 | Fold = 8 | MAE = 0.33968112278040624 | Time Elapsed = 0.0170090625596649
K = 3 | Fold = 9 | MAE = 0.33114305035752667 | Time Elapsed = 0.01681757649973867
K = 3 | Fold = 10 | MAE = 0.3429137728056439 | Time Elapsed = 0.016934704532803695
K = 3 | Average MAE = 0.3380167477541146 | Average Time = 0.016888516095170007


In [6]:
run_test_kmeans(4)

K = 4 | Fold = 1 | MAE = 0.35124850456736245 | Time Elapsed = 0.01636103563880967
K = 4 | Fold = 2 | MAE = 0.32860018286011344 | Time Elapsed = 0.0166385545846052
K = 4 | Fold = 3 | MAE = 0.3372346375719132 | Time Elapsed = 0.01634437689603365
K = 4 | Fold = 4 | MAE = 0.3685182686428512 | Time Elapsed = 0.01650602091193074
K = 4 | Fold = 5 | MAE = 0.3068606098713128 | Time Elapsed = 0.016316508118678515
K = 4 | Fold = 6 | MAE = 0.34315163525731507 | Time Elapsed = 0.016478602541968805
K = 4 | Fold = 7 | MAE = 0.36674155891349 | Time Elapsed = 0.01645742328577594
K = 4 | Fold = 8 | MAE = 0.3514381144486344 | Time Elapsed = 0.016358037076176144
K = 4 | Fold = 9 | MAE = 0.33117347048094575 | Time Elapsed = 0.016538682261945527
K = 4 | Fold = 10 | MAE = 0.3331228390874202 | Time Elapsed = 0.01681642975155211
K = 4 | Average MAE = 0.3418089821701359 | Average Time = 0.016481567106747634


In [7]:
run_test_kmeans(5)

K = 5 | Fold = 1 | MAE = 0.3731060928109282 | Time Elapsed = 0.01701163584956363
K = 5 | Fold = 2 | MAE = 0.3154095921430088 | Time Elapsed = 0.015875230110663472
K = 5 | Fold = 3 | MAE = 0.32956728591746515 | Time Elapsed = 0.015642230420842375
K = 5 | Fold = 4 | MAE = 0.33815615854883696 | Time Elapsed = 0.015646431773130506
K = 5 | Fold = 5 | MAE = 0.3276134087413893 | Time Elapsed = 0.015770718854083668
K = 5 | Fold = 6 | MAE = 0.36285824254452587 | Time Elapsed = 0.01561340211879754
K = 5 | Fold = 7 | MAE = 0.3618109529348462 | Time Elapsed = 0.01658234247636611
K = 5 | Fold = 8 | MAE = 0.3592257464374811 | Time Elapsed = 0.015602090526350133
K = 5 | Fold = 9 | MAE = 0.3361078793210257 | Time Elapsed = 0.01568527031206752
K = 5 | Fold = 10 | MAE = 0.3266987640120029 | Time Elapsed = 0.015566718127570923
K = 5 | Average MAE = 0.343055412341151 | Average Time = 0.01589960705694359


In [8]:
run_test_kmeans(6)

K = 6 | Fold = 1 | MAE = 0.370567696468364 | Time Elapsed = 0.015486507639186518
K = 6 | Fold = 2 | MAE = 0.3595678026118599 | Time Elapsed = 0.015463294814053332
K = 6 | Fold = 3 | MAE = 0.3388705399706461 | Time Elapsed = 0.015955471489535106
K = 6 | Fold = 4 | MAE = 0.37780111376918096 | Time Elapsed = 0.01529715939732609
K = 6 | Fold = 5 | MAE = 0.3239997705113883 | Time Elapsed = 0.015740547631859572
K = 6 | Fold = 6 | MAE = 0.3754160262233173 | Time Elapsed = 0.015443906935833793
K = 6 | Fold = 7 | MAE = 0.3716064851751872 | Time Elapsed = 0.01630588342122895
K = 6 | Fold = 8 | MAE = 0.3735521397614887 | Time Elapsed = 0.015731237885212284
K = 6 | Fold = 9 | MAE = 0.3354537902675461 | Time Elapsed = 0.015846416632024258
K = 6 | Fold = 10 | MAE = 0.32226893633947307 | Time Elapsed = 0.01573723993091098
K = 6 | Average MAE = 0.3549104301098452 | Average Time = 0.015700766577717087


In [9]:
run_test_kmeans(7)

K = 7 | Fold = 1 | MAE = 0.3817285260529816 | Time Elapsed = 0.015485079751786633
K = 7 | Fold = 2 | MAE = 0.34355178929139074 | Time Elapsed = 0.01519720201694212
K = 7 | Fold = 3 | MAE = 0.35696518340275024 | Time Elapsed = 0.01502273547354085
K = 7 | Fold = 4 | MAE = 0.36899098371338845 | Time Elapsed = 0.015570047791312797
K = 7 | Fold = 5 | MAE = 0.3528813914645823 | Time Elapsed = 0.015498106297999198
K = 7 | Fold = 6 | MAE = 0.39742347647130594 | Time Elapsed = 0.015654147903549617
K = 7 | Fold = 7 | MAE = 0.3751022622017404 | Time Elapsed = 0.015557149280075164
K = 7 | Fold = 8 | MAE = 0.3890956204251854 | Time Elapsed = 0.01578174531883738
K = 7 | Fold = 9 | MAE = 0.3680072515812233 | Time Elapsed = 0.015475901153704515
K = 7 | Fold = 10 | MAE = 0.34873830695337404 | Time Elapsed = 0.015572783485405889
K = 7 | Average MAE = 0.3682484791557923 | Average Time = 0.015481489847315417


In [10]:
run_test_kmeans(8)

K = 8 | Fold = 1 | MAE = 0.39924381063749204 | Time Elapsed = 0.015090756617122536
K = 8 | Fold = 2 | MAE = 0.3805642937103144 | Time Elapsed = 0.015657519957911023
K = 8 | Fold = 3 | MAE = 0.33131400557915 | Time Elapsed = 0.015630962327467978
K = 8 | Fold = 4 | MAE = 0.39428315016468735 | Time Elapsed = 0.015246687606398338
K = 8 | Fold = 5 | MAE = 0.35935163013305466 | Time Elapsed = 0.015014210253133296
K = 8 | Fold = 6 | MAE = 0.3952749115640774 | Time Elapsed = 0.015161638129907147
K = 8 | Fold = 7 | MAE = 0.41043995957874846 | Time Elapsed = 0.015208638456493919
K = 8 | Fold = 8 | MAE = 0.39928568359771355 | Time Elapsed = 0.014917124252592714
K = 8 | Fold = 9 | MAE = 0.3547844819718455 | Time Elapsed = 0.015109044707040827
K = 8 | Fold = 10 | MAE = 0.3839715919455476 | Time Elapsed = 0.01519514912760726
K = 8 | Average MAE = 0.380851351888263 | Average Time = 0.015223173143567503


In [11]:
run_test_kmeans(9)

K = 9 | Fold = 1 | MAE = 0.3954976607217745 | Time Elapsed = 0.015314067391554925
K = 9 | Fold = 2 | MAE = 0.40104191168137765 | Time Elapsed = 0.014819927248589269
K = 9 | Fold = 3 | MAE = 0.3929064877601839 | Time Elapsed = 0.014549709031912281
K = 9 | Fold = 4 | MAE = 0.39347073482836215 | Time Elapsed = 0.015272765870079164
K = 9 | Fold = 5 | MAE = 0.34570866617864665 | Time Elapsed = 0.01502011492537477
K = 9 | Fold = 6 | MAE = 0.3923353580235376 | Time Elapsed = 0.014952776475332233
K = 9 | Fold = 7 | MAE = 0.3880160705549011 | Time Elapsed = 0.015093722715680692
K = 9 | Fold = 8 | MAE = 0.3959416802517339 | Time Elapsed = 0.014771270855886478
K = 9 | Fold = 9 | MAE = 0.39929020153624883 | Time Elapsed = 0.0156008406016228
K = 9 | Fold = 10 | MAE = 0.37448871013264545 | Time Elapsed = 0.015703025621127575
K = 9 | Average MAE = 0.3878697481669411 | Average Time = 0.01510982207371602


In [12]:
run_test_kmeans(10)

K = 10 | Fold = 1 | MAE = 0.407175797183838 | Time Elapsed = 0.0149916813376631
K = 10 | Fold = 2 | MAE = 0.40388149968924364 | Time Elapsed = 0.015463409442544568
K = 10 | Fold = 3 | MAE = 0.36859628656548193 | Time Elapsed = 0.014998656919912666
K = 10 | Fold = 4 | MAE = 0.3938254707254916 | Time Elapsed = 0.014836018693811109
K = 10 | Fold = 5 | MAE = 0.3895603609598705 | Time Elapsed = 0.015492677925483481
K = 10 | Fold = 6 | MAE = 0.396317884245049 | Time Elapsed = 0.015065805562890298
K = 10 | Fold = 7 | MAE = 0.43284628284483984 | Time Elapsed = 0.015597253067133535
K = 10 | Fold = 8 | MAE = 0.3905902982106709 | Time Elapsed = 0.014866168614039622
K = 10 | Fold = 9 | MAE = 0.3888046526090613 | Time Elapsed = 0.014987808727607074
K = 10 | Fold = 10 | MAE = 0.4080227974752642 | Time Elapsed = 0.015527281719797873
K = 10 | Average MAE = 0.39796213305088113 | Average Time = 0.015182676201088333


In [13]:
run_test_kmeans(11)

K = 11 | Fold = 1 | MAE = 0.40250669188677707 | Time Elapsed = 0.015298660006970085
K = 11 | Fold = 2 | MAE = 0.40476543284098193 | Time Elapsed = 0.015277346765617589
K = 11 | Fold = 3 | MAE = 0.38387432850094416 | Time Elapsed = 0.015136836721038821
K = 11 | Fold = 4 | MAE = 0.39475543746068087 | Time Elapsed = 0.01521026569901331
K = 11 | Fold = 5 | MAE = 0.38295878390453447 | Time Elapsed = 0.014643853355220348
K = 11 | Fold = 6 | MAE = 0.40622467665288114 | Time Elapsed = 0.015557694937624472
K = 11 | Fold = 7 | MAE = 0.43142096649945355 | Time Elapsed = 0.01480560383651414
K = 11 | Fold = 8 | MAE = 0.40301822185729863 | Time Elapsed = 0.015365201117380975
K = 11 | Fold = 9 | MAE = 0.3939469017623765 | Time Elapsed = 0.015297613012394441
K = 11 | Fold = 10 | MAE = 0.41789523022099323 | Time Elapsed = 0.015191141401981793
K = 11 | Average MAE = 0.4021366671586922 | Average Time = 0.015178421685375598


In [14]:
run_test_kmeans(12)

K = 12 | Fold = 1 | MAE = 0.43796072067933395 | Time Elapsed = 0.015488689028231424
K = 12 | Fold = 2 | MAE = 0.4058162567075128 | Time Elapsed = 0.015266900147862956
K = 12 | Fold = 3 | MAE = 0.3786238068947581 | Time Elapsed = 0.015210031313523163
K = 12 | Fold = 4 | MAE = 0.4434536198224261 | Time Elapsed = 0.014861258194358951
K = 12 | Fold = 5 | MAE = 0.38708979377857605 | Time Elapsed = 0.014967395742778273
K = 12 | Fold = 6 | MAE = 0.4362092754891862 | Time Elapsed = 0.015043453726129207
K = 12 | Fold = 7 | MAE = 0.425039705948261 | Time Elapsed = 0.015508076341196247
K = 12 | Fold = 8 | MAE = 0.42625639581280345 | Time Elapsed = 0.015134353281105403
K = 12 | Fold = 9 | MAE = 0.40879766456191485 | Time Elapsed = 0.015246232772523117
K = 12 | Fold = 10 | MAE = 0.43547668663170375 | Time Elapsed = 0.015594297083045288
K = 12 | Average MAE = 0.41847239263264757 | Average Time = 0.015232068763075406


In [15]:
run_test_kmeans(13)

K = 13 | Fold = 1 | MAE = 0.4291197599667845 | Time Elapsed = 0.015039366386748151
K = 13 | Fold = 2 | MAE = 0.3973137277351344 | Time Elapsed = 0.015058496394205166
K = 13 | Fold = 3 | MAE = 0.3926160525206326 | Time Elapsed = 0.01507791934798939
K = 13 | Fold = 4 | MAE = 0.3905122307721916 | Time Elapsed = 0.015188747868793212
K = 13 | Fold = 5 | MAE = 0.39155537859228307 | Time Elapsed = 0.015582872695922316
K = 13 | Fold = 6 | MAE = 0.41158545352205755 | Time Elapsed = 0.015289298686465266
K = 13 | Fold = 7 | MAE = 0.458877695329972 | Time Elapsed = 0.01506535884028082
K = 13 | Fold = 8 | MAE = 0.4185478604414492 | Time Elapsed = 0.01536475873952681
K = 13 | Fold = 9 | MAE = 0.43481918141767656 | Time Elapsed = 0.015604022428201265
K = 13 | Fold = 10 | MAE = 0.42015249149181944 | Time Elapsed = 0.014686749419795453
K = 13 | Average MAE = 0.4145099831790001 | Average Time = 0.015195759080792784


In [16]:
run_test_kmeans(14)

K = 14 | Fold = 1 | MAE = 0.42704705120516584 | Time Elapsed = 0.015068237641278001
K = 14 | Fold = 2 | MAE = 0.41397865891513363 | Time Elapsed = 0.015104130648091338
K = 14 | Fold = 3 | MAE = 0.41324162019975 | Time Elapsed = 0.014873517984638939
K = 14 | Fold = 4 | MAE = 0.4606222738798215 | Time Elapsed = 0.014769669588735896
K = 14 | Fold = 5 | MAE = 0.4196024107274682 | Time Elapsed = 0.015445236274803494
K = 14 | Fold = 6 | MAE = 0.4310329603872897 | Time Elapsed = 0.014870583350871637
K = 14 | Fold = 7 | MAE = 0.4276167567360585 | Time Elapsed = 0.014701819947204941
K = 14 | Fold = 8 | MAE = 0.4506346241651359 | Time Elapsed = 0.01507434949211123
K = 14 | Fold = 9 | MAE = 0.41846446255968456 | Time Elapsed = 0.015418922578788888
K = 14 | Fold = 10 | MAE = 0.3967682324585507 | Time Elapsed = 0.014962620196141333
K = 14 | Average MAE = 0.42590090512340584 | Average Time = 0.01502890877026657


In [17]:
run_test_kmeans(15)

K = 15 | Fold = 1 | MAE = 0.4732320878028206 | Time Elapsed = 0.014727466243793369
K = 15 | Fold = 2 | MAE = 0.4483356505965799 | Time Elapsed = 0.015452595556532066
K = 15 | Fold = 3 | MAE = 0.41678221442433083 | Time Elapsed = 0.014902572242754349
K = 15 | Fold = 4 | MAE = 0.41467910741794095 | Time Elapsed = 0.014979968388936206
K = 15 | Fold = 5 | MAE = 0.3810525051935354 | Time Elapsed = 0.01472555266567431
K = 15 | Fold = 6 | MAE = 0.4474895963491561 | Time Elapsed = 0.015260893459832711
K = 15 | Fold = 7 | MAE = 0.4480255946684987 | Time Elapsed = 0.014785274240321444
K = 15 | Fold = 8 | MAE = 0.4188101128956367 | Time Elapsed = 0.01551123459746192
K = 15 | Fold = 9 | MAE = 0.417738735083721 | Time Elapsed = 0.014945737207327791
K = 15 | Fold = 10 | MAE = 0.4334454959558419 | Time Elapsed = 0.015552479100838054
K = 15 | Average MAE = 0.42995911003880616 | Average Time = 0.015084377370347225


In [18]:
run_test_kmeans(16)


K = 16 | Fold = 1 | MAE = 0.43617319493008794 | Time Elapsed = 0.014748014238141573
K = 16 | Fold = 2 | MAE = 0.41603369897956777 | Time Elapsed = 0.015057224447561156
K = 16 | Fold = 3 | MAE = 0.4412030259717251 | Time Elapsed = 0.014974039651731288
K = 16 | Fold = 4 | MAE = 0.44292553770479626 | Time Elapsed = 0.015190603269738688
K = 16 | Fold = 5 | MAE = 0.394361448757506 | Time Elapsed = 0.015143698228302192
K = 16 | Fold = 6 | MAE = 0.455355483987289 | Time Elapsed = 0.015054288964356809
K = 16 | Fold = 7 | MAE = 0.4663329574306246 | Time Elapsed = 0.015118100225080984
K = 16 | Fold = 8 | MAE = 0.44129681598492093 | Time Elapsed = 0.01572049260434882
K = 16 | Fold = 9 | MAE = 0.4227127803518399 | Time Elapsed = 0.015260994986185727
K = 16 | Fold = 10 | MAE = 0.41290619696317 | Time Elapsed = 0.015524869110155697
K = 16 | Average MAE = 0.4329301141061527 | Average Time = 0.015179232572560292


In [19]:
run_test_kmeans(17)


K = 17 | Fold = 1 | MAE = 0.4334235655703924 | Time Elapsed = 0.014947851431812497
K = 17 | Fold = 2 | MAE = 0.44835022682817405 | Time Elapsed = 0.01517435861610691
K = 17 | Fold = 3 | MAE = 0.4275079701550077 | Time Elapsed = 0.01471676758276157
K = 17 | Fold = 4 | MAE = 0.40966544795183235 | Time Elapsed = 0.015131288032688558
K = 17 | Fold = 5 | MAE = 0.391739567935964 | Time Elapsed = 0.015173129402778951
K = 17 | Fold = 6 | MAE = 0.4540756966421835 | Time Elapsed = 0.014868012572836141
K = 17 | Fold = 7 | MAE = 0.4787152556986743 | Time Elapsed = 0.015619259799444809
K = 17 | Fold = 8 | MAE = 0.42173892223820425 | Time Elapsed = 0.01563823324729799
K = 17 | Fold = 9 | MAE = 0.41150436906280285 | Time Elapsed = 0.015378335552846994
K = 17 | Fold = 10 | MAE = 0.4178233287420259 | Time Elapsed = 0.014995793024926221
K = 17 | Average MAE = 0.4294544350825261 | Average Time = 0.015164302926350065


In [20]:
run_test_kmeans(18)


K = 18 | Fold = 1 | MAE = 0.46701912852763244 | Time Elapsed = 0.015120489252985618
K = 18 | Fold = 2 | MAE = 0.41609657878598955 | Time Elapsed = 0.015471995982201583
K = 18 | Fold = 3 | MAE = 0.4117363171798841 | Time Elapsed = 0.015258574396598343
K = 18 | Fold = 4 | MAE = 0.44690849825173007 | Time Elapsed = 0.01501912360757579
K = 18 | Fold = 5 | MAE = 0.3607661584896797 | Time Elapsed = 0.015365286590920258
K = 18 | Fold = 6 | MAE = 0.4469310443594706 | Time Elapsed = 0.01509180861530967
K = 18 | Fold = 7 | MAE = 0.46359071854062855 | Time Elapsed = 0.015496589520269534
K = 18 | Fold = 8 | MAE = 0.4389204291023311 | Time Elapsed = 0.015272113212831792
K = 18 | Fold = 9 | MAE = 0.42308124824241916 | Time Elapsed = 0.015605380987365443
K = 18 | Fold = 10 | MAE = 0.4055742391693035 | Time Elapsed = 0.01581599025184529
K = 18 | Average MAE = 0.42806243606490685 | Average Time = 0.015351735241790334


In [21]:
run_test_kmeans(19)


K = 19 | Fold = 1 | MAE = 0.4641285405634278 | Time Elapsed = 0.015134426287083672
K = 19 | Fold = 2 | MAE = 0.4256386107207156 | Time Elapsed = 0.015426847168384468
K = 19 | Fold = 3 | MAE = 0.43496782150216856 | Time Elapsed = 0.015597384927959
K = 19 | Fold = 4 | MAE = 0.4834691486774573 | Time Elapsed = 0.015314635891791358
K = 19 | Fold = 5 | MAE = 0.41156180533407755 | Time Elapsed = 0.015354155185160711
K = 19 | Fold = 6 | MAE = 0.43780482036714996 | Time Elapsed = 0.015494559315460859
K = 19 | Fold = 7 | MAE = 0.4780087428622009 | Time Elapsed = 0.01578695794909265
K = 19 | Fold = 8 | MAE = 0.46227703031762135 | Time Elapsed = 0.015159915453242894
K = 19 | Fold = 9 | MAE = 0.43548014125335416 | Time Elapsed = 0.015519937477906665
K = 19 | Fold = 10 | MAE = 0.43074844536875395 | Time Elapsed = 0.01525810562784434
K = 19 | Average MAE = 0.44640851069669274 | Average Time = 0.01540469252839266


In [22]:
run_test_kmeans(20)


K = 20 | Fold = 1 | MAE = 0.4813080367329032 | Time Elapsed = 0.015356434974928411
K = 20 | Fold = 2 | MAE = 0.45537983666793713 | Time Elapsed = 0.015848688001253233
K = 20 | Fold = 3 | MAE = 0.4559655720960494 | Time Elapsed = 0.015530088448966967
K = 20 | Fold = 4 | MAE = 0.4373405629746449 | Time Elapsed = 0.015338894699759053
K = 20 | Fold = 5 | MAE = 0.4216318336498941 | Time Elapsed = 0.015167847026288114
K = 20 | Fold = 6 | MAE = 0.474011054354954 | Time Elapsed = 0.015941832282341847
K = 20 | Fold = 7 | MAE = 0.4696158967423541 | Time Elapsed = 0.015847806129670985
K = 20 | Fold = 8 | MAE = 0.4411986843432945 | Time Elapsed = 0.015210782776326287
K = 20 | Fold = 9 | MAE = 0.44910704356209485 | Time Elapsed = 0.015201184646262644
K = 20 | Fold = 10 | MAE = 0.4413073254798194 | Time Elapsed = 0.015185004858575523
K = 20 | Average MAE = 0.4526865846603945 | Average Time = 0.015462856384437307


# K-Means K=2 - K=20 DATASET_MIN_75 (RATING 1-5)

In [None]:
run_test_kmeans(2, rating_path='dataset_5_min_75/')

K = 2 | Fold = 1 | MAE = 0.43968513643210366 | Time Elapsed = 0.04264682694545927
K = 2 | Fold = 2 | MAE = 0.4292427148803828 | Time Elapsed = 0.038223762716220834
K = 2 | Fold = 3 | MAE = 0.43530153262090154 | Time Elapsed = 0.04488874286124141
K = 2 | Fold = 4 | MAE = 0.4487263132897807 | Time Elapsed = 0.03688812700838688
K = 2 | Fold = 5 | MAE = 0.4136465457454407 | Time Elapsed = 0.03756488446629602
K = 2 | Fold = 6 | MAE = 0.44086966701277336 | Time Elapsed = 0.03770922713536775
K = 2 | Fold = 7 | MAE = 0.42402515724817086 | Time Elapsed = 0.037152953148235676
K = 2 | Fold = 8 | MAE = 0.4207476587668508 | Time Elapsed = 0.0422135064691942
K = 2 | Fold = 9 | MAE = 0.43527756463204054 | Time Elapsed = 0.03810112630767784
K = 2 | Fold = 10 | MAE = 0.4329875509880371 | Time Elapsed = 0.04309747204227189
K = 2 | Average MAE = 0.43205098416164817 | Average Time = 0.03984866291003517


In [8]:
run_test_kmeans(3, rating_path='dataset_5_min_75/')

K = 3 | Fold = 1 | MAE = 0.43813919703985954 | Time Elapsed = 0.029218481821468918
K = 3 | Fold = 2 | MAE = 0.4357269918591432 | Time Elapsed = 0.029313578554087262
K = 3 | Fold = 3 | MAE = 0.43868805723795085 | Time Elapsed = 0.029428243656001107
K = 3 | Fold = 4 | MAE = 0.45333230397630847 | Time Elapsed = 0.029351435492959245
K = 3 | Fold = 5 | MAE = 0.4305074514228762 | Time Elapsed = 0.02936112727598933
K = 3 | Fold = 6 | MAE = 0.44599781779644326 | Time Elapsed = 0.029542597391398792
K = 3 | Fold = 7 | MAE = 0.4359731443698995 | Time Elapsed = 0.029166414926459144
K = 3 | Fold = 8 | MAE = 0.4072136263957832 | Time Elapsed = 0.02951623699385943
K = 3 | Fold = 9 | MAE = 0.43933866611882755 | Time Elapsed = 0.02925707994114642
K = 3 | Fold = 10 | MAE = 0.4339471127193857 | Time Elapsed = 0.02950190890856904
K = 3 | Average MAE = 0.43588643689364764 | Average Time = 0.029365710496193864


In [9]:
run_test_kmeans(4, rating_path='dataset_5_min_75/')

K = 4 | Fold = 1 | MAE = 0.44005981282846285 | Time Elapsed = 0.028616597298758067
K = 4 | Fold = 2 | MAE = 0.44643626343189174 | Time Elapsed = 0.027917890668882325
K = 4 | Fold = 3 | MAE = 0.44011512379750917 | Time Elapsed = 0.028598623776634333
K = 4 | Fold = 4 | MAE = 0.4468904619243923 | Time Elapsed = 0.02890967657029383
K = 4 | Fold = 5 | MAE = 0.4287115642594079 | Time Elapsed = 0.028441492247496755
K = 4 | Fold = 6 | MAE = 0.45210333683528087 | Time Elapsed = 0.028675276614473562
K = 4 | Fold = 7 | MAE = 0.4325569104981901 | Time Elapsed = 0.028602936927076862
K = 4 | Fold = 8 | MAE = 0.41247715431551624 | Time Elapsed = 0.028722032551892
K = 4 | Fold = 9 | MAE = 0.4413018961475906 | Time Elapsed = 0.028584127428899035
K = 4 | Fold = 10 | MAE = 0.4454603078237282 | Time Elapsed = 0.028651468942430224
K = 4 | Average MAE = 0.438611283186197 | Average Time = 0.0285720123026837


In [10]:
run_test_kmeans(5, rating_path='dataset_5_min_75/')

K = 5 | Fold = 1 | MAE = 0.44340841968681594 | Time Elapsed = 0.025137307244500307
K = 5 | Fold = 2 | MAE = 0.4508423879452205 | Time Elapsed = 0.027321065010360295
K = 5 | Fold = 3 | MAE = 0.44248844759652745 | Time Elapsed = 0.025539946862782558
K = 5 | Fold = 4 | MAE = 0.4779166596523825 | Time Elapsed = 0.026457596484082795
K = 5 | Fold = 5 | MAE = 0.4551010066610462 | Time Elapsed = 0.02800531754113145
K = 5 | Fold = 6 | MAE = 0.472425386897557 | Time Elapsed = 0.02705163355788734
K = 5 | Fold = 7 | MAE = 0.45969884762690544 | Time Elapsed = 0.02601667347208181
K = 5 | Fold = 8 | MAE = 0.44224624252420447 | Time Elapsed = 0.026548149402628875
K = 5 | Fold = 9 | MAE = 0.4346146081067001 | Time Elapsed = 0.025893664342086304
K = 5 | Fold = 10 | MAE = 0.4515672932159221 | Time Elapsed = 0.028952461658872976
K = 5 | Average MAE = 0.45303092999132827 | Average Time = 0.02669238155764147


In [None]:
run_test_kmeans(6  , rating_path='dataset_5_min_75/')

K = 6 | Fold = 1 | MAE = 0.4612213162917326 | Time Elapsed = 0.024271075407417034
K = 6 | Fold = 2 | MAE = 0.44627334196025414 | Time Elapsed = 0.024778595976815986
K = 6 | Fold = 3 | MAE = 0.47842446852140014 | Time Elapsed = 0.026007419735835455
K = 6 | Fold = 4 | MAE = 0.4710358833873993 | Time Elapsed = 0.02498049785528933
K = 6 | Fold = 5 | MAE = 0.46637872359209465 | Time Elapsed = 0.024322480909444593
K = 6 | Fold = 6 | MAE = 0.4816758650245843 | Time Elapsed = 0.024111029907358316
K = 6 | Fold = 7 | MAE = 0.4713673112868097 | Time Elapsed = 0.024359982865291594
K = 6 | Fold = 8 | MAE = 0.4588426268964488 | Time Elapsed = 0.025701857415989245
K = 6 | Fold = 9 | MAE = 0.4915347432715568 | Time Elapsed = 0.02583943985409414
K = 6 | Fold = 10 | MAE = 0.46125260655528105 | Time Elapsed = 0.025593075149769624
K = 6 | Average MAE = 0.468800688678756 | Average Time = 0.024996545507730533


In [4]:
run_test_kmeans(7  , rating_path='dataset_5_min_75/')

K = 7 | Fold = 1 | MAE = 0.4881268398141985 | Time Elapsed = 0.026864574504994004
K = 7 | Fold = 2 | MAE = 0.45056817100433827 | Time Elapsed = 0.023806765576379257
K = 7 | Fold = 3 | MAE = 0.49438559130567405 | Time Elapsed = 0.025104280101180095
K = 7 | Fold = 4 | MAE = 0.4847884801576591 | Time Elapsed = 0.025719112372425686
K = 7 | Fold = 5 | MAE = 0.4780917461007537 | Time Elapsed = 0.023976984911863748
K = 7 | Fold = 6 | MAE = 0.48673217946620356 | Time Elapsed = 0.024264298052630164
K = 7 | Fold = 7 | MAE = 0.4783612451771053 | Time Elapsed = 0.023686668757343432
K = 7 | Fold = 8 | MAE = 0.45812472678626004 | Time Elapsed = 0.024250866786984945
K = 7 | Fold = 9 | MAE = 0.48846100768609285 | Time Elapsed = 0.02622282644424877
K = 7 | Fold = 10 | MAE = 0.47011974631277237 | Time Elapsed = 0.025891562867743346
K = 7 | Average MAE = 0.47777597338110567 | Average Time = 0.024978794037579348


In [5]:
run_test_kmeans(8  , rating_path='dataset_5_min_75/')

K = 8 | Fold = 1 | MAE = 0.4918597070621071 | Time Elapsed = 0.024071469193568645
K = 8 | Fold = 2 | MAE = 0.4684236119395117 | Time Elapsed = 0.024698847762813295
K = 8 | Fold = 3 | MAE = 0.5158036703652003 | Time Elapsed = 0.024965173910056117
K = 8 | Fold = 4 | MAE = 0.5152044932080632 | Time Elapsed = 0.02325270045680218
K = 8 | Fold = 5 | MAE = 0.4700580208974866 | Time Elapsed = 0.023762430595322778
K = 8 | Fold = 6 | MAE = 0.49518558632185183 | Time Elapsed = 0.024148033515002006
K = 8 | Fold = 7 | MAE = 0.48277917732830034 | Time Elapsed = 0.024226501565682422
K = 8 | Fold = 8 | MAE = 0.45722520907650605 | Time Elapsed = 0.024625192428071645
K = 8 | Fold = 9 | MAE = 0.5194987979580727 | Time Elapsed = 0.023376755425542635
K = 8 | Fold = 10 | MAE = 0.471160280476354 | Time Elapsed = 0.02488336408941851
K = 8 | Average MAE = 0.48871985546334545 | Average Time = 0.024201046894228025


In [6]:
run_test_kmeans(9  , rating_path='dataset_5_min_75/')

K = 9 | Fold = 1 | MAE = 0.5180241692161646 | Time Elapsed = 0.02564232563521135
K = 9 | Fold = 2 | MAE = 0.49649585780293964 | Time Elapsed = 0.02352532686189255
K = 9 | Fold = 3 | MAE = 0.5304088560445432 | Time Elapsed = 0.023374068453654633
K = 9 | Fold = 4 | MAE = 0.506518595857088 | Time Elapsed = 0.024097512055737957
K = 9 | Fold = 5 | MAE = 0.501514157174156 | Time Elapsed = 0.02437113223783102
K = 9 | Fold = 6 | MAE = 0.5415018262863479 | Time Elapsed = 0.02353651651562802
K = 9 | Fold = 7 | MAE = 0.4867985777717324 | Time Elapsed = 0.02338236885530249
K = 9 | Fold = 8 | MAE = 0.48434165294867637 | Time Elapsed = 0.02217317150216287
K = 9 | Fold = 9 | MAE = 0.4774559607404196 | Time Elapsed = 0.022207882444506978
K = 9 | Fold = 10 | MAE = 0.4950602698237978 | Time Elapsed = 0.02313983661388957
K = 9 | Average MAE = 0.5038119923665866 | Average Time = 0.023545014117581742


In [7]:
run_test_kmeans(10  , rating_path='dataset_5_min_75/')

K = 10 | Fold = 1 | MAE = 0.506976802257748 | Time Elapsed = 0.022785042704186863
K = 10 | Fold = 2 | MAE = 0.4889491002909593 | Time Elapsed = 0.022186573259985097
K = 10 | Fold = 3 | MAE = 0.5338154395572634 | Time Elapsed = 0.022241153730399938
K = 10 | Fold = 4 | MAE = 0.50810405021402 | Time Elapsed = 0.022929707028929904
K = 10 | Fold = 5 | MAE = 0.4597134997342582 | Time Elapsed = 0.023621180130505418
K = 10 | Fold = 6 | MAE = 0.5300336332661923 | Time Elapsed = 0.02402071117861129
K = 10 | Fold = 7 | MAE = 0.5239399026581643 | Time Elapsed = 0.022965487821370346
K = 10 | Fold = 8 | MAE = 0.5059106253357736 | Time Elapsed = 0.022425937867058078
K = 10 | Fold = 9 | MAE = 0.5270579109055988 | Time Elapsed = 0.025066958409672865
K = 10 | Fold = 10 | MAE = 0.49366858397174557 | Time Elapsed = 0.02386352981453018
K = 10 | Average MAE = 0.5078169548191723 | Average Time = 0.023210628194524997


In [8]:
run_test_kmeans(11  , rating_path='dataset_5_min_75/')

K = 11 | Fold = 1 | MAE = 0.5294658404805366 | Time Elapsed = 0.022650388364850515
K = 11 | Fold = 2 | MAE = 0.47923647361391447 | Time Elapsed = 0.02324176467189205
K = 11 | Fold = 3 | MAE = 0.5266182601251203 | Time Elapsed = 0.024169090777040696
K = 11 | Fold = 4 | MAE = 0.5295617268379099 | Time Elapsed = 0.022675310814863577
K = 11 | Fold = 5 | MAE = 0.49338260423469116 | Time Elapsed = 0.021885571933904287
K = 11 | Fold = 6 | MAE = 0.5359405386404998 | Time Elapsed = 0.022823562665640805
K = 11 | Fold = 7 | MAE = 0.5138672658446526 | Time Elapsed = 0.021722472645681938
K = 11 | Fold = 8 | MAE = 0.49940546549939824 | Time Elapsed = 0.024400492996243407
K = 11 | Fold = 9 | MAE = 0.5193054196071839 | Time Elapsed = 0.022011008435049526
K = 11 | Fold = 10 | MAE = 0.5178744088775965 | Time Elapsed = 0.023606479801551516
K = 11 | Average MAE = 0.5144658003761504 | Average Time = 0.022918614310671832


In [9]:
run_test_kmeans(12  , rating_path='dataset_5_min_75/')

K = 12 | Fold = 1 | MAE = 0.5178623704153512 | Time Elapsed = 0.022327611131770394
K = 12 | Fold = 2 | MAE = 0.5300650710198859 | Time Elapsed = 0.023492971184394494
K = 12 | Fold = 3 | MAE = 0.5257243320674494 | Time Elapsed = 0.021847663417052545
K = 12 | Fold = 4 | MAE = 0.5501156402014722 | Time Elapsed = 0.022592323470351717
K = 12 | Fold = 5 | MAE = 0.5143554170489744 | Time Elapsed = 0.02219258705333079
K = 12 | Fold = 6 | MAE = 0.5491322689365141 | Time Elapsed = 0.023006478090764206
K = 12 | Fold = 7 | MAE = 0.5325366025345745 | Time Elapsed = 0.023219485174662532
K = 12 | Fold = 8 | MAE = 0.5079669346983269 | Time Elapsed = 0.02254570625656358
K = 12 | Fold = 9 | MAE = 0.511674077888845 | Time Elapsed = 0.022646395382339153
K = 12 | Fold = 10 | MAE = 0.5165805991664779 | Time Elapsed = 0.023311737417151035
K = 12 | Average MAE = 0.5256013313977871 | Average Time = 0.022718295857838043


In [10]:
# 10-fold cross-validation of K-Means with k = 13 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=13, rating_path='dataset_5_min_75/')

K = 13 | Fold = 1 | MAE = 0.5247143767996657 | Time Elapsed = 0.024377915978559737
K = 13 | Fold = 2 | MAE = 0.5007951680645953 | Time Elapsed = 0.0226528010783962
K = 13 | Fold = 3 | MAE = 0.5464492471821982 | Time Elapsed = 0.021452086216718593
K = 13 | Fold = 4 | MAE = 0.5245940877860116 | Time Elapsed = 0.022502433041563918
K = 13 | Fold = 5 | MAE = 0.5109370236907436 | Time Elapsed = 0.021581833335496525
K = 13 | Fold = 6 | MAE = 0.5646958256092789 | Time Elapsed = 0.02314585038571132
K = 13 | Fold = 7 | MAE = 0.5141276992994481 | Time Elapsed = 0.02239495722045709
K = 13 | Fold = 8 | MAE = 0.5315710612191841 | Time Elapsed = 0.02273523016156474
K = 13 | Fold = 9 | MAE = 0.5237816958555004 | Time Elapsed = 0.022009052516210836
K = 13 | Fold = 10 | MAE = 0.5058047449920723 | Time Elapsed = 0.021812144924494795
K = 13 | Average MAE = 0.5247470930498698 | Average Time = 0.022466430485917375


In [11]:
# 10-fold cross-validation of K-Means with k = 14 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=14, rating_path='dataset_5_min_75/')

K = 14 | Fold = 1 | MAE = 0.548306485562475 | Time Elapsed = 0.022273158755160765
K = 14 | Fold = 2 | MAE = 0.5225449595533042 | Time Elapsed = 0.022248535463062397
K = 14 | Fold = 3 | MAE = 0.5482985425822184 | Time Elapsed = 0.022092493367172387
K = 14 | Fold = 4 | MAE = 0.5410259587831766 | Time Elapsed = 0.021953413076905296
K = 14 | Fold = 5 | MAE = 0.5321012647211109 | Time Elapsed = 0.022081066257718485
K = 14 | Fold = 6 | MAE = 0.5390129380856704 | Time Elapsed = 0.021930948550410216
K = 14 | Fold = 7 | MAE = 0.5370195459742161 | Time Elapsed = 0.022317553970091896
K = 14 | Fold = 8 | MAE = 0.5465749206736473 | Time Elapsed = 0.02200960097009252
K = 14 | Fold = 9 | MAE = 0.5379188579858522 | Time Elapsed = 0.021865670816677295
K = 14 | Fold = 10 | MAE = 0.5530471818385477 | Time Elapsed = 0.02191265317771078
K = 14 | Average MAE = 0.5405850655760219 | Average Time = 0.0220685094405002


In [12]:
# 10-fold cross-validation of K-Means with k = 15 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=15, rating_path='dataset_5_min_75/')

K = 15 | Fold = 1 | MAE = 0.523044114746342 | Time Elapsed = 0.022967936635827548
K = 15 | Fold = 2 | MAE = 0.5379234105146565 | Time Elapsed = 0.022066834566892648
K = 15 | Fold = 3 | MAE = 0.5403629611769499 | Time Elapsed = 0.02224966647070609
K = 15 | Fold = 4 | MAE = 0.5384754119593415 | Time Elapsed = 0.021813962853275796
K = 15 | Fold = 5 | MAE = 0.5296092962637493 | Time Elapsed = 0.02194432858876744
K = 15 | Fold = 6 | MAE = 0.5409685261882339 | Time Elapsed = 0.021484748257856644
K = 15 | Fold = 7 | MAE = 0.5334269614765752 | Time Elapsed = 0.022488477172804396
K = 15 | Fold = 8 | MAE = 0.5048830052472338 | Time Elapsed = 0.022701020222296142
K = 15 | Fold = 9 | MAE = 0.5335247520796746 | Time Elapsed = 0.021863477731308093
K = 15 | Fold = 10 | MAE = 0.5137311803097249 | Time Elapsed = 0.023007051295489217
K = 15 | Average MAE = 0.5295949619962481 | Average Time = 0.0222587503795224


In [13]:
# 10-fold cross-validation of K-Means with k = 16 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=16, rating_path='dataset_5_min_75/')

K = 16 | Fold = 1 | MAE = 0.5392714388971233 | Time Elapsed = 0.022622731252913933
K = 16 | Fold = 2 | MAE = 0.5256242530774026 | Time Elapsed = 0.022566516149316332
K = 16 | Fold = 3 | MAE = 0.5535114951785046 | Time Elapsed = 0.02188580653399583
K = 16 | Fold = 4 | MAE = 0.5328637985438934 | Time Elapsed = 0.022485412087393016
K = 16 | Fold = 5 | MAE = 0.5318914416281013 | Time Elapsed = 0.0216916386915732
K = 16 | Fold = 6 | MAE = 0.5535022334377492 | Time Elapsed = 0.022312865883038796
K = 16 | Fold = 7 | MAE = 0.5417899656891768 | Time Elapsed = 0.02155759810046438
K = 16 | Fold = 8 | MAE = 0.5277425925394346 | Time Elapsed = 0.02220372710850171
K = 16 | Fold = 9 | MAE = 0.5550180303994803 | Time Elapsed = 0.02169610825678559
K = 16 | Fold = 10 | MAE = 0.529104331608996 | Time Elapsed = 0.022465298661273093
K = 16 | Average MAE = 0.5390319580999863 | Average Time = 0.022148770272525587


In [None]:
# 10-fold cross-validation of K-Means with k = 17 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=17, rating_path='dataset_5_min_75/')

In [None]:
# 10-fold cross-validation of K-Means with k = 18 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=18, rating_path='dataset_5_min_75/')

In [None]:
# 10-fold cross-validation of K-Means with k = 19 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=19, rating_path='dataset_5_min_75/')

In [None]:
# 10-fold cross-validation of K-Means with k = 20 on the 'dataset_5_min_75/' ratings.
run_test_kmeans(n_clusters=20, rating_path='dataset_5_min_75/')