# Main Function

In [1]:
from data_prep import load_rating_data
from cluster import kmeans_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [2]:
def run_test_kmeans(n_clusters, rating_path = 'dataset_5_min_75/', item_path = 'comic_genre.csv', n_folds = 10):
    """Run a cross-validated K-Means evaluation and print the results.

    For every fold number in 1..n_folds the train and test splits are loaded
    with ``load_rating_data``, the training data is clustered with
    ``kmeans_clustering`` and the test split is scored with ``time_evaluate``.
    Progress messages are printed while a fold is running and cleared with
    ``clear_output`` once it finishes; a per-fold summary followed by the
    averages is printed at the end.

    Parameters
    ----------
    n_clusters : int
        Number of clusters, forwarded to ``kmeans_clustering``.
    rating_path : str
        Forwarded to ``load_rating_data`` as ``rating_path``.
    item_path : str
        Forwarded to ``load_rating_data`` as ``item_path``.
    n_folds : int, optional
        How many folds to evaluate (fold numbers 1..n_folds). Defaults to 10.

    Returns
    -------
    None
        Results are only printed, so a notebook cell ending in this call does
        not echo a value.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    total_mae = []
    total_time = []

    # K-Fold Cross-Validation (fold numbers are 1-based)
    for fold in range(1, n_folds + 1):
        # load dataset
        print('K-Means (k={}) | Fold = {}'.format(n_clusters, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster, cluster_centroids = kmeans_clustering(n_clusters, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # `elapsed` rather than `time`, to avoid shadowing the stdlib module name
        mae, elapsed = time_evaluate(test_data, ratings_cluster, cluster_centroids)
        total_mae.append(mae)
        total_time.append(elapsed)
        print('Predicting and Evaluating Finished')

        # replace the progress log of this fold with a one-line result
        clear_output(wait=True)
        print('K = {} | Fold = {} | MAE = {} | Time Elapsed = {}\n'.format(n_clusters, fold, mae, elapsed))

    # final report: one line per fold, then the averages
    clear_output(wait=True)
    for fold, (fold_mae, fold_time) in enumerate(zip(total_mae, total_time), start=1):
        print('K = {} | Fold = {} | MAE = {} | Time Elapsed = {}'.format(n_clusters, fold, fold_mae, fold_time))

    # divide by the number of folds actually evaluated, not a fixed 10
    avg_mae = sum(total_mae) / len(total_mae)
    avg_time = sum(total_time) / len(total_time)
    print('K = {} | Average MAE = {} | Average Time = {}'.format(n_clusters, avg_mae, avg_time))

# No Clustering

In [3]:
# Baseline: a single cluster (k=1), i.e. effectively no clustering.
run_test_kmeans(n_clusters=1, rating_path='dataset_5_min_75/')

K = 1 | Fold = 1 | MAE = 0.8148730613791498 | Time Elapsed = 0.05989280749031565
K = 1 | Fold = 2 | MAE = 0.8114433017903402 | Time Elapsed = 0.05124692488424027
K = 1 | Fold = 3 | MAE = 0.8205062644714537 | Time Elapsed = 0.0592468726440256
K = 1 | Fold = 4 | MAE = 0.8164264410905295 | Time Elapsed = 0.07276250382678626
K = 1 | Fold = 5 | MAE = 0.8234654574661855 | Time Elapsed = 0.07258743371972225
K = 1 | Fold = 6 | MAE = 0.8321202585939066 | Time Elapsed = 0.05943568919106904
K = 1 | Fold = 7 | MAE = 0.8171025837555983 | Time Elapsed = 0.05788431773975283
K = 1 | Fold = 8 | MAE = 0.8101566255413716 | Time Elapsed = 0.056146008107857026
K = 1 | Fold = 9 | MAE = 0.8217688658204603 | Time Elapsed = 0.06005717811253998
K = 1 | Fold = 10 | MAE = 0.8170008679354739 | Time Elapsed = 0.06007169657371717
K = 1 | Average MAE = 0.8184863727844469 | Average Time = 0.060933143229002604


# K-Means K=2 to K=20 on dataset_5_min_75 (ratings 1-5)

In [4]:
# K-Means evaluation with k=2 clusters (default dataset paths).
run_test_kmeans(n_clusters=2)

K = 2 | Fold = 1 | MAE = 0.8879477569148914 | Time Elapsed = 0.05330612835077765
K = 2 | Fold = 2 | MAE = 0.8857139525603617 | Time Elapsed = 0.043368089626106436
K = 2 | Fold = 3 | MAE = 0.889357493393063 | Time Elapsed = 0.04347732537805056
K = 2 | Fold = 4 | MAE = 0.897138948693297 | Time Elapsed = 0.043679409181508956
K = 2 | Fold = 5 | MAE = 0.8808844891521195 | Time Elapsed = 0.0461361569032029
K = 2 | Fold = 6 | MAE = 0.8957222101224602 | Time Elapsed = 0.04353744166543138
K = 2 | Fold = 7 | MAE = 0.8867527738838981 | Time Elapsed = 0.04261391839477551
K = 2 | Fold = 8 | MAE = 0.8823376237303717 | Time Elapsed = 0.04200558002146646
K = 2 | Fold = 9 | MAE = 0.889340743655417 | Time Elapsed = 0.03988277231590702
K = 2 | Fold = 10 | MAE = 0.8804280339309679 | Time Elapsed = 0.039354317237578006
K = 2 | Average MAE = 0.8875624026036848 | Average Time = 0.04373611390748049


In [5]:
# K-Means evaluation with k=3 clusters (default dataset paths).
run_test_kmeans(n_clusters=3)

K = 3 | Fold = 1 | MAE = 0.8948013091290226 | Time Elapsed = 0.031566787692771
K = 3 | Fold = 2 | MAE = 0.8921548882254622 | Time Elapsed = 0.03431556953042254
K = 3 | Fold = 3 | MAE = 0.894632107325555 | Time Elapsed = 0.03465643975030527
K = 3 | Fold = 4 | MAE = 0.9010288135377978 | Time Elapsed = 0.03182607281030356
K = 3 | Fold = 5 | MAE = 0.8986230965188567 | Time Elapsed = 0.032163747042166506
K = 3 | Fold = 6 | MAE = 0.9051658664893675 | Time Elapsed = 0.03189424726057326
K = 3 | Fold = 7 | MAE = 0.8997229404494949 | Time Elapsed = 0.03137847945961114
K = 3 | Fold = 8 | MAE = 0.8786232701528168 | Time Elapsed = 0.032431986829893886
K = 3 | Fold = 9 | MAE = 0.8875430273059088 | Time Elapsed = 0.031422130135192496
K = 3 | Fold = 10 | MAE = 0.887390886811068 | Time Elapsed = 0.031936463258299026
K = 3 | Average MAE = 0.893968620594535 | Average Time = 0.03235919237695387


In [6]:
# K-Means evaluation with k=4 clusters (default dataset paths).
run_test_kmeans(n_clusters=4)

K = 4 | Fold = 1 | MAE = 0.8907418108964733 | Time Elapsed = 0.03148879165356721
K = 4 | Fold = 2 | MAE = 0.8977863349493702 | Time Elapsed = 0.029589156551079975
K = 4 | Fold = 3 | MAE = 0.8936119219389722 | Time Elapsed = 0.032005809226783775
K = 4 | Fold = 4 | MAE = 0.8987079241780944 | Time Elapsed = 0.03085833239844743
K = 4 | Fold = 5 | MAE = 0.8936905466421234 | Time Elapsed = 0.030929315865999972
K = 4 | Fold = 6 | MAE = 0.9073898375385874 | Time Elapsed = 0.03192021180904204
K = 4 | Fold = 7 | MAE = 0.8973540320583869 | Time Elapsed = 0.030158793175846035
K = 4 | Fold = 8 | MAE = 0.8825776862423016 | Time Elapsed = 0.0314029051675883
K = 4 | Fold = 9 | MAE = 0.8893466801384498 | Time Elapsed = 0.030867522508373695
K = 4 | Fold = 10 | MAE = 0.8963972898667864 | Time Elapsed = 0.030937469260180865
K = 4 | Average MAE = 0.8947604064449546 | Average Time = 0.031015830761690934


In [7]:
# K-Means evaluation with k=5 clusters (default dataset paths).
run_test_kmeans(n_clusters=5)

K = 5 | Fold = 1 | MAE = 0.9023851132020319 | Time Elapsed = 0.02767983044051662
K = 5 | Fold = 2 | MAE = 0.9046063415551717 | Time Elapsed = 0.029363447670319072
K = 5 | Fold = 3 | MAE = 0.8966080892864943 | Time Elapsed = 0.03266439770185671
K = 5 | Fold = 4 | MAE = 0.9265552653919603 | Time Elapsed = 0.031033429513712525
K = 5 | Fold = 5 | MAE = 0.9101626943402902 | Time Elapsed = 0.03361894416490049
K = 5 | Fold = 6 | MAE = 0.9221466387530932 | Time Elapsed = 0.030396314385679763
K = 5 | Fold = 7 | MAE = 0.9189660656008546 | Time Elapsed = 0.02753499178460477
K = 5 | Fold = 8 | MAE = 0.9079133945034541 | Time Elapsed = 0.02913335311567376
K = 5 | Fold = 9 | MAE = 0.8919544574314661 | Time Elapsed = 0.027701114534299662
K = 5 | Fold = 10 | MAE = 0.899690445162278 | Time Elapsed = 0.031409984496046865
K = 5 | Average MAE = 0.9080988505227093 | Average Time = 0.030053580780761025


In [8]:
# K-Means evaluation with k=6 clusters (default dataset paths).
run_test_kmeans(n_clusters=6)

K = 6 | Fold = 1 | MAE = 0.9140819050214781 | Time Elapsed = 0.026073753724271138
K = 6 | Fold = 2 | MAE = 0.9057373237416083 | Time Elapsed = 0.026901864272461815
K = 6 | Fold = 3 | MAE = 0.9163533953618476 | Time Elapsed = 0.02839923440131411
K = 6 | Fold = 4 | MAE = 0.9211694346971299 | Time Elapsed = 0.026862844447224938
K = 6 | Fold = 5 | MAE = 0.9281082297233731 | Time Elapsed = 0.027878864864108593
K = 6 | Fold = 6 | MAE = 0.9309191144856749 | Time Elapsed = 0.02603170958636695
K = 6 | Fold = 7 | MAE = 0.9286302862839961 | Time Elapsed = 0.027288455943467762
K = 6 | Fold = 8 | MAE = 0.9170805745415906 | Time Elapsed = 0.02774727309697209
K = 6 | Fold = 9 | MAE = 0.926091073019099 | Time Elapsed = 0.02815639638942743
K = 6 | Fold = 10 | MAE = 0.9143589463485556 | Time Elapsed = 0.027903630067863432
K = 6 | Average MAE = 0.9202530283224352 | Average Time = 0.027324402679347826


In [None]:
# K-Means evaluation with k=7 clusters (default dataset paths).
run_test_kmeans(n_clusters=7)

K = 7 | Fold = 3 | MAE = 0.9289260293191737 | Time Elapsed = 0.030283766753006418

K-Means (k=7) | Fold = 4
Loading Data...
Data Loaded...
Clustering Starts
Clustering Finished
Predicting and Evaluating Starts


In [None]:
# K-Means evaluation with k=8 clusters (default dataset paths).
run_test_kmeans(n_clusters=8)

In [None]:
# K-Means evaluation with k=9 clusters (default dataset paths).
run_test_kmeans(n_clusters=9)

In [None]:
# K-Means evaluation with k=10 clusters (default dataset paths).
run_test_kmeans(n_clusters=10)

In [None]:
# K-Means evaluation with k=11 clusters (default dataset paths).
run_test_kmeans(n_clusters=11)

In [None]:
# K-Means evaluation with k=12 clusters (default dataset paths).
run_test_kmeans(n_clusters=12)

In [None]:
# K-Means evaluation with k=13 clusters (default dataset paths).
run_test_kmeans(n_clusters=13)

In [None]:
# K-Means evaluation with k=14 clusters (default dataset paths).
run_test_kmeans(n_clusters=14)

In [None]:
# K-Means evaluation with k=15 clusters (default dataset paths).
run_test_kmeans(n_clusters=15)

In [None]:
# K-Means evaluation with k=16 clusters (default dataset paths).
run_test_kmeans(n_clusters=16)


In [None]:
# K-Means evaluation with k=17 clusters (default dataset paths).
run_test_kmeans(n_clusters=17)


In [None]:
# K-Means evaluation with k=18 clusters (default dataset paths).
run_test_kmeans(n_clusters=18)


In [None]:
# K-Means evaluation with k=19 clusters (default dataset paths).
run_test_kmeans(n_clusters=19)


In [None]:
# K-Means evaluation with k=20 clusters (default dataset paths).
run_test_kmeans(n_clusters=20)
