In [1]:
from data_prep import load_rating_data
from cluster import dbscan_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [2]:
def run_test_dbscan(epsilon, rating_path = 'dataset_5_min_100/', item_path = 'comic_genre.csv', n_folds = 10):
    """Cross-validate the DBSCAN clustering pipeline for a single ``epsilon``.

    For every fold ``i`` in ``1..n_folds`` this loads the train and test splits
    via ``load_rating_data``, clusters with ``dbscan_clustering``, and scores the
    result with ``time_evaluate``. Progress messages are printed while a fold is
    running; once all folds are done the cell output is cleared and replaced by
    one summary line per fold followed by the averages.

    Parameters
    ----------
    epsilon : number
        Forwarded as the first argument of ``dbscan_clustering`` and echoed in
        every printed line.
    rating_path : str
        Forwarded to ``load_rating_data`` as ``rating_path``.
    item_path : str
        Forwarded to ``load_rating_data`` as ``item_path``.
    n_folds : int, default 10
        Number of folds to run; folds are numbered from 1. The default keeps
        the original 10-fold behaviour.

    Returns
    -------
    None
        Results are only printed, so calling this as the last line of a cell
        produces no ``Out[...]`` value.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    total_mae = []
    total_time = []

    # k-fold cross-validation; fold numbers are 1-based because they are passed
    # straight to load_rating_data as `number`.
    for fold in range(1, n_folds + 1):
        # load dataset
        print('DBSCAN (eps={}) | Fold = {}'.format(epsilon, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster = dbscan_clustering(epsilon, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # `elapsed` rather than `time`, so the stdlib module name is never shadowed.
        mae, elapsed = time_evaluate(test_data, ratings_cluster, centroids=None)
        total_mae.append(mae)
        total_time.append(elapsed)
        print('Predicting and Evaluating Finished')

        # Replace the chatty progress log with a single result line for this fold.
        clear_output(wait=True)
        print('Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}\n'.format(epsilon, fold, mae, elapsed))


    # Final report: wipe the last per-fold line and print the full table.
    clear_output(wait=True)
    for fold, (mae, elapsed) in enumerate(zip(total_mae, total_time), start=1):
        print('Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}'.format(epsilon, fold, mae, elapsed))
    # Divide by the number of collected results instead of a literal 10 so the
    # averages stay correct for any n_folds.
    avg_mae = sum(total_mae) / len(total_mae)
    avg_time = sum(total_time) / len(total_time)
    print('Eps = {} | Average MAE = {} | Average Time = {}'.format(epsilon, avg_mae, avg_time))

# DBSCAN Eps = 9.6 - Eps = 16.8 DATASET_MIN_100 (RATING 1-5)

In [None]:
# First point of the eps sweep. The recorded output below lists per-fold MAE and
# evaluation time, followed by the averages over the 10 folds.
run_test_dbscan(9.6)

Eps = 9.6 | Fold = 1 | MAE = 0.7675135219958336 | Time Elapsed = 0.02042511981908574
Eps = 9.6 | Fold = 2 | MAE = 0.7539053910433107 | Time Elapsed = 0.020028928554623217
Eps = 9.6 | Fold = 3 | MAE = 0.7640230630637506 | Time Elapsed = 0.02019414321900771
Eps = 9.6 | Fold = 4 | MAE = 0.7721751457880569 | Time Elapsed = 0.020258719979726343
Eps = 9.6 | Fold = 5 | MAE = 0.7514019361131026 | Time Elapsed = 0.02294452197314164
Eps = 9.6 | Fold = 6 | MAE = 0.7883100001207105 | Time Elapsed = 0.020811059764076212
Eps = 9.6 | Fold = 7 | MAE = 0.7850148716913031 | Time Elapsed = 0.023996828241476604
Eps = 9.6 | Fold = 8 | MAE = 0.7542000143446822 | Time Elapsed = 0.027081085315599926
Eps = 9.6 | Fold = 9 | MAE = 0.7686350179338511 | Time Elapsed = 0.021729305252519416
Eps = 9.6 | Fold = 10 | MAE = 0.7574004774123672 | Time Elapsed = 0.025222587056041537
Eps = 9.6 | Average MAE = 0.7662579439506969 | Average Time = 0.02226922991752984


In [None]:
# eps = 9.7: same 10-fold run; per-fold results and the averages are in the
# recorded output below.
run_test_dbscan(9.7)

Eps = 9.7 | Fold = 1 | MAE = 0.7694449988479252 | Time Elapsed = 0.023837644255553275
Eps = 9.7 | Fold = 2 | MAE = 0.7601383862738641 | Time Elapsed = 0.020658449263299884
Eps = 9.7 | Fold = 3 | MAE = 0.7626331022185154 | Time Elapsed = 0.023543429325154692
Eps = 9.7 | Fold = 4 | MAE = 0.7698099600555207 | Time Elapsed = 0.01994630147262499
Eps = 9.7 | Fold = 5 | MAE = 0.7544536348120934 | Time Elapsed = 0.022406099119705723
Eps = 9.7 | Fold = 6 | MAE = 0.7899925847047556 | Time Elapsed = 0.021867996362600794
Eps = 9.7 | Fold = 7 | MAE = 0.788308497711683 | Time Elapsed = 0.022860567614932215
Eps = 9.7 | Fold = 8 | MAE = 0.766640090399658 | Time Elapsed = 0.02641137891218959
Eps = 9.7 | Fold = 9 | MAE = 0.7667776167161445 | Time Elapsed = 0.020394545481914748
Eps = 9.7 | Fold = 10 | MAE = 0.7665932812909373 | Time Elapsed = 0.022868475342326586
Eps = 9.7 | Average MAE = 0.7694792153031098 | Average Time = 0.02247948871503025


In [None]:
# eps = 9.8: same 10-fold run; per-fold results and the averages are in the
# recorded output below.
run_test_dbscan(9.8)

Eps = 9.8 | Fold = 1 | MAE = 0.7687683937264554 | Time Elapsed = 0.022926525866476685
Eps = 9.8 | Fold = 2 | MAE = 0.7620511527956391 | Time Elapsed = 0.02743330613752566
Eps = 9.8 | Fold = 3 | MAE = 0.7757875134989843 | Time Elapsed = 0.026103221553073756
Eps = 9.8 | Fold = 4 | MAE = 0.76316646853593 | Time Elapsed = 0.026781949811983393
Eps = 9.8 | Fold = 5 | MAE = 0.7527572254729293 | Time Elapsed = 0.024693619332520015
Eps = 9.8 | Fold = 6 | MAE = 0.7851309947834795 | Time Elapsed = 0.025451396951185718
Eps = 9.8 | Fold = 7 | MAE = 0.7851401683229235 | Time Elapsed = 0.021298928860542694
Eps = 9.8 | Fold = 8 | MAE = 0.7644699116294968 | Time Elapsed = 0.02245141609723386
Eps = 9.8 | Fold = 9 | MAE = 0.7758451780456136 | Time Elapsed = 0.021525776188578497
Eps = 9.8 | Fold = 10 | MAE = 0.770308946198448 | Time Elapsed = 0.020385654854343887
Eps = 9.8 | Average MAE = 0.7703425953009899 | Average Time = 0.023905179565346414


In [None]:
# NOTE(review): the recorded output for this run is incomplete — it shows only the
# fold 6 result line and then the progress log of fold 7 up to the evaluation
# step, so no 10-fold average exists for eps = 10. Re-run before comparing.
run_test_dbscan(10)

Eps = 10 | Fold = 6 | MAE = 0.7885988628091414 | Time Elapsed = 0.02456825777286186

DBSCAN (eps=10) | Fold = 7
Loading Data...
Data Loaded...
Clustering Starts
Clustering Finished
Predicting and Evaluating Starts


In [None]:
# Remaining sweep points (eps = 10.5 through 16.8), one call per cell. No output is
# recorded for any of these cells in this copy of the notebook, so they
# contribute no MAE/time figures yet.
run_test_dbscan(10.5)

In [None]:
run_test_dbscan(11)

In [None]:
run_test_dbscan(11.5)

In [None]:
run_test_dbscan(12)

In [None]:
run_test_dbscan(12.5)

In [None]:
run_test_dbscan(13)

In [None]:
run_test_dbscan(13.5)

In [None]:
run_test_dbscan(14)

In [None]:
run_test_dbscan(14.5)

In [None]:
run_test_dbscan(15)

In [None]:
run_test_dbscan(15.5)

In [None]:
run_test_dbscan(16)

In [None]:
run_test_dbscan(16.5)

In [None]:
run_test_dbscan(16.8)