In [1]:
from data_prep import load_rating_data
from cluster import dbscan_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [2]:
def run_test_dbscan(epsilon, rating_path = 'dataset_5_min_75/', item_path = 'comic_genre.csv', n_folds = 10):
    """Cross-validate the DBSCAN-based predictor for a single epsilon value.

    For every fold ``1..n_folds`` the train and test splits are loaded with
    ``load_rating_data``, the training data is clustered with
    ``dbscan_clustering`` and the result is scored with ``time_evaluate``.
    Progress lines are printed while a fold runs; once all folds are done the
    cell output is replaced by one line per fold followed by the averages.

    Parameters
    ----------
    epsilon
        Forwarded unchanged as the first argument of ``dbscan_clustering`` and
        echoed in every log line.
    rating_path : str
        Forwarded to ``load_rating_data`` as ``rating_path``.
    item_path : str
        Forwarded to ``load_rating_data`` as ``item_path``.
    n_folds : int, default 10
        Number of folds to evaluate; folds are requested from
        ``load_rating_data`` as ``number=1 .. n_folds``.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    total_mae = []
    total_time = []

    # k-fold cross-validation; fold numbers are 1-based
    for fold in range(1, n_folds + 1):
        # load dataset
        print('DBSCAN (eps={}) | Fold = {}'.format(epsilon, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster = dbscan_clustering(epsilon, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # `elapsed` rather than `time`, so the local does not read like the stdlib module
        mae, elapsed = time_evaluate(test_data, ratings_cluster, centroids=None)
        total_mae.append(mae)
        total_time.append(elapsed)
        print('Predicting and Evaluating Finished')

        # wait=True defers the clear until the next print, so the progress log
        # is swapped for the one-line fold summary without flicker
        clear_output(wait=True)
        print('Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}\n'.format(epsilon, fold, mae, elapsed))

    # replace whatever is left in the cell output with the final table
    clear_output(wait=True)
    for fold, (fold_mae, fold_time) in enumerate(zip(total_mae, total_time), start=1):
        print('Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}'.format(epsilon, fold, fold_mae, fold_time))

    # average over the folds actually evaluated instead of a hard-coded 10
    avg_mae = sum(total_mae) / len(total_mae)
    avg_time = sum(total_time) / len(total_time)
    print('Eps = {} | Average MAE = {} | Average Time = {}'.format(epsilon, avg_mae, avg_time))

# DBSCAN: Eps = 7.8 to Eps = 20 on DATASET_MIN_75 (RATING 1-5)

In [None]:
# First point of the epsilon sweep (eps = 7.8). rating_path is spelled out even
# though it equals the default declared by run_test_dbscan.
run_test_dbscan(7.8, rating_path='dataset_5_min_75/')

Eps = 7.8 | Fold = 1 | MAE = 0.8139559621853646 | Time Elapsed = 0.04247715150876218
Eps = 7.8 | Fold = 2 | MAE = 0.8102310844270089 | Time Elapsed = 0.04372012779779956
Eps = 7.8 | Fold = 3 | MAE = 0.8194011371869288 | Time Elapsed = 0.050156913575768604
Eps = 7.8 | Fold = 4 | MAE = 0.8139997121860705 | Time Elapsed = 0.05306569470997241
Eps = 7.8 | Fold = 5 | MAE = 0.8198572740844148 | Time Elapsed = 0.06772324409645447
Eps = 7.8 | Fold = 6 | MAE = 0.8311560162678382 | Time Elapsed = 0.06442868511509765
Eps = 7.8 | Fold = 7 | MAE = 0.816311507372774 | Time Elapsed = 0.06406300708795405
Eps = 7.8 | Fold = 8 | MAE = 0.8081248909508287 | Time Elapsed = 0.04590224254269114
Eps = 7.8 | Fold = 9 | MAE = 0.8207978371618732 | Time Elapsed = 0.04845850849677321
Eps = 7.8 | Fold = 10 | MAE = 0.8159860240709742 | Time Elapsed = 0.06542329713183705
Eps = 7.8 | Average MAE = 0.8169821445894077 | Average Time = 0.054541887206311035


In [None]:
# Same cross-validation run at eps = 8.
run_test_dbscan(8, rating_path='dataset_5_min_75/')

Eps = 8 | Fold = 1 | MAE = 0.8149365440875256 | Time Elapsed = 0.05126021568066253
Eps = 8 | Fold = 2 | MAE = 0.8097555500388964 | Time Elapsed = 0.0687499539017018
Eps = 8 | Fold = 3 | MAE = 0.819881833091519 | Time Elapsed = 0.0506759526413144
Eps = 8 | Fold = 4 | MAE = 0.8125592704483169 | Time Elapsed = 0.052203425949007065
Eps = 8 | Fold = 5 | MAE = 0.819907842790429 | Time Elapsed = 0.0489727466164104
Eps = 8 | Fold = 6 | MAE = 0.8309989615010642 | Time Elapsed = 0.06606665350361927
Eps = 8 | Fold = 7 | MAE = 0.8145324421256337 | Time Elapsed = 0.05387852755646267
Eps = 8 | Fold = 8 | MAE = 0.8107789866378965 | Time Elapsed = 0.044778930988870806
Eps = 8 | Fold = 9 | MAE = 0.8222458452855063 | Time Elapsed = 0.044892996235414796
Eps = 8 | Fold = 10 | MAE = 0.8160441375739774 | Time Elapsed = 0.04457636587850421
Eps = 8 | Average MAE = 0.8171641413580766 | Average Time = 0.05260557689519679


In [None]:
# Same cross-validation run at eps = 8.1.
run_test_dbscan(8.1, rating_path='dataset_5_min_75/')

Eps = 8.1 | Fold = 1 | MAE = 0.8171518711191332 | Time Elapsed = 0.05116639118059423
Eps = 8.1 | Fold = 2 | MAE = 0.8147517591388018 | Time Elapsed = 0.044400724480624064
Eps = 8.1 | Fold = 3 | MAE = 0.8192689101291097 | Time Elapsed = 0.04444502736663526
Eps = 8.1 | Fold = 4 | MAE = 0.8200240659607302 | Time Elapsed = 0.06044879491842307
Eps = 8.1 | Fold = 5 | MAE = 0.8210392262948328 | Time Elapsed = 0.05921779583231573
Eps = 8.1 | Fold = 6 | MAE = 0.8362987122935029 | Time Elapsed = 0.05640279176222156
Eps = 8.1 | Fold = 7 | MAE = 0.823504857095632 | Time Elapsed = 0.05765857543753414
Eps = 8.1 | Fold = 8 | MAE = 0.8108902332733464 | Time Elapsed = 0.0451410814734365
Eps = 8.1 | Fold = 9 | MAE = 0.8244451107534808 | Time Elapsed = 0.05653803802258635
Eps = 8.1 | Fold = 10 | MAE = 0.8211225961021552 | Time Elapsed = 0.044999908556404566
Eps = 8.1 | Average MAE = 0.8208497342160725 | Average Time = 0.052041912903077546


In [None]:
# Same cross-validation run at eps = 8.2.
run_test_dbscan(8.2, rating_path='dataset_5_min_75/')

Eps = 8.2 | Fold = 1 | MAE = 0.8158006483837521 | Time Elapsed = 0.044551934699709334
Eps = 8.2 | Fold = 2 | MAE = 0.8168069251229638 | Time Elapsed = 0.0439995799983384
Eps = 8.2 | Fold = 3 | MAE = 0.8264074187541917 | Time Elapsed = 0.04657640557470507
Eps = 8.2 | Fold = 4 | MAE = 0.8217431844328854 | Time Elapsed = 0.04752800743541791
Eps = 8.2 | Fold = 5 | MAE = 0.8265150793712966 | Time Elapsed = 0.06216545610318222
Eps = 8.2 | Fold = 6 | MAE = 0.83691520269292 | Time Elapsed = 0.04332880122836214
Eps = 8.2 | Fold = 7 | MAE = 0.8252408050556576 | Time Elapsed = 0.04444273990583588
Eps = 8.2 | Fold = 8 | MAE = 0.8109389731027744 | Time Elapsed = 0.042958219758073186
Eps = 8.2 | Fold = 9 | MAE = 0.8236438130274201 | Time Elapsed = 0.04347680850381061
Eps = 8.2 | Fold = 10 | MAE = 0.8215768926424757 | Time Elapsed = 0.05689059011465158
Eps = 8.2 | Average MAE = 0.8225588942586338 | Average Time = 0.047591854332208625


In [None]:
# Cross-validation run at eps = 8.3.
# NOTE(review): the saved output of this cell stops while fold 6 is being
# evaluated and has no averages line, so the run appears to have been
# interrupted -- re-run before using its numbers.
run_test_dbscan(8.3, rating_path='dataset_5_min_75/')

Eps = 8.3 | Fold = 5 | MAE = 0.8266630313618882 | Time Elapsed = 0.048685720909400654

DBSCAN (eps=8.3) | Fold = 6
Loading Data...
Data Loaded...
Clustering Starts
Clustering Finished
Predicting and Evaluating Starts


In [None]:
# Coarser part of the sweep: eps = 9 .. 20 in steps of 1, one call per cell.
# None of these cells has saved output in this notebook.
# The calls are kept in separate cells because run_test_dbscan clears the cell
# output when it finishes; looping over eps inside one cell would erase the
# results of the earlier values.
run_test_dbscan(9, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(10, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(11, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(12, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(13, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(14, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(15, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(16, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(17, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(18, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(19, rating_path='dataset_5_min_75/')

In [None]:
run_test_dbscan(20, rating_path='dataset_5_min_75/')