In [1]:
from data_prep import load_rating_data
from cluster import dbscan_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [2]:
def run_test_dbscan(epsilon, rating_path = 'dataset_5_min_100/', item_path = 'comic_genre.csv', n_folds = 10):
    """Cross-validate the DBSCAN pipeline for one epsilon and print the results.

    For every fold ``1..n_folds`` the train and test splits are loaded with
    ``load_rating_data``, the training data is clustered with
    ``dbscan_clustering`` and the result is scored with ``time_evaluate``.
    A per-fold table and the averages over all folds are printed at the end.

    Parameters
    ----------
    epsilon : int or float
        Forwarded as the first argument of ``dbscan_clustering``.
    rating_path : str
        Forwarded to ``load_rating_data`` as ``rating_path``.
    item_path : str
        Forwarded to ``load_rating_data`` as ``item_path``.
    n_folds : int
        Number of folds to run; folds are numbered starting at 1.
        Defaults to 10, the value that used to be hard-coded.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    fold_mae = []
    fold_time = []
    row_template = 'Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}'

    for fold in range(1, n_folds + 1):
        # load dataset
        print('DBSCAN (eps={}) | Fold = {}'.format(epsilon, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster = dbscan_clustering(epsilon, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # `elapsed` rather than `time` so the stdlib module name is not shadowed.
        mae, elapsed = time_evaluate(test_data, ratings_cluster, centroids=None)
        fold_mae.append(mae)
        fold_time.append(elapsed)
        print('Predicting and Evaluating Finished')

        # Replace the progress log of this fold with a single summary line.
        clear_output(wait=True)
        print((row_template + '\n').format(epsilon, fold, mae, elapsed))

    # Final report: one row per fold followed by the averages.
    clear_output(wait=True)
    for fold, (mae, elapsed) in enumerate(zip(fold_mae, fold_time), start=1):
        print(row_template.format(epsilon, fold, mae, elapsed))

    # Divide by the number of folds actually collected, not a literal 10.
    avg_mae = sum(fold_mae) / len(fold_mae)
    avg_time = sum(fold_time) / len(fold_time)
    print('Eps = {} | Average MAE = {} | Average Time = {}'.format(epsilon, avg_mae, avg_time))

# DBSCAN Eps = 9.6 - Eps = 16.8 DATASET_MIN_100 (RATING 1-5)

In [3]:
# Cross-validation at eps = 9.6 with the default rating/item paths.
run_test_dbscan(epsilon=9.6)

Eps = 9.6 | Fold = 1 | MAE = 0.27215420143830615 | Time Elapsed = 0.01998934114545433
Eps = 9.6 | Fold = 2 | MAE = 0.24864892923060922 | Time Elapsed = 0.019495485223618863
Eps = 9.6 | Fold = 3 | MAE = 0.2519374090616489 | Time Elapsed = 0.01960745456904642
Eps = 9.6 | Fold = 4 | MAE = 0.2785969345571091 | Time Elapsed = 0.01947805438713425
Eps = 9.6 | Fold = 5 | MAE = 0.22478221680850008 | Time Elapsed = 0.019563353311500692
Eps = 9.6 | Fold = 6 | MAE = 0.27593298798077087 | Time Elapsed = 0.019046571542952553
Eps = 9.6 | Fold = 7 | MAE = 0.27661018830918255 | Time Elapsed = 0.019502099463006593
Eps = 9.6 | Fold = 8 | MAE = 0.250072731735818 | Time Elapsed = 0.01955734809126993
Eps = 9.6 | Fold = 9 | MAE = 0.23863486562190284 | Time Elapsed = 0.019669832578146786
Eps = 9.6 | Fold = 10 | MAE = 0.23875570307810667 | Time Elapsed = 0.01942170317615894
Eps = 9.6 | Average MAE = 0.25561261678219543 | Average Time = 0.019533124348828933


In [4]:
# Cross-validation at eps = 10 with the default rating/item paths.
run_test_dbscan(epsilon=10)

Eps = 10 | Fold = 1 | MAE = 0.2874283860722335 | Time Elapsed = 0.018222282031466187
Eps = 10 | Fold = 2 | MAE = 0.2652624241043504 | Time Elapsed = 0.01829615903003768
Eps = 10 | Fold = 3 | MAE = 0.26807212620163534 | Time Elapsed = 0.018279978530256265
Eps = 10 | Fold = 4 | MAE = 0.29051154890340253 | Time Elapsed = 0.018087961126939788
Eps = 10 | Fold = 5 | MAE = 0.23646080379946333 | Time Elapsed = 0.018307926570997543
Eps = 10 | Fold = 6 | MAE = 0.2824173658371465 | Time Elapsed = 0.01829258469745184
Eps = 10 | Fold = 7 | MAE = 0.2914069426776986 | Time Elapsed = 0.018421682262819823
Eps = 10 | Fold = 8 | MAE = 0.27596090019960523 | Time Elapsed = 0.018354844781877863
Eps = 10 | Fold = 9 | MAE = 0.26680583230670574 | Time Elapsed = 0.018255845011408072
Eps = 10 | Fold = 10 | MAE = 0.2728813639762765 | Time Elapsed = 0.018252585197816255
Eps = 10 | Average MAE = 0.2737207694078518 | Average Time = 0.01827718492410713


In [None]:
# Cross-validation at eps = 10.5 with the default rating/item paths.
# NOTE(review): the output saved with this cell stops during fold 6, so the
# run apparently never finished — re-run before using its numbers.
run_test_dbscan(epsilon=10.5)

Eps = 10.5 | Fold = 5 | MAE = 0.2602404621743194 | Time Elapsed = 0.017161510150931123

DBSCAN (eps=10.5) | Fold = 6
Loading Data...
Data Loaded...
Clustering Starts
Clustering Finished
Predicting and Evaluating Starts


In [None]:
# Cross-validation at eps = 11 with the default rating/item paths.
run_test_dbscan(epsilon=11)

In [None]:
# Cross-validation at eps = 11.5 with the default rating/item paths.
run_test_dbscan(epsilon=11.5)

In [None]:
# Cross-validation at eps = 12 with the default rating/item paths.
run_test_dbscan(epsilon=12)

In [None]:
# Cross-validation at eps = 12.5 with the default rating/item paths.
run_test_dbscan(epsilon=12.5)

In [None]:
# Cross-validation at eps = 13 with the default rating/item paths.
run_test_dbscan(epsilon=13)

In [None]:
# Cross-validation at eps = 13.5 with the default rating/item paths.
run_test_dbscan(epsilon=13.5)

In [None]:
# Cross-validation at eps = 14 with the default rating/item paths.
run_test_dbscan(epsilon=14)

In [None]:
# Cross-validation at eps = 14.5 with the default rating/item paths.
run_test_dbscan(epsilon=14.5)

In [None]:
# Cross-validation at eps = 15 with the default rating/item paths.
run_test_dbscan(epsilon=15)

In [None]:
# Cross-validation at eps = 15.5 with the default rating/item paths.
run_test_dbscan(epsilon=15.5)

In [None]:
# Cross-validation at eps = 16 with the default rating/item paths.
run_test_dbscan(epsilon=16)

In [None]:
# Cross-validation at eps = 16.5 with the default rating/item paths.
run_test_dbscan(epsilon=16.5)

In [None]:
# Cross-validation at eps = 16.8 with the default rating/item paths.
run_test_dbscan(epsilon=16.8)

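# DBSCAN Eps = 7.8 - Eps = 20 DATASET_MIN_75 (RATING 1-5)

**TODO:** every cell in this section passes `rating_path='dataset_5_min_100/'`, which is the min-100 dataset rather than the min-75 dataset named in this heading. Confirm which one is intended before running.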

In [None]:
# Cross-validation at eps = 7.8 with an explicit rating_path.
# NOTE(review): the section heading names DATASET_MIN_75, but this path is the
# same 'dataset_5_min_100/' that run_test_dbscan already uses by default —
# confirm whether a different dataset directory was intended.
run_test_dbscan(epsilon=7.8, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 8; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=8, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 8.1; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=8.1, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 8.2; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=8.2, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 8.3; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=8.3, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 9; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=9, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 10; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=10, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 11; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=11, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 12; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=12, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 13; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=13, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 14; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=14, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 15; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=15, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 16; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=16, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 17; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=17, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 18; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=18, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 19; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=19, rating_path='dataset_5_min_100/')

In [None]:
# Cross-validation at eps = 20; rating_path is spelled out (same value as the default).
run_test_dbscan(epsilon=20, rating_path='dataset_5_min_100/')