In [1]:
from data_prep import load_rating_data
from cluster import dbscan_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [2]:
def run_test_dbscan(epsilon, rating_path = 'dataset_5_min_100/', item_path = 'comic_genre.csv', n_folds = 10):
    """Run k-fold cross-validation of DBSCAN-based rating prediction and print the results.

    For every fold ``i`` in ``1..n_folds`` this loads the train and test splits
    with ``load_rating_data``, clusters with ``dbscan_clustering`` and calls
    ``time_evaluate``, which yields the two per-fold values reported below as
    "MAE" and "Time Elapsed". Progress messages are printed while a fold runs
    and are cleared (``clear_output``) once the fold finishes; at the end a
    per-fold table and the averages over all folds are printed.

    Args:
        epsilon: eps value forwarded to ``dbscan_clustering``.
        rating_path: forwarded unchanged to ``load_rating_data``.
        item_path: forwarded unchanged to ``load_rating_data``.
        n_folds: how many folds to run, numbered from 1. Defaults to 10, the
            value that used to be hard-coded. Presumably must not exceed the
            number of splits stored under ``rating_path`` -- verify against
            ``load_rating_data``.

    Returns:
        None. Results are only printed.

    Raises:
        ValueError: if ``n_folds`` is smaller than 1 (there would be nothing
            to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    total_mae = []
    total_time = []

    # K-Fold Cross-Validation (fold numbers are 1-based)
    for fold in range(1, n_folds + 1):
        # load dataset
        print('DBSCAN (eps={}) | Fold = {}'.format(epsilon, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster = dbscan_clustering(epsilon, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # `elapsed` rather than `time`, so the stdlib module name is not shadowed
        mae, elapsed = time_evaluate(test_data, ratings_cluster, centroids=None)
        total_mae.append(mae)
        total_time.append(elapsed)
        print('Predicting and Evaluating Finished')

        # replace the progress log of this fold with its one-line result
        clear_output(wait=True)
        print('Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}\n'.format(epsilon, fold, mae, elapsed))

    # final report: one line per fold, then the averages
    clear_output(wait=True)
    for fold, (mae, elapsed) in enumerate(zip(total_mae, total_time), start=1):
        print('Eps = {} | Fold = {} | MAE = {} | Time Elapsed = {}'.format(epsilon, fold, mae, elapsed))
    # divide by the number of folds actually collected, not a literal 10
    avg_mae = sum(total_mae) / len(total_mae)
    avg_time = sum(total_time) / len(total_time)
    print('Eps = {} | Average MAE = {} | Average Time = {}'.format(epsilon, avg_mae, avg_time))

# DBSCAN Eps = 9.6 - Eps = 16.8 DATASET_MIN_100 (RATING 1-5)

In [None]:
# First eps of the sweep; rating_path is not passed, so the default
# 'dataset_5_min_100/' is used.
# NOTE(review): the saved output below stops during fold 4 and the cell has no
# execution count, so this run appears unfinished -- re-run before citing it.
run_test_dbscan(9.6)

Eps = 9.6 | Fold = 3 | MAE = 0.2519374090616489 | Time Elapsed = 0.01960745456904642

DBSCAN (eps=9.6) | Fold = 4
Loading Data...
Data Loaded...
Clustering Starts
Clustering Finished
Predicting and Evaluating Starts


In [None]:
# Remaining eps values of this sweep (10 through 16.8), all with the default
# rating_path. One call per cell: run_test_dbscan clears the cell output, so
# calls sharing a cell would erase each other's results.
run_test_dbscan(10)

In [None]:
run_test_dbscan(10.5)

In [None]:
run_test_dbscan(11)

In [None]:
run_test_dbscan(11.5)

In [None]:
run_test_dbscan(12)

In [None]:
run_test_dbscan(12.5)

In [None]:
run_test_dbscan(13)

In [None]:
run_test_dbscan(13.5)

In [None]:
run_test_dbscan(14)

In [None]:
run_test_dbscan(14.5)

In [None]:
run_test_dbscan(15)

In [None]:
run_test_dbscan(15.5)

In [None]:
run_test_dbscan(16)

In [None]:
run_test_dbscan(16.5)

In [None]:
run_test_dbscan(16.8)

# DBSCAN Eps = 7.8 - Eps = 20 DATASET_MIN_75 (RATING 1-5)

In [None]:
# NOTE(review): the section header says DATASET_MIN_75, but every call in this
# section passes rating_path='dataset_5_min_100/', which is also the default of
# run_test_dbscan -- so these runs read the same data as the previous section.
# Presumably the min-75 dataset directory was intended; confirm its name and
# update rating_path in each cell below.
run_test_dbscan(7.8, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(8, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(8.1, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(8.2, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(8.3, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(9, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(10, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(11, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(12, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(13, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(14, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(15, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(16, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(17, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(18, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(19, rating_path='dataset_5_min_100/')

In [None]:
run_test_dbscan(20, rating_path='dataset_5_min_100/')