# Main Function

In [1]:
from data_prep import load_rating_data
from cluster import kmeans_clustering
from evaluate import time_evaluate
from IPython.display import clear_output

In [2]:
def run_test_kmeans(n_clusters, rating_path = 'dataset_5_min_75/', item_path = 'comic_genre.csv', n_folds = 10):
    """Cross-validate the K-Means pipeline and print per-fold and average results.

    For every fold number from 1 to ``n_folds`` the train and test splits are
    loaded with ``load_rating_data``, the training data is clustered with
    ``kmeans_clustering``, and the test split is scored with ``time_evaluate``.
    Progress messages are printed while a fold runs and cleared afterwards;
    once all folds are done a per-fold table and the averages are printed.

    Parameters
    ----------
    n_clusters : int
        Number of clusters handed to ``kmeans_clustering``.
    rating_path : str
        Forwarded unchanged to ``load_rating_data`` as ``rating_path``.
    item_path : str
        Forwarded unchanged to ``load_rating_data`` as ``item_path``.
    n_folds : int, optional
        How many folds to run; folds are numbered starting at 1. Defaults to
        10, matching the previously hard-coded 10-fold behaviour.

    Returns
    -------
    None
        Results are only printed. Nothing is returned so that a notebook cell
        ending in this call does not echo a value.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 (there would be nothing to average).
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got {}'.format(n_folds))

    fold_maes = []
    fold_times = []

    # k-fold cross-validation; fold files are addressed by a 1-based number
    for fold in range(1, n_folds + 1):
        # load dataset
        print('K-Means (k={}) | Fold = {}'.format(n_clusters, fold))
        print('Loading Data...')
        train_data, cluster_data = load_rating_data('train', number=fold, rating_path=rating_path, item_path=item_path)
        test_data = load_rating_data('test', number=fold, rating_path=rating_path, item_path=item_path)
        print('Data Loaded...')

        # perform clustering
        print('Clustering Starts')
        ratings_cluster, cluster_centroids = kmeans_clustering(n_clusters, cluster_data, train_data)
        print('Clustering Finished')

        # predict and evaluate
        print('Predicting and Evaluating Starts')
        # named `elapsed` rather than `time` to avoid shadowing the stdlib module name
        mae, elapsed = time_evaluate(test_data, ratings_cluster, cluster_centroids)
        fold_maes.append(mae)
        fold_times.append(elapsed)
        print('Predicting and Evaluating Finished')

        # wait=True defers the clear until the next output, so the cell
        # shows only the latest fold's result instead of flickering empty
        clear_output(wait=True)
        print('K = {} | Fold = {} | MAE = {} | Time Elapsed = {}\n'.format(n_clusters, fold, mae, elapsed))

    # final report: replace the progress output with the full per-fold table
    clear_output(wait=True)
    for fold, (mae, elapsed) in enumerate(zip(fold_maes, fold_times), start=1):
        print('K = {} | Fold = {} | MAE = {} | Time Elapsed = {}'.format(n_clusters, fold, mae, elapsed))

    # average over the folds that actually ran, not a hard-coded 10
    avg_mae = sum(fold_maes) / len(fold_maes)
    avg_time = sum(fold_times) / len(fold_times)
    print('K = {} | Average MAE = {} | Average Time = {}'.format(n_clusters, avg_mae, avg_time))

# No Clustering (Baseline, K = 1)

In [None]:
# Baseline run: a single cluster, so the data is not actually partitioned.
run_test_kmeans(n_clusters=1, rating_path='dataset_5_min_75/')

K = 1 | Fold = 7 | MAE = 0.8171025837555983 | Time Elapsed = 0.05788431773975283

K-Means (k=1) | Fold = 8
Loading Data...
Data Loaded...
Clustering Starts
Clustering Finished
Predicting and Evaluating Starts


# K-Means K=2 - K=20 DATASET_MIN_75 (RATING 1-5)

In [None]:
# Evaluate K-Means with 2 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=2)

In [None]:
# Evaluate K-Means with 3 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=3)

In [None]:
# Evaluate K-Means with 4 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=4)

In [None]:
# Evaluate K-Means with 5 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=5)

In [None]:
# Evaluate K-Means with 6 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=6)

In [None]:
# Evaluate K-Means with 7 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=7)

In [None]:
# Evaluate K-Means with 8 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=8)

In [None]:
# Evaluate K-Means with 9 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=9)

In [None]:
# Evaluate K-Means with 10 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=10)

In [None]:
# Evaluate K-Means with 11 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=11)

In [None]:
# Evaluate K-Means with 12 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=12)

In [None]:
# Evaluate K-Means with 13 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=13)

In [None]:
# Evaluate K-Means with 14 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=14)

In [None]:
# Evaluate K-Means with 15 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=15)

In [None]:
# Evaluate K-Means with 16 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=16)


In [None]:
# Evaluate K-Means with 17 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=17)


In [None]:
# Evaluate K-Means with 18 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=18)


In [None]:
# Evaluate K-Means with 19 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=19)


In [None]:
# Evaluate K-Means with 20 clusters (all other arguments at their defaults).
run_test_kmeans(n_clusters=20)
