In [1]:
# necessary modules
import sqlite3
import utils
import recommender_classifier
import evaluation
import time
import numpy as np
from sklearn import svm
from sklearn.metrics import mean_absolute_error
from multiprocessing import Pool

In [2]:
# load random users and feature vectors
# Open the SQLite database that the helper modules query.
# NOTE(review): this connection is never closed in the visible cells — confirm whether later cells still need it.
conn = sqlite3.connect('database.db')
# Presumably a random sample of user ids drawn from the database; no seed is
# set in the visible cells, so the sample may differ between runs — TODO confirm.
Users = utils.selectRandomUsers(conn)
# Feature sets that experiment() reads as module-level globals and forwards to run().
LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES = utils.extract_features()
# Tag-based TF-IDF features; the only use in the visible cells is a
# commented-out (disabled) tag-based experiment.
USER_TFIDF_FEATURES, MOVIE_TFIDF_FEATURES = utils.extract_tfidf_features()

# Sanity check: number of sampled users (the recorded output shows 100).
print len(Users)

# run() iterates this as user -> profile and reads profile['datasets']['elite_test'].
user_profiles = recommender_classifier.build_user_profiles(conn, Users)

100


In [3]:
def run(user_profiles, N, feature_vector, feature_vector2=None):

    conn = sqlite3.connect('database.db')
    
    SumRecall, SumPrecision = 0, 0
    
    for user, profile in user_profiles.iteritems():                
        
        if feature_vector2 is not None:
            if np.sum(feature_vector[user]) == 0:
                print "Blank user profile", user
                continue
        
        hits = 0
        
        predictions = recommender_classifier.get_predict_collaborative_filtering(conn, profile, feature_vector, feature_vector2)
        # print "Predictions", sorted(predictions, key=lambda tup: tup[2], reverse=True)
        
        for elite_movie in profile['datasets']['elite_test']:
                        
            if feature_vector is list and elite_movie[0] not in feature_vector:
                continue
                
            # Predict to the user movie and to random movies that the user did not rated
            # print predictions            
            elite_prediction = recommender_classifier.get_prediction_elite(conn, elite_movie, profile, feature_vector, feature_vector2)
            all_predictions = predictions[:]
            all_predictions.append(elite_prediction)
            
            # print "Elite Movie", elite_movie, elite_prediction
            
            hits += recommender_classifier.count_hit(all_predictions, elite_movie, N)
        try:
            recall = hits / float(len(profile['datasets']['elite_test']))
            SumRecall += recall
            SumPrecision += (recall / float(N))
        except ZeroDivisionError:
            continue
        # print "Size is", len(predictions)
        # print "Predictions", sorted(predictions, key=lambda tup: tup[2], reverse=True)

    size = len(user_profiles)
    avgRecall = utils.evaluateAverage(SumRecall, size)
    avgPrecision = utils.evaluateAverage(SumPrecision, size)

    return avgPrecision, avgRecall

In [6]:
# def experiment(N, user_profiles_low_level, LOW_LEVEL_FEATURES, user_profiles_deep, DEEP_FEATURES):
def experiment(N):
    
    global user_profiles, LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES, USER_TFIDF_FEATURES, MOVIE_TFIDF_FEATURES
            
    result = {}
    start = time.time()
    
    # Tag-based
    # p_t, r_t = run(user_profiles, N, USER_TFIDF_FEATURES, MOVIE_TFIDF_FEATURES)
    # print "Tag-based Recall", r_t, "Tag-based Precision", p_t, "For iteration with", N
    
    # LOW LEVEL FEATURES check precision, recall and mae
    p_l, r_l = run(user_profiles, N, LOW_LEVEL_FEATURES)
    print "Low-Level Recall", r_l, "Low-Level Precision", p_l, "For iteration with", N
    
    # end = time.time()
    # print "Execution time", (end - start)
#     result[N] = {'ll': {'recall': r_l, 'precision': p_l}}

    # start = time.time()
    # DEEP FEATURES check precision, recall and mae
    p_d, r_d = run(user_profiles, N, DEEP_FEATURES)
    print "Deep Recall", r_d, "Deep Precision", p_d, "For iteration with", N
    end = time.time()
    print "Execution time", (end - start)
#     result[N] = {'deep': {'recall': r_d, 'precision': p_d}}
    
    # HYBRID
#     p_d, r_d = run(user_profiles_deep, N, HYBRID_FEATURES)
#     p_d, r_d = run(user_profiles, N, HYBRID_FEATURES)
#     print "Hybrid Recall", r_d, "Hybrid Precision", p_d, "For iteration with", N, "\n\n"
#     result[N] = {'hybrid': {'recall': r_d, 'precision': p_d}}

    p, r, mae = recommender_classifier.recommend_random(user_profiles, N)
    print "Random Recall", r, "Random Precision",  p, "Random MAE", mae, "For iteration with", N
    
#     result = {'ll': {'recall': r_l, 'precision': p_l}, 'deep': {'recall': r_d, 'precision': p_d}, 'random': {'recall': r, 'precision': p}}
   

In [7]:
# q = Queue()
# N = 10
# start = time.time()
# experiment(N, user_profiles, LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES)
# procs = [Process(target=experiment, args=(N, user_profiles, LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES,)) for i in range(1)]
# for p in procs: p.start()
# for p in procs: p.join()
# end = time.time()
# print(end - start)

#     p = Process(target=experiment, args=(q, N, user_profiles_low_level, LOW_LEVEL_FEATURES, user_profiles_deep, DEEP_FEATURES, Users,))
#     p.start()
#     N += 1
# print q.get()
# p.join()
# experiment(1)
# Evaluate cut-offs N = 1..10 across 5 worker processes; map() returns the
# results in the same order as the inputs.
p = Pool(5)
try:
    print(p.map(experiment, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
finally:
    # Release the worker processes even if one of the experiments raises.
    p.close()
    p.join()

Low-Level Recall 0.245776149776 Low-Level Precision 0.122888074888 For iteration with 2
Low-Level Recall 0.164506308506 Low-Level Precision 0.164506308506 For iteration with 1
Low-Level Recall 0.283109483109 Low-Level Precision 0.0943698277032 For iteration with 3
Low-Level Recall 0.367384615385 Low-Level Precision 0.0734769230769 For iteration with 5
Low-Level Recall 0.318368742369 Low-Level Precision 0.0795921855922 For iteration with 4
Deep Recall 0.210348392348 Deep Precision 0.210348392348 For iteration with 1
Execution time 523.05669117
Random Recall 0.0133333333333 Random Precision 0.0133333333333 Random MAE 0.0 For iteration with 1
Deep Recall 0.34238990639 Deep Precision 0.171194953195 For iteration with 2
Execution time 526.827446938
Deep Recall 0.504061050061 Deep Precision 0.100812210012 For iteration with 5
Execution time 527.133561134
Random Recall 0.00666666666667 Random Precision 0.00333333333333 Random MAE 0.0 For iteration with 2
Random Recall 0.0314814814815 Random P