In [1]:
# necessary modules
import sqlite3
import utils
import recommender_classifier
import evaluation
from sklearn import svm
from multiprocessing import Process, Queue

In [2]:
# load random users and feature vectors
conn = sqlite3.connect('database.db')
Users = utils.selectRandomUsers(conn)

LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES = utils.extract_features()

print len(Users)

133


In [3]:
def run(user_profiles, N, featureVector):

    conn = sqlite3.connect('database.db')
    
    SumRecall, SumPrecision = 0, 0
    
    for user, profile in user_profiles.iteritems():
        hits = 0
        
        for eliteMovie in profile['datasets']['elite_test']:

            if eliteMovie[0] not in featureVector:
                continue
                
            # Predict to the user movie and to random movies that the user did not rated
            predictions = recommender_classifier.get_predict(conn, eliteMovie, user, profile['model'], featureVector)
            # print predictions
            hits += recommender_classifier.count_hit(predictions, eliteMovie, N)
        try:
            recall = hits / float(len(profile['datasets']['elite_test']))
            SumRecall += recall
            SumPrecision += (recall / float(N))
        except ZeroDivisionError:
            continue

    size = len(user_profiles)
    avgRecall = utils.evaluateAverage(SumRecall, size)
    avgPrecision = utils.evaluateAverage(SumPrecision, size)

    return avgPrecision, avgRecall

In [4]:
def train_user_profile_svm_regressor(conn, user_datasets, feature_vector):
    
    user_profiles = {}
    sum_mae = 0

    for user, datasets in user_datasets.iteritems():     
        
        userInstances, userValues = utils.getUserInstances(datasets['train'], feature_vector)
        
        clf = svm.SVR(kernel='rbf')
        clf.fit(userInstances, userValues)
        
        user_profiles[user] = {'model': clf.predict, 'datasets': datasets}
        # userInstances, userValues = utils.getUserInstances(full_test_set, LOW_LEVEL_FEATURES)

        # check the model quality using user's full test set (ratings not used for the model)
        predictions = [(movie[2], clf.predict([feature_vector[movie[0]]])) for movie in full_test_set]
        # print predictions
        sum_mae += evaluation.evaluateMAE(conn, user, predictions, 0, 1)

    mae = utils.evaluateAverage(sum_mae, len(user_datasets))
    return user_profiles, mae

In [5]:
# split users ratings into training, test and elite test (only high ratings) datasets
user_datasets = {}

for user in Users:
    
    userMoviesTraining, userMoviesTest, full_test_set = utils.getUserTrainingTestMovies(conn, user[0])
    
    if len(userMoviesTest) == 0:
        continue
        
    user_datasets[user[0]] = {'train': userMoviesTraining, 'elite_test': userMoviesTest, 'test': full_test_set}

# print "Datasets", user_datasets, "\n\n"

In [6]:
# LOW LEVEL FEATURES preprocess users profiles - Support Vector Machine Regressor
user_profiles_low_level, mae = train_user_profile_svm_regressor(conn, user_datasets, LOW_LEVEL_FEATURES)
print "Low-Level Features MAE", mae

# DEEP FEATURES preprocess users profiles - SVM
user_profiles_deep, mae = train_user_profile_svm_regressor(conn, user_datasets, DEEP_FEATURES)
print "Deep Features MAE", mae

Low-Level Features MAE 0.709768660601
Deep Features MAE 0.505449651276


In [7]:
def experiment(v, lock, N, user_profiles_low_level, LOW_LEVEL_FEATURES, user_profiles_deep, DEEP_FEATURES, Users):
        
    i += 1    
    
    result = {}

    # LOW LEVEL FEATURES check precision, recall and mae
    p_l, r_l = run(user_profiles_low_level, N, LOW_LEVEL_FEATURES)
    print "Low-Level Recall", r_l, "Low-Level Precision", p_l, "For iteration with", N
    result[N] = {'ll': {'recall': r_l, 'precision': p_l}}

    # DEEP FEATURES check precision, recall and mae
    p_d, r_d = run(user_profiles_deep, N, DEEP_FEATURES)
    print "Deep Recall", r_d, "Deep Precision", p_d, "For iteration with", N
    result[N] = {'deep': {'recall': r_d, 'precision': p_d}}

    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
    print "Random Recall", r, "Random Precision",  p, "Random MAE", mae, "For iteration with", N
    result = {'ll': {'recall': r_l, 'precision': p_l}, 'deep': {'recall': r_d, 'precision': p_d}, 'random': {'recall': r, 'precision': p}}
    
    time.sleep(0.01)
    with lock:
        val.value = result
    
    # q.put({N: result})

In [None]:
q = Queue()
N = 1
procs = [Process(target=func, args=(v, lock, i, user_profiles_low_level, LOW_LEVEL_FEATURES, user_profiles_deep, DEEP_FEATURES, Users,)) for i in range(5)]
for p in procs: p.start()
for p in procs: p.join()

print v.value
#     p = Process(target=experiment, args=(q, N, user_profiles_low_level, LOW_LEVEL_FEATURES, user_profiles_deep, DEEP_FEATURES, Users,))
#     p.start()
#     N += 1
# print q.get()
# p.join()

# p = Pool(5)
# print(p.map(run, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))

Low-Level Recall 0.0147071531146 Low-Level Precision 0.00245119218577 For iteration with 6
Low-Level Recall 0.0112532039062 Low-Level Precision 0.00281330097654 For iteration with 4
Low-Level Recall 0.00520050450961 Low-Level Precision 0.00260025225481 For iteration with 2
Low-Level Recall 0.00440226903084 Low-Level Precision 0.00440226903084 For iteration with 1
Low-Level Recall 0.00816601853341 Low-Level Precision 0.0027220061778 For iteration with 3
Low-Level Recall 0.0124544030102 Low-Level Precision 0.00249088060203 For iteration with 5
Deep Recall 0.0105748165465 Deep Precision 0.00264370413663 For iteration with 4


Process Process-4:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-7-ddf49b1c9ca4>", line 15, in experiment
    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
  File "recommender_classifier.py", line 104, in recommend_random
    randomMovies = utils.getRandomMovieSet(conn, user[0])
TypeError: 'int' object has no attribute '__getitem__'


Deep Recall 0.00955713766769 Deep Precision 0.0031857125559 For iteration with 3


Process Process-3:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-7-ddf49b1c9ca4>", line 15, in experiment
    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
  File "recommender_classifier.py", line 104, in recommend_random
    randomMovies = utils.getRandomMovieSet(conn, user[0])
TypeError: 'int' object has no attribute '__getitem__'


Deep Recall 0.017102476647 Deep Precision 0.00285041277449 For iteration with 6


Process Process-6:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-7-ddf49b1c9ca4>", line 15, in experiment
    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
  File "recommender_classifier.py", line 104, in recommend_random
    randomMovies = utils.getRandomMovieSet(conn, user[0])
TypeError: 'int' object has no attribute '__getitem__'


Deep Recall 0.00557784444544 Deep Precision 0.00557784444544 For iteration with 1


Process Process-1:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-7-ddf49b1c9ca4>", line 15, in experiment
    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
  File "recommender_classifier.py", line 104, in recommend_random
    randomMovies = utils.getRandomMovieSet(conn, user[0])
TypeError: 'int' object has no attribute '__getitem__'


Deep Recall 0.00764076843442 Deep Precision 0.00382038421721 For iteration with 2


Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-7-ddf49b1c9ca4>", line 15, in experiment
    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
  File "recommender_classifier.py", line 104, in recommend_random
    randomMovies = utils.getRandomMovieSet(conn, user[0])
TypeError: 'int' object has no attribute '__getitem__'


Deep Recall 0.0151387044943 Deep Precision 0.00302774089886 For iteration with 5


Process Process-5:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-7-ddf49b1c9ca4>", line 15, in experiment
    p, r, mae = recommender_classifier.recommend_random(user_datasets, N)
  File "recommender_classifier.py", line 104, in recommend_random
    randomMovies = utils.getRandomMovieSet(conn, user[0])
TypeError: 'int' object has no attribute '__getitem__'
