In [22]:
import numpy as np
import UserResults as ur
import UserMatrix as um
import pickle
from Classifiers import TransparentRidge

In [46]:
# Pick the user analysed in this notebook and fetch that user's matrix from
# the project-local UserMatrix module.
# NOTE(review): another cell reads `ratings[user_id-1]`, so user_id looks
# 1-based — confirm against the data files.
user_id = 945
# Other cells read the movie id from column 0 of this matrix, use the last
# column as the regression target and the columns in between as features.
user_matrix = um.get_user_matrix(user_id)

In [47]:
# Read the two postprocessed tables from disk: the '|'-separated movie list
# (kept as strings) and the comma-separated ratings table (parsed as integers).
movies = np.genfromtxt(
    'postprocessed-data/movie_list',
    delimiter='|',
    dtype=str,
)
ratings = np.genfromtxt(
    'postprocessed-data/user_ratings',
    delimiter=',',
    dtype=int,
)
# Row of the ratings table belonging to the selected user (row index is user_id - 1).
user_ratings = ratings[user_id - 1]

In [56]:
# Fit the TransparentRidge model on this user's matrix.
# Column 0 is left out of the features, the last column is the regression
# target, and every column in between is used as a feature.
user_cols = user_matrix.shape[1]
target_col = user_cols - 1
data = user_matrix[:, 1:target_col]
target = user_matrix[:, target_col]

clf = TransparentRidge(alpha=.05)
clf.fit(data, target)

# Quantities reused by the explanation cells below.
weights = clf.coef_
neg_evi, pos_evi = clf.predict_evidences(data)
bias = clf.get_bias()
y_pred = clf.predict(data)
# Row positions ordered from the lowest to the highest prediction.
indices = np.argsort(y_pred)

In [57]:
# Predict all Films
infile = open("postprocessed-data/movie_matrix", "r")
movie_matrix = pickle.load(infile)
all_pred = clf.predict(movie_matrix[:, 1:])
all_pred = np.column_stack((movies, all_pred))
all_pred = all_pred[np.argsort(all_pred[:,1])][::-1]
print all_pred[range(0,10)]

[['S.F.W. (1994)' '4.84013029552']
 ['Inventing the Abbotts (1997)' '4.68355079009']
 ['Last Time I Saw Paris, The (1954)' '4.57702513538']
 ['Foreign Student (1994)' '4.55134903092']
 ['Mixed Nuts (1994)' '4.50663036638']
 ['Relic, The (1997)' '4.47136503121']
 ['Walkabout (1971)' '4.42527544534']
 ['Ayn Rand: A Sense of Life (1997)' '4.39748547487']
 ['Foreign Correspondent (1940)' '4.38984052091']
 ['Hackers (1995)' '4.38045515779']]


In [17]:
# The Highest Rating
j = indices[-1]
movie_id = user_matrix[j][0]
res = um.get_avg_rating_for_movie(ratings, movie_id-1)
avg_rating = res[0]
num_rating = res[1]
movie_features = ur.gen_movie_weights(movie_id,weights,user_matrix)
print "Movie Title: ", movies[movie_id-1]
print "User Rating: ", user_ratings[movie_id-1]
print "Average Rating: ", avg_rating
print "Number of Ratings: ", num_rating
print "Prediction: ",  clf.predict(data[j])[0]
print "Bias and evidences:", bias, neg_evi[j], pos_evi[j]
print "Positive Features"
print movie_features[0].head(10)
print "Negative Features"
print movie_features[1].head(10)

In [18]:
# The Lowest Rating
j = indices[0]
movie_id = user_matrix[j][0]
res = um.get_avg_rating_for_movie(ratings, movie_id-1)
avg_rating = res[0]
num_rating = res[1]
movie_features = ur.gen_movie_weights(movie_id,weights,user_matrix)
print "Movie Title: ", movies[movie_id-1]
print "User Rating: ", user_ratings[movie_id-1]
print "Average Rating: ", avg_rating
print "Number of Ratings: ", num_rating
print "Prediction: ",  clf.predict(data[j])[0]
print "Bias and evidences:", bias, neg_evi[j], pos_evi[j]
print "Positive Features"
print movie_features[0].head(10)
print "Negative Features"
print movie_features[1].head(10)

In [19]:
# The case that has the most negative evidence, regardless of positive evidence
j = np.argsort(neg_evi)[0]
movie_id = user_matrix[j][0]
res = um.get_avg_rating_for_movie(ratings, movie_id-1)
avg_rating = res[0]
num_rating = res[1]
movie_features = ur.gen_movie_weights(movie_id,weights,user_matrix)
print "Movie Title: ", movies[movie_id-1]
print "User Rating: ", user_ratings[movie_id-1]
print "Average Rating: ", avg_rating
print "Number of Ratings: ", num_rating
print "Prediction: ",  clf.predict(data[j])[0]
print "Bias and evidences:", bias, neg_evi[j], pos_evi[j]
print "Positive Features"
print movie_features[0].head(10)
print "Negative Features"
print movie_features[1].head(10)

In [20]:
# The case that has the most positive evidence, regardless of negative evidence
j = np.argsort(pos_evi)[-1]
movie_id = user_matrix[j][0]
res = um.get_avg_rating_for_movie(ratings, movie_id-1)
avg_rating = res[0]
num_rating = res[1]
movie_features = ur.gen_movie_weights(movie_id,weights,user_matrix)
print "Movie Title: ", movies[movie_id-1]
print "User Rating: ", user_ratings[movie_id-1]
print "Average Rating: ", avg_rating
print "Number of Ratings: ", num_rating
print "Prediction: ",  clf.predict(data[j])[0]
print "Bias and evidences:", bias, neg_evi[j], pos_evi[j]
print "Positive Features"
print movie_features[0].head(10)
print "Negative Features"
print movie_features[1].head(10)

In [21]:
# Most conflicted
conflict = np.min([abs(neg_evi), pos_evi], axis=0)
indices = np.argsort(conflict)
j=indices[-1]
movie_id = user_matrix[j][0]
res = um.get_avg_rating_for_movie(ratings, movie_id-1)
avg_rating = res[0]
num_rating = res[1]
movie_features = ur.gen_movie_weights(movie_id,weights,user_matrix)
print "Movie Title: ", movies[movie_id-1]
print "User Rating: ", user_ratings[movie_id-1]
print "Average Rating: ", avg_rating
print "Number of Ratings: ", num_rating
print "Prediction: ",  clf.predict(data[j])[0]
print "Bias and evidences:", bias, neg_evi[j], pos_evi[j]
print "Positive Features"
print movie_features[0].head(10)
print "Negative Features"
print movie_features[1].head(10)

In [22]:
# Least amount of info
information = np.max([abs(neg_evi), pos_evi], axis=0)
indices = np.argsort(information)
j=indices[0]
movie_id = user_matrix[j][0]
res = um.get_avg_rating_for_movie(ratings, movie_id-1)
avg_rating = res[0]
num_rating = res[1]
movie_features = ur.gen_movie_weights(movie_id,weights,user_matrix)
print "Movie Title: ", movies[movie_id-1]
print "User Rating: ", user_ratings[movie_id-1]
print "Average Rating: ", avg_rating
print "Number of Ratings: ", num_rating
print "Prediction: ",  clf.predict(data[j])[0]
print "Bias and evidences:", bias, neg_evi[j], pos_evi[j]
print "Positive Features"
print movie_features[0].head(10)
print "Negative Features"
print movie_features[1].head(10)

Movie Title:  Return of the Pink Panther, The (1974)
User Rating:  3
Average Rating:  3.54838709677
Number of Ratings:  93
Prediction:  3.17226003094
Bias and evidences: 3.19221026975 -0.0831241817808 0.0631739429755
Positive Features
          Feature  Weights
0          Comedy   0.0446
1  Average Rating   0.0331
Negative Features
Empty DataFrame
Columns: [Feature, Weights]
Index: []
