In [None]:
import pandas as pd

# Read the rating triples. Column names are supplied explicitly through
# `names`, so pandas treats the first line of the file as data, not a header.
train = pd.read_csv(
    'data/new_int_train_no_header.csv',
    names=['user_id', 'item', 'rating'],
)
# Preview the first rows only.
train.head()

In [None]:
import graphlab as gl

# Wrap the pandas frame in a GraphLab SFrame so GraphLab's recommender
# utilities can operate on it.
dat = gl.SFrame(train)

# Per-user random split into a training part and a validation part, with
# 'user_id' as the user column and 'item' as the item column.
training_data, validation_data = gl.recommender.util.random_split_by_user(
    dat,
    user_id='user_id',
    item_id='item',
)

# Back to pandas DataFrames for the cells that follow.
X_train = training_data.to_dataframe()
X_test = validation_data.to_dataframe()

In [None]:
# Persist both splits as bare CSV files: no header row and no index column.
X_train.to_csv(
    'data/SVD_train_no_header.csv',
    header=False,
    index=False,
)
X_test.to_csv(
    'data/SVD_test_no_header.csv',
    header=False,
    index=False,
)

In [None]:
from recsys.algorithm.factorize import SVD
svd = SVD()
# Column mapping for the training CSV:
#   CSV column 0 -> matrix columns, CSV column 1 -> matrix rows,
#   CSV column 2 -> cell values; row/column ids are parsed as int.
svd.load_data(
    filename='data/SVD_train_no_header.csv',
    sep=',',
    format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
)

In [None]:
# Factorize the loaded matrix. `k` is handed to recsys as the SVD parameter k
# (presumably the number of latent dimensions kept -- confirm in recsys docs).
k = 100
svd.compute(
    k=k,
    min_values=10,          # recsys logs this as "squish to at least 10 values"
    pre_normalize=None,
    mean_center=True,
    post_normalize=True,
    savefile='/tmp/save',   # NOTE(review): hard-coded path; presumably where the model is saved
)

In [None]:
# Similarity score between ids 1 and 200 in the factorized matrix.
# NOTE(review): presumably these are row ids (CSV column 1 under the
# load_data format used earlier) -- confirm against the recsys docs.
svd.similarity(1, 200)

In [None]:
# Entries most similar to id 1 according to the factorized matrix.
# NOTE(review): presumably id 1 is a row id and the result is a ranked
# list of (id, score) pairs -- confirm against the recsys docs.
svd.similar(1)

In [None]:
# Load the sample test file; header=None means the first line is data.
test = pd.read_csv('data/SVD_sample_test.csv', header=None)

# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0. Materialise the array once and
# slice it, instead of converting the frame twice.
test_matrix = test.values
X = test_matrix[:, 0:2]   # first two columns
y = test_matrix[:, 2]     # third column

In [None]:
# Pair to score, plus the rating bounds handed to predict().
ITEMID = 100
USERID = 10
MAX_RATING = 10.0
MIN_RATING = 0.0

svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
# Author's note: predicted value 5.0
# (NOTE(review): not verifiable from this notebook's saved output.)

# To look up the stored rating for the same pair:
# svd.get_matrix().value(ITEMID, USERID)
# Author's note: real value 5.0

In [None]:
from recsys.evaluation.prediction import RMSE

In [None]:
# Preview only the first rows instead of dumping the whole array.
X[:5]

In [1]:
import sys
import numpy as np

# Turn on recsys' verbose flag so the library prints progress messages
# (e.g. while loading data and computing the SVD):
import recsys.algorithm
recsys.algorithm.VERBOSE = True

from recsys.algorithm.factorize import SVD, SVDNeighbourhood
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE, MAE

# Dataset
PERCENT_TRAIN = 80  # passed to split_train_test() as the training percentage
data = Data()
# The `format` mapping tells recsys which CSV column feeds each matrix field:
#   'col': 0    -> matrix columns come from CSV column 0
#   'row': 1    -> matrix rows come from CSV column 1
#   'value': 2  -> matrix values (Mij) come from CSV column 2
#   'ids': int  -> row and column ids are integers (not strings)
data.load('data/sample_train_int_no_header.csv', sep=',',
          format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

# Train & test data
# NOTE(review): no random seed is set here, so the split (and therefore the
# reported metrics) may differ between runs -- confirm how recsys shuffles.
train, test = data.split_train_test(percent=PERCENT_TRAIN)

# Create and fit the neighbourhood SVD model on the training split
K = 100
svd = SVDNeighbourhood()
svd.set_data(train)
svd.compute(k=K, min_values=10, pre_normalize='tfidf', mean_center=True, post_normalize=True)

# Evaluation using prediction-based metrics
# Predictions are clipped to this range before being scored.
# NOTE(review): an earlier cell uses 0.0 as the lower rating bound -- confirm
# which lower bound matches the data.
PRED_MIN, PRED_MAX = 1, 10
rmse = RMSE()
mae = MAE()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = np.clip(svd.predict(item_id, user_id), PRED_MIN, PRED_MAX)
    except KeyError:
        # Pairs whose prediction raises KeyError are left out of both metrics
        # (presumably ids dropped from the matrix by min_values -- confirm).
        continue
    else:
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)

# Parenthesised single-argument print works on both Python 2 and Python 3.
print('RMSE=%s' % rmse.compute())
print('MAE=%s' % mae.compute())

Loading data/sample_train_int_no_header.csv

Creating matrix (79999 tuples)
Matrix density is: 0.229%
Updating matrix: squish to at least 10 values
Computing svd k=100, min_values=10, pre_normalize=tfidf, mean_center=True, post_normalize=True
RMSE=3.115937
MAE=2.432875
