In [2]:
import math
import numpy as np
import sklearn.metrics
import sklearn.metrics.pairwise




In [3]:
# Paths of the five train/test splits (u1 .. u5); train_files[k] and
# test_files[k] are always used together as one fold by the cells below.
train_files = ["../ml-100k/u%d.base" % i for i in range(1, 6)]
test_files = ["../ml-100k/u%d.test" % i for i in range(1, 6)]

# Record layout of every line in those files:
#     user id | item id | rating | timestamp

# Dataset dimensions:
#     943 users
#     1682 movies
numUsers = 943
numMovies = 1682

### 1. Global average

In [4]:
errors = []

for t in range(5):
    print 'Fold', t + 1,

    """ Load Train Data, fold = t """
    with open(train_files[t], 'r') as fp:
        trainData = fp.readlines()
        trainData = map(lambda x : x.strip('\r\n'), trainData)
        trainData = map(lambda x : map(int, x.split('\t')[:3]), trainData)

    """ Prepare rating matrix """
    ratingMatrix = np.zeros((numUsers, numMovies))
    for u, m, r in trainData:
        ratingMatrix[u - 1][m - 1] = r

    averageRating = ratingMatrix[ratingMatrix != 0].mean()

    """ Load Test Data, fold = t """
    with open(test_files[t], 'r') as fp:
        testData = fp.readlines()
        testData = map(lambda x : x.strip('\r\n'), testData)
        testData = map(lambda x : map(int, x.split('\t')[:3]), testData)


    """ Test Model """

    true = []
    pred = []
    unable = 0

    for u, m, trueRating in testData:

        """ Get common raters """
        
        true.append(trueRating)
        pred.append(averageRating)


    """ Compute Error """
    MSE = sklearn.metrics.mean_squared_error(true, pred)
    MAE = sklearn.metrics.mean_absolute_error(true, pred)
    print 'MSE:', MSE,
    print 'MAE:', MAE,
    print 'NMAE:', MAE / 5.
    print 'Unable to predict', unable, '\n'

    errors.append([MSE, MAE])

    del ratingMatrix

errors = np.array(errors)
print 'MSE across 5 folds:', errors[:, 0].mean()
print 'MAE across 5 folds:', errors[:, 1].mean()
print 'NMAE across 5 folds :', errors[:, 1].mean() / 5.

Fold 1 MSE: 1.3309681925 MAE: 0.968048775 NMAE: 0.193609755
Unable to predict 0 

Fold 2 MSE: 1.27840067266 MAE: 0.94891099 NMAE: 0.189782198
Unable to predict 0 

Fold 3 MSE: 1.23561516641 MAE: 0.930603945 NMAE: 0.186120789
Unable to predict 0 

Fold 4 MSE: 1.23942282766 MAE: 0.936131395 NMAE: 0.187226279
Unable to predict 0 

Fold 5 MSE: 1.25143442766 MAE: 0.939934095 NMAE: 0.187986819
Unable to predict 0 

MSE across 5 folds: 1.26716825738
MAE across 5 folds: 0.94472584
NMAE across 5 folds : 0.188945168


### 2. User average

In [10]:
errors = []

for t in range(5):
    print 'Fold', t + 1,

    """ Load Train Data, fold = t """
    with open(train_files[t], 'r') as fp:
        trainData = fp.readlines()
        trainData = map(lambda x : x.strip('\r\n'), trainData)
        trainData = map(lambda x : map(int, x.split('\t')[:3]), trainData)

    """ Prepare rating matrix """
    ratingMatrix = np.zeros((numUsers, numMovies))
    for u, m, r in trainData:
        ratingMatrix[u - 1][m - 1] = r

    """ Load Test Data, fold = t """
    with open(test_files[t], 'r') as fp:
        testData = fp.readlines()
        testData = map(lambda x : x.strip('\r\n'), testData)
        testData = map(lambda x : map(int, x.split('\t')[:3]), testData)


    """ Test Model """

    true = []
    pred = []
    unable = 0

    for u, m, trueRating in testData:

        """ Get common raters """
        
        predictedRating = ratingMatrix[u - 1, :][np.where(ratingMatrix[u - 1, :] != 0)].mean()
        true.append(trueRating)
        pred.append(predictedRating)


    """ Compute Error """
    MSE = sklearn.metrics.mean_squared_error(true, pred)
    MAE = sklearn.metrics.mean_absolute_error(true, pred)
    print 'MSE:', MSE,
    print 'MAE:', MAE,
    print 'NMAE:', MAE / 5.
    print 'Unable to predict', unable, '\n'

    errors.append([MSE, MAE])

    del ratingMatrix

errors = np.array(errors)
print 'MSE across 5 folds:', errors[:, 0].mean()
print 'MAE across 5 folds:', errors[:, 1].mean()
print 'NMAE across 5 folds :', errors[:, 1].mean() / 5.

Fold 1 MSE: 1.12995864142 MAE: 0.850191274015 NMAE: 0.170038254803
Unable to predict 0 

Fold 2 MSE: 1.09567875703 MAE: 0.838340145799 NMAE: 0.16766802916
Unable to predict 0 

Fold 3 MSE: 1.06687508944 MAE: 0.82652751695 NMAE: 0.16530550339
Unable to predict 0 

Fold 4 MSE: 1.07465897369 MAE: 0.83077651289 NMAE: 0.166155302578
Unable to predict 0 

Fold 5 MSE: 1.08012858977 MAE: 0.835048437302 NMAE: 0.16700968746
Unable to predict 0 

MSE across 5 folds: 1.08946001027
MAE across 5 folds: 0.836176777391
NMAE across 5 folds : 0.167235355478


### 3. Item average

In [18]:
errors = []

for t in range(5):
    print 'Fold', t + 1,

    """ Load Train Data, fold = t """
    with open(train_files[t], 'r') as fp:
        trainData = fp.readlines()
        trainData = map(lambda x : x.strip('\r\n'), trainData)
        trainData = map(lambda x : map(int, x.split('\t')[:3]), trainData)

    """ Prepare rating matrix """
    ratingMatrix = np.zeros((numUsers, numMovies))
    for u, m, r in trainData:
        ratingMatrix[u - 1][m - 1] = r

    """ Load Test Data, fold = t """
    with open(test_files[t], 'r') as fp:
        testData = fp.readlines()
        testData = map(lambda x : x.strip('\r\n'), testData)
        testData = map(lambda x : map(int, x.split('\t')[:3]), testData)


    """ Test Model """

    true = []
    pred = []
    unable = 0

    for u, m, trueRating in testData:

        """ Get common raters """
        
        true.append(trueRating)
        
        rel = ratingMatrix[:, m - 1][np.where(ratingMatrix[:, m - 1] != 0)]
        if rel.shape[0] == 0:
            unable += 1
            pred.append(3.)
            continue
            
        pred.append(rel.mean())


    """ Compute Error """
    MSE = sklearn.metrics.mean_squared_error(true, pred)
    MAE = sklearn.metrics.mean_absolute_error(true, pred)
    print 'MSE:', MSE,
    print 'MAE:', MAE,
    print 'NMAE:', MAE / 5.
    print 'Unable to predict', unable, '\n'

    errors.append([MSE, MAE])

    del ratingMatrix

errors = np.array(errors)
print 'MSE across 5 folds:', errors[:, 0].mean()
print 'MAE across 5 folds:', errors[:, 1].mean()
print 'NMAE across 5 folds :', errors[:, 1].mean() / 5.

Fold 1 MSE: 1.06622437661 MAE: 0.827145723289 NMAE: 0.165429144658
Unable to predict 32 

Fold 2 MSE: 1.05934583127 MAE: 0.820010747804 NMAE: 0.164002149561
Unable to predict 36 

Fold 3 MSE: 1.03824893742 MAE: 0.811239160119 NMAE: 0.162247832024
Unable to predict 36 

Fold 4 MSE: 1.03265119012 MAE: 0.810707331979 NMAE: 0.162141466396
Unable to predict 27 

Fold 5 MSE: 1.04407585553 MAE: 0.815413567127 NMAE: 0.163082713425
Unable to predict 36 

MSE across 5 folds: 1.04810923819
MAE across 5 folds: 0.816903306064
NMAE across 5 folds : 0.163380661213
