# Alternating Least Squares (ALS) for collaborative RecSys

----------------------------------------------------------------------
## <i><u>Actual ratings</u></i>

### pu : Rating of user <i><font color="red">i</font></i> against item <i><font color="red">j</font></i>. Rating can be missing.

### pv : Rating of item <i><font color="red">i</font></i> against user <i><font color="red">j</font></i>. Rating can be missing.

### Transpose(pu) = pv

In [1]:
import math,numpy

# Observed ratings grouped by user. Every entry is a (user, item, rating)
# triple; a None in the dense table below marks a missing rating and therefore
# produces no triple for that (user, item) pair.
pu = [
    [(user, item, rating) for item, rating in enumerate(ratings) if rating is not None]
    for user, ratings in enumerate([
        [1, 22, 1, 1, None, 0],
        [1, 32, 0, 0, 1, 0],
        [0, 18, 1, 1, 0, 1],
        [1, 40, 1, 0, 0, 1],
        [0, 40, 0, None, 1, 0],
        [0, 25, 1, 1, 1, None],
    ])
]

# The same ratings grouped by item as (item, user, rating) triples, i.e. the
# transpose of pu. Derived from pu so the two views always agree.
pv = [
    [(item, user, rating) for row in pu for user, col, rating in row if col == item]
    for item in range(6)
]

## <i><u>Predicting ratings</u></i>

### _V_ : randomly generated initial ratings for 6 items by 3 latent features

### _U_ : initial zero matrix for 6 users by 3 latent features 

### _Latent feature_:  
> **Coolness** is an example of a latent feature since it’s unobserved and not measurable directly. It also **reduces dimensions** because it’s a combination of many “features” we’ve observed about the person and implicitly weighted in our mind.

In [2]:
# Initial item factors, hard-coded from a random draw so reruns are reproducible.
# rows: items (6)
# columns: latent features (3)
# numpy.asmatrix is used instead of the numpy.mat alias, which NumPy 2.0
# removed; both return a numpy.matrix, so `*` stays a matrix product.
V = numpy.asmatrix([[0.15968384, 0.94411985, 0.83651085],
                    [0.73573009, 0.24906915, 0.85338239],
                    [0.25605814, 0.69905325, 0.50900407],
                    [0.24058435, 0.31848888, 0.60233653],
                    [0.24237479, 0.15293281, 0.22240255],
                    [0.03943766, 0.19287528, 0.95094265]])

# Initial user factors: an all-zero 6 x 3 matrix.
# rows: users (6)
# columns: latent features (3)
U = numpy.asmatrix(numpy.zeros((6, 3)))

### _L_ : Bayesian prior for adding a penalty term for large coefficients to tackle overfitting

In [3]:
# Regularization weight (the Bayesian prior "lambda"): it multiplies the
# identity matrix that is added to vo.T*vo / uo.T*uo before each ALS solve.
L = 0.03

### Fix V and minimize U

### Fix U and minimize V

### 5 iterations in total to end

### Calculate accuracy as the root-mean-squared error between the observed ratings in pu and the predictions U·Vᵀ

In [4]:
def _als_half_step(observations, fixed, reg):
    """Re-solve one factor matrix while the other factor matrix is held fixed.

    For every inner list of `observations` this solves the ridge-regularized
    normal equations ``(A^T A + reg * I) x = A^T p``, where ``A`` stacks the
    rows of `fixed` selected by the second element of each triple and ``p``
    holds the corresponding observed ratings.

    Args:
        observations: list of lists of ``(row_id, other_id, rating)`` triples,
            one inner list per row of the factor matrix being solved.
        fixed: numpy matrix of the factors held constant, indexed by
            ``other_id``; its column count is the number of latent features.
        reg: regularization weight placed on the diagonal.

    Returns:
        A numpy matrix with one solved factor row per inner list, in order.
    """
    rank = fixed.shape[1]
    ridge = reg * numpy.asmatrix(numpy.eye(rank))
    solved = []
    for triples in observations:
        # Rows of the fixed factors for the counterparts that were rated.
        design = numpy.asmatrix(fixed[[other for _, other, _ in triples], :])
        targets = numpy.asmatrix([rating for _, _, rating in triples]).T
        # Solve the linear system directly instead of forming an explicit
        # inverse; same solution, better numerical behaviour.
        factors = numpy.linalg.solve(design.T * design + ridge, design.T * targets)
        solved.append(factors.T)
    # Keep the matrix type so that `*` remains a matrix product for callers.
    return numpy.asmatrix(numpy.vstack(solved))


# NOTE: the loop variable `iter` shadows the builtin; the name is kept because
# the final-prediction cell reads `iter` after this loop has finished.
for iter in range(5):

    print("\n----- ITER %s -----" % (iter + 1))

    # Fix V and minimize over U.
    print("U before ALS = \n %s" %U)
    U = _als_half_step(pu, V, L)
    print("U after ALS = \n %s \n" %U)

    # Fix U and minimize over V.
    print("V before ALS = \n %s" %V)
    V = _als_half_step(pv, U, L)
    print("V after ALS = \n %s \n" %V)

    # Root-mean-squared error over the observed ratings only. Lower is better,
    # despite the "Accuracy" label in the printed message.
    squared_errors = [(p - (U[i] * V[j].T)[0, 0]) ** 2
                      for uset in pu for i, j, p in uset]
    rmse = math.sqrt(sum(squared_errors) / len(squared_errors))
    print("Accuracy after %s iteration = %5.5f" %(iter+1, rmse))


----- ITER 1 -----
U before ALS = 
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
U after ALS = 
 [[ 27.4885602   -6.39422952   0.76339048]
 [ 36.40898582 -10.96084392   2.19757652]
 [ 19.79884099  -6.62335648   2.23966906]
 [ 44.36456983 -13.91333908   3.61771636]
 [ 46.25668398 -15.11954125   4.75538362]
 [ 24.08497021 -11.99211026   7.23072313]] 

V before ALS = 
 [[0.15968384 0.94411985 0.83651085]
 [0.73573009 0.24906915 0.85338239]
 [0.25605814 0.69905325 0.50900407]
 [0.24058435 0.31848888 0.60233653]
 [0.24237479 0.15293281 0.22240255]
 [0.03943766 0.19287528 0.95094265]]
V after ALS = 
 [[ 0.05453246  0.11205156 -0.03322625]
 [ 0.45882762 -1.51295042 -0.61387642]
 [ 0.19340569  0.78784359  0.78828987]
 [ 0.25513016  1.06866187  1.06871891]
 [-0.16891961 -0.70834584 -0.47720952]
 [-0.0148988  -0.04766133  0.12516835]] 

Accuracy after 1 iteration = 0.45826

----- ITER 2 -----
U before ALS = 
 [[ 27.4885602   -6.39422952   0.76339048]
 [ 36.40898582 -

## Final predictions for ratings of user <i>i</i> against item <i>j</i> after 5th iteration

In [5]:
# Show the full predicted rating matrix (rows: users, columns: items) built
# from the factors left behind by the last ALS iteration.
heading = "Prediction of ratings of user i (row) against item j (column) after %sth iteration =\n" % (iter + 1)
print(heading)
# numpy.dot on two numpy matrices is the same matrix product as U * V.T.
print(numpy.dot(U, V.T))

Prediction of ratings of user i (row) against item j (column) after 5th iteration =

[[ 0.9789139  22.02597562  0.91877634  0.62461392 -0.41481479  0.39150616]
 [ 0.58191553 32.00769898  0.19842249 -0.254153    0.56590923  0.10318179]
 [ 0.18295371 17.99655422  1.05223409  1.00874849  0.17996798  1.05800142]
 [ 1.07806626 40.01474242  0.95779206  0.44714297  0.14977945  0.54731646]
 [ 0.28996928 39.96955198 -0.08842635 -0.61841292  1.23796027  0.12871275]
 [-0.16494296 24.9916029   0.99005735  0.93524641  0.80399884  1.32782601]]
