In [33]:
from numpy import *

In [34]:
U, Sigma, VT = linalg.svd([[1, 1], [7, 7]])

In [35]:
U

array([[-0.14142136, -0.98994949],
       [-0.98994949,  0.14142136]])

In [36]:
Sigma

array([10.,  0.])

In [37]:
VT

array([[-0.70710678, -0.70710678],
       [-0.70710678,  0.70710678]])

In [38]:
def loadExData():
    """Return the small example data matrix as a list of row lists.

    The table has 7 rows and 5 columns of integers. A new nested list is
    built on every call, so the caller may modify the result freely.
    """
    rows = (
        (1, 1, 1, 0, 0),
        (2, 2, 2, 0, 0),
        (1, 1, 1, 0, 0),
        (5, 5, 5, 0, 0),
        (1, 1, 0, 2, 2),
        (0, 0, 0, 3, 3),
        (0, 0, 0, 1, 1),
    )
    return [list(row) for row in rows]

In [39]:
Data = loadExData()

In [40]:
U, Sigma, VT = linalg.svd(Data)

In [41]:
Sigma

array([9.72140007e+00, 5.29397912e+00, 6.84226362e-01, 1.58746493e-15,
       3.13347038e-31])

In [42]:
# Diagonal matrix holding the three largest singular values. `asmatrix` is
# used because the `mat` alias was removed from the NumPy namespace in 2.0,
# and `diag` builds the diagonal without spelling out every zero.
Sig3 = asmatrix(diag(Sigma[:3]))

In [43]:
U[:, :3] * Sig3 * VT[:3, :]

matrix([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         -2.69315820e-16, -2.70616862e-16],
        [ 2.00000000e+00,  2.00000000e+00,  2.00000000e+00,
          4.61219604e-16,  4.57966998e-16],
        [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
          1.15543426e-15,  1.15185639e-15],
        [ 5.00000000e+00,  5.00000000e+00,  5.00000000e+00,
         -1.50053581e-16, -1.66533454e-16],
        [ 1.00000000e+00,  1.00000000e+00, -6.10622664e-16,
          2.00000000e+00,  2.00000000e+00],
        [-1.11022302e-16,  1.80411242e-15, -1.77635684e-15,
          3.00000000e+00,  3.00000000e+00],
        [-7.63278329e-17,  5.48172618e-16, -4.85722573e-16,
          1.00000000e+00,  1.00000000e+00]])

In [44]:
from numpy import *
from numpy import linalg as la

In [45]:
def ecludSim(inA, inB):
    """Similarity derived from the Euclidean distance between two vectors.

    Returns ``1 / (1 + ||inA - inB||)``: 1.0 for identical inputs and
    values approaching 0 as the inputs move apart.
    """
    distance = la.norm(inA - inB)
    return 1.0 / (1.0 + distance)

def pearsSim(inA, inB):
    """Pearson-correlation similarity rescaled from [-1, 1] to [0, 1].

    Inputs with fewer than three entries are reported as perfectly similar
    (1.0). Otherwise the off-diagonal entry of ``corrcoef`` (variables in
    columns, ``rowvar=0``) is mapped through ``0.5 + 0.5 * r``.
    """
    if len(inA) >= 3:
        pearson = corrcoef(inA, inB, rowvar=0)[0][1]
        return 0.5 + 0.5 * pearson
    return 1.0

def cosSim(inA, inB):
    """Cosine similarity of two vectors, rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    inA, inB : array_like
        Vectors with the same number of elements. Column matrices, row
        vectors and plain 1-D arrays are all accepted because both inputs
        are flattened to 1-D float arrays first.

    Returns
    -------
    float
        ``0.5 + 0.5 * cos(angle between inA and inB)``. If either vector has
        zero norm the cosine is undefined; it is treated as 0, so 0.5 is
        returned instead of NaN.
    """
    vecA = asarray(inA, dtype=float).ravel()
    vecB = asarray(inB, dtype=float).ravel()
    denom = la.norm(vecA) * la.norm(vecB)
    if denom == 0:
        # A zero vector has no direction; report the neutral midpoint
        # rather than dividing by zero.
        return 0.5
    # dot() of two 1-D arrays yields a true scalar, which avoids calling
    # float() on a 1x1 matrix (deprecated since NumPy 1.25).
    num = float(dot(vecA, vecB))
    return 0.5 + 0.5 * (num / denom)

In [46]:
# `asmatrix` replaces the `mat` alias, which was removed in NumPy 2.0.
myMat = asmatrix(loadExData())

In [47]:
ecludSim(myMat[:, 0], myMat[:, 4])

0.13367660240019172

In [48]:
ecludSim(myMat[:, 0], myMat[:, 0])

1.0

In [49]:
cosSim(myMat[:, 0], myMat[:, 4])

0.5472455591261534

In [50]:
cosSim(myMat[:, 0], myMat[:, 0])

0.9999999999999999

In [51]:
pearsSim(myMat[:, 0], myMat[:, 4])

0.23768619407595826

In [52]:
pearsSim(myMat[:, 0], myMat[:, 0])

1.0

## Item-based recommendation engine

In [53]:
def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` as a similarity-weighted average.

    For every column the user has a non-zero entry in, the similarity between
    that column and ``item`` is measured with ``simMeas`` over the rows where
    both columns are positive (0 when there are no such rows). Each
    similarity is printed. The result is ``sum(sim * rating) / sum(sim)``,
    or 0 when the similarities sum to 0.

    ``dataMat`` must expose ``.A`` on its column slices (i.e. be a NumPy
    matrix), with rows indexed by ``user`` and columns by ``item``.
    """
    numItems = shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(numItems):
        rating = dataMat[user, other]
        if rating != 0:
            # Rows in which both the target item and this item are rated.
            bothRated = logical_and(dataMat[:, item].A > 0,
                                    dataMat[:, other].A > 0)
            rows = nonzero(bothRated)[0]
            if len(rows) > 0:
                score = simMeas(dataMat[rows, item], dataMat[rows, other])
            else:
                score = 0
            print("The %d and %d similarity is: %f" % (item, other, score))
            weightSum += score
            weightedRatings += score * rating
    return 0 if weightSum == 0 else weightedRatings / weightSum

In [54]:
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return up to ``N`` (item, estimated score) pairs for ``user``.

    Candidate items are the columns where the user's row of ``dataMat`` is 0.
    Each candidate is scored with ``estMethod(dataMat, user, simMeas, item)``
    and the pairs are returned sorted by score, highest first. If the user
    has no zero entries the string "You rated everything" is returned instead.
    """
    candidates = nonzero(dataMat[user, :].A == 0)[1]
    if len(candidates) == 0:
        return "You rated everything"
    scored = [(item, estMethod(dataMat, user, simMeas, item))
              for item in candidates]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [55]:
# `asmatrix` replaces the `mat` alias, which was removed in NumPy 2.0.
myMat = asmatrix(loadExData())

In [56]:
myMat[0, 1] = myMat[0, 0] = myMat[1, 0] = myMat[2, 0] = 4
myMat[3, 3] = 2

In [57]:
myMat

matrix([[4, 4, 1, 0, 0],
        [4, 2, 2, 0, 0],
        [4, 1, 1, 0, 0],
        [5, 5, 5, 2, 0],
        [1, 1, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1]])

In [58]:
recommend(myMat, 2)

The 3 and 0 similarity is: 0.916025
The 3 and 1 similarity is: 0.916025
The 3 and 2 similarity is: 1.000000
The 4 and 0 similarity is: 1.000000
The 4 and 1 similarity is: 1.000000
The 4 and 2 similarity is: 0.000000


[(4, 2.5), (3, 1.9703483892927431)]

In [60]:
# 11 x 11 example matrix used for the SVD-based estimates below.
# `asmatrix` replaces the `mat` alias, which was removed in NumPy 2.0.
myMat2 = asmatrix([
    [2, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
    [3, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
    [5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
    [4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
    [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
    [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
    [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0],
    [1, 1, 2, 1, 1, 2, 1, 0, 4, 5, 0]
])

In [61]:
from numpy import linalg as la

In [62]:
U, Sigma, Vt = la.svd(myMat2)

In [63]:
Sigma

array([1.34342819e+01, 1.18190832e+01, 8.20176076e+00, 6.86912480e+00,
       5.29063022e+00, 3.91213561e+00, 2.94562509e+00, 2.35486137e+00,
       2.08702082e+00, 7.08715931e-01, 1.18673615e-16])

In [64]:
Sig2 = Sigma ** 2

In [66]:
sum(Sig2) * 0.9

447.2999999999997

In [74]:
sum(Sig2[:4])

434.62441339532046

In [79]:
def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate ``user``'s rating of ``item`` using SVD-reduced item vectors.

    Every column of ``dataMat`` is mapped to a ``k``-dimensional vector via
    ``dataMat.T * U[:, :k] * inv(diag(Sigma[:k]))``. The estimate is the
    similarity-weighted average of the user's non-zero ratings, where
    similarities are computed by ``simMeas`` between the reduced vectors of
    ``item`` and of each rated column. Each similarity is printed.

    Parameters
    ----------
    dataMat : matrix or array_like
        2-D data, rows indexed by ``user`` and columns by ``item``.
    user : int
        Row index of the user.
    simMeas : callable
        Called as ``simMeas(colVecA, colVecB)`` with ``(k, 1)`` matrices.
    item : int
        Column index of the item to estimate.
    numSV : int, optional
        Maximum number of leading singular values to keep (default 4).
        Fewer are used when the matrix has fewer singular values or when
        some of the leading ones are numerically zero, since those cannot
        be inverted.

    Returns
    -------
    The weighted average ``sum(sim * rating) / sum(sim)``, or 0 when the
    similarities sum to 0 or no usable singular value exists.

    Raises
    ------
    ValueError
        If ``numSV`` is smaller than 1.
    """
    if numSV < 1:
        raise ValueError("numSV must be at least 1")
    dataMat = asmatrix(dataMat)
    m, n = shape(dataMat)
    simTotal = 0.0
    ratSimTotal = 0.0
    # NOTE: the decomposition is recomputed on every call.
    U, Sigma, VT = la.svd(dataMat)

    # Keep at most numSV singular values, and only those that are safely
    # above rounding noise, so that the diagonal matrix below is invertible.
    k = 0
    if len(Sigma) > 0:
        tol = Sigma[0] * (m if m > n else n) * finfo(float).eps
        k = int(count_nonzero(Sigma[:numSV] > tol))
    if k == 0:
        return 0

    SigK = asmatrix(diag(Sigma[:k]))
    # One row per item (column of dataMat), k reduced coordinates each.
    xformedItems = dataMat.T * U[:, :k] * SigK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        print("The %d and %d similarity is: %f" % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    else:
        return ratSimTotal / simTotal

In [80]:
recommend(myMat2, 1, estMethod=svdEst)

The 0 and 10 similarity is: 0.576545
The 1 and 10 similarity is: 0.346327
The 2 and 10 similarity is: 0.565529
The 3 and 10 similarity is: 0.484314
The 4 and 10 similarity is: 0.415442
The 5 and 10 similarity is: 0.929326
The 6 and 10 similarity is: 0.490999
The 7 and 10 similarity is: 0.322192
The 8 and 10 similarity is: 0.474054
The 9 and 10 similarity is: 0.496642


[(0, 5.0), (1, 5.0), (2, 5.0)]

## Image compression with SVD

In [98]:
def printMat(inMat, thresh=0.8):
    """Print a 2-D matrix as ASCII art, one text line per matrix row.

    Entries strictly greater than ``thresh`` are drawn as ``1`` and all
    others as ``.``; every printed line ends with a single space. The number
    of rows and columns is taken from ``inMat.shape``, so matrices of any
    size are printed in full.

    Parameters
    ----------
    inMat : 2-D NumPy array or matrix
        Values to render; indexed as ``inMat[i, k]``.
    thresh : float, optional
        Cut-off above which an entry counts as "on" (default 0.8).
    """
    numRows, numCols = inMat.shape
    for i in range(numRows):
        cells = ['1' if float(inMat[i, k]) > thresh else '.'
                 for k in range(numCols)]
        print(''.join(cells) + " ")

In [99]:
def imgCompress(numSV=3, thresh=0.8, fileName='0_5.txt'):
    """Show a digit image before and after a low-rank SVD reconstruction.

    Reads ``fileName``, taking the first 32 characters of every non-blank
    line as integer pixel values, prints the resulting matrix with
    ``printMat``, then rebuilds it from its leading singular values and
    prints the reconstruction.

    Parameters
    ----------
    numSV : int, optional
        Number of singular values to keep (default 3). Requests larger than
        the number of available singular values are clamped.
    thresh : float, optional
        Threshold passed to ``printMat`` (default 0.8).
    fileName : str, optional
        Path of the text image to load (default ``'0_5.txt'``).
    """
    pixelRows = []
    # `with` guarantees the file is closed even if a line fails to parse.
    with open(fileName) as imgFile:
        for line in imgFile:
            if not line.strip():
                continue  # ignore blank lines such as a trailing newline
            pixelRows.append([int(line[i]) for i in range(32)])
    # asmatrix replaces the `mat` alias, which was removed in NumPy 2.0.
    myMat = asmatrix(pixelRows)
    print("--Original matrix--")
    printMat(myMat, thresh)
    U, Sigma, VT = la.svd(myMat)
    # Never ask for more singular values than the decomposition provides.
    numUsed = numSV if numSV < len(Sigma) else len(Sigma)
    SigRecon = asmatrix(diag(Sigma[:numUsed]))
    reconMat = U[:, :numUsed] * SigRecon * VT[:numUsed, :]
    print()
    print("--Reconstructed matrix using %d singular values--" % numUsed)
    printMat(reconMat, thresh)

In [100]:
imgCompress(2)

--Original matrix--
..............11................ 
............111111.............. 
...........11111111............. 
..........1111111111............ 
........1111111111111........... 
.......111111111111111.......... 
........111111111111111......... 
........1111111....11111........ 
.......1111111.....11111........ 
......111111........1111........ 
......111111........11111....... 
......111111.........1111....... 
......111111.........1111....... 
.......111111.........1111...... 
......1111111.........1111...... 
......111111..........1111...... 
.......11111..........1111...... 
......111111..........1111...... 
.......11111..........1111...... 
.......11111.........11111...... 
........11111.........11111..... 
........11111.........11111..... 
........11111.........11111..... 
........11111........11111...... 
........11111.......111111...... 
........111111.....111111....... 
.........1111111111111111....... 
..........111111111111111....... 
..........111111111111111...