In [5]:
import numpy as np
from __future__ import division

# 9.2.1

In [97]:
class Computer(object):
    def __init__(self, proc, disk, mem):
        self.processor_speed = proc
        self.disk = disk
        self.main_memory = mem
        self.summary = [proc, disk, mem]
    
    def dot_prod(self,X):
        if isinstance(X, Computer):
            bar = [X.processor_speed, X.disk, X.main_memory]
            return sum([x*y for x,y in zip(self.summary,bar)])
        else:
            assert len(X) == 3
            return sum([x*y for x,y in zip(self.summary,X)])
            
    def cosine(self,X,alpha=1,beta=1):
        if isinstance(X, Computer):
            foo = [self.processor_speed, alpha*self.disk, beta*self.main_memory]
            bar = [X.processor_speed, alpha*X.disk, alpha*X.main_memory]
            ati = np.dot(foo,bar)
            tun = np.sqrt(np.dot(foo,foo))*np.sqrt(np.dot(bar,bar))
            return ati/tun
    
    def normalize(self, mu):
        assert len(mu) == 3
        return [self.processor_speed - mu[0], self.disk - mu[1], 
                       self.main_memory - mu[2]]

In [98]:
A = Computer(3.06,500,6)
B = Computer(2.68,320,4)
C = Computer(2.92,640,6)

In [99]:
computers = {'A':A, 'B':B, 'C':C}

(b) cosine similarities when alpha=beta=1

In [100]:
pairs = [['A','B'],['A','C'],['B','C']]

In [101]:
# cosine similarities
for pair in pairs:
    print pair, computers[pair[0]].cosine(computers[pair[1]])

['A', 'B'] 0.999997333284
['A', 'C'] 0.999995343121
['B', 'C'] 0.999987853375


(c) cosine similarities when alpha=0.01 and beta=0.5

In [102]:
for pair in pairs:
    print pair, computers[pair[0]].cosine(computers[pair[1]],0.01,0.5)

['A', 'B'] 0.884792148899
['A', 'C'] 0.887525858762
['B', 'C'] 0.873005241921


(d) setting alpha = 1/avg(disk size) and beta = 1/avg(main_memory)

In [103]:
alpha = 1/np.mean([A.disk,B.disk,C.disk])
beta = 1/np.mean([A.main_memory,B.main_memory,C.main_memory])

In [104]:
print alpha,beta

0.00205479452055 0.1875


In [105]:
for pair in pairs:
    print pair, computers[pair[0]].cosine(computers[pair[1]],alpha,beta)

['A', 'B'] 0.941990802633
['A', 'C'] 0.940905717338
['B', 'C'] 0.949959248828


# 9.2.2 

(a) Normalizing the vectors of the three computers of 9.2.1

In [106]:
mean_proc = np.mean([comp.processor_speed for comp in computers.values()])
mean_disk = np.mean([comp.disk for comp in computers.values()])
mean_memory = np.mean([comp.main_memory for comp in computers.values()])

In [107]:
means = [mean_proc, mean_disk, mean_memory]

In [111]:
print 'A:', A.normalize(means)
print 'B:',B.normalize(means)
print 'C:',C.normalize(means)

A: [0.17333333333333334, 13.333333333333314, 0.66666666666666696]
B: [-0.20666666666666655, -166.66666666666669, -1.333333333333333]
C: [0.033333333333333215, 153.33333333333331, 0.66666666666666696]


# 9.2.3 

In [123]:
# ordered [A,B,C]
user_ratings = [4,2,5]

(a) normalizing the ratings for this user

In [116]:
avg_rating = np.mean(user_ratings)

In [118]:
# normalize the ratings for this user
[rate - avg_rating for rate in user_ratings]

[0.33333333333333348, -1.6666666666666665, 1.3333333333333335]

(b) constructing a user profile from the items profiles

*I use the weights rating/5 *

In [119]:
weights = [rate/5 for rate in user_ratings]

In [126]:
weights

[0.8, 0.4, 1.0]

In [124]:
user_profile = {}
user_profile['proc_speed'] = sum([wt*proc for wt,proc in zip(weights,[A.processor_speed,
                                                                B.processor_speed,C.processor_speed])])
user_profile['disk'] = sum([wt*disk for wt,disk in zip(weights,[A.disk,B.disk,C.disk])])
user_profile['main_memory'] = sum([wt*mm for wt,mm in zip(weights,[A.main_memory,B.main_memory,
                                                                  C.main_memory])])

In [125]:
user_profile

{'disk': 1168.0, 'main_memory': 12.4, 'proc_speed': 6.44}

Alternatively, can use the following weights.

In [128]:
weights = [rate/sum(user_ratings) for rate in user_ratings]
weights

[0.36363636363636365, 0.18181818181818182, 0.45454545454545453]

In [129]:
user_profile = {}
user_profile['proc_speed'] = sum([wt*proc for wt,proc in zip(weights,[A.processor_speed,
                                                                B.processor_speed,C.processor_speed])])
user_profile['disk'] = sum([wt*disk for wt,disk in zip(weights,[A.disk,B.disk,C.disk])])
user_profile['main_memory'] = sum([wt*mm for wt,mm in zip(weights,[A.main_memory,B.main_memory,
                                                                  C.main_memory])])

In [130]:
user_profile

{'disk': 530.9090909090909,
 'main_memory': 5.636363636363637,
 'proc_speed': 2.9272727272727272}

This user_profile supplies aggregates that are within the support of each component.