In [42]:
import numpy as np
import pandas as pd
import scipy.spatial.distance as dist
from sympy import *
from itertools import product

In [43]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [132]:
class CollaborativeFiltering:
    def __init__(self,  items, data, users=None, rounding_digit = 2):
        self.users = users
        self.items = items
        self.rounding_digit  = rounding_digit
        if not self.users:
            self.users = [f'U{i+1}' for i in range(len(data))]
        self.cf_matrix = pd.DataFrame(data, index = self.users, columns=self.items)
    def calculate_jaccard_similarity(self):
        print('Calculating Jaccard Similarity')
        display(self.cf_matrix)
        
        user_combs = [(i, j) for i, j in list(product(self.users, self.users)) if  i < j]
        
        for comb in user_combs:
            c1 = comb[0]
            c2 = comb[1]
            display(Eq(Symbol(f'sim({c1},{c2})'), Symbol(f'r_{{{c1}}} \u2229 r_{{{c2}}}')/
                       Symbol(f'r_{{{c1}}} \u222A r_{{{c2}}}')))
            r_a = self.cf_matrix.loc[c1]
            r_b = self.cf_matrix.loc[c2]
            
            num = sum(r_a.astype(bool)&r_b.astype(bool))
            
            den = sum(r_a.astype(bool)|r_b.astype(bool))
            display(Eq(Symbol(f'sim({c1},{c2})'), Symbol(f'{num}')/
                       Symbol(f'{den}')))
            display(Eq(Symbol(f'sim({c1},{c2})'), round(num/den, self.rounding_digit)))
            print('\n---------------------------------------------\n')
            
    def calculate_cosine_similarity(self):
        print('Calculating Cosine Similarity')
        display(self.cf_matrix)
        
        user_combs = [(i, j) for i, j in list(product(self.users, self.users)) if  i < j]
        
        for comb in user_combs:
            c1 = comb[0]
            c2 = comb[1]
            display(Eq(Symbol(f'sim({c1},{c2})'), Symbol(f'cos(r_{{{c1}}}, r_{{{c2}}})')))
            
            display(Eq(Symbol(f'sim({c1},{c2})'), Symbol(f'r_{{{c1}}}. r_{{{c2}}}')/
                       Symbol(f'|r_{{{c1}}}| |r_{{{c2}}}|')))
            r_a = self.cf_matrix.loc[c1]
            r_b = self.cf_matrix.loc[c2]
            
            sim = r_a@r_b/(np.linalg.norm(r_a)*np.linalg.norm(r_b))
            
            display(Eq(Symbol(f'sim({c1},{c2})'), round(sim, self.rounding_digit)))
            print('\n---------------------------------------------\n')
            
    def calculate_centered_cosine_similarity(self):
        print('Calculating Cosine Similarity')
        print('\n CF matrix')
        display(self.cf_matrix)
        print('\n CF matrix normalized by mean')
        
        diff_df = self.cf_matrix.sum(axis=1)/self.cf_matrix.astype(bool).sum(axis=1)
        self.cf_centered_matrix = self.cf_matrix.subtract(diff_df,axis=0)
        self.cf_centered_matrix = self.cf_centered_matrix[self.cf_matrix.astype(bool)]
        self.cf_centered_matrix.fillna(0, inplace=True)
        self.cf_centered_matrix = np.round(self.cf_centered_matrix, self.rounding_digit)
        display(self.cf_centered_matrix)
        
        user_combs = [(i, j) for i, j in list(product(self.users, self.users)) if  i < j]
        
        for comb in user_combs:
            c1 = comb[0]
            c2 = comb[1]
            display(Eq(Symbol(f'sim({c1},{c2})'), Symbol(f'cos(r_{{{c1}}}, r_{{{c2}}})')))
            
            display(Eq(Symbol(f'sim({c1},{c2})'), Symbol(f'r_{{{c1}}}. r_{{{c2}}}')/
                       Symbol(f'|r_{{{c1}}}| |r_{{{c2}}}|')))
            r_a = self.cf_centered_matrix.loc[c1]
            r_b = self.cf_centered_matrix.loc[c2]
            
            sim = r_a@r_b/(np.linalg.norm(r_a)*np.linalg.norm(r_b))
            
            display(Symbol(f'sim({c1},{c2}) = {round(sim, self.rounding_digit)}'))
            print('\n---------------------------------------------\n')
                

In [127]:
items = ['HP1', 'HP2', 'HP3', 'TW', 'SW1', 'SW2', 'SW3']
users = ['A', 'B', 'C', 'D']
data = [
    [4,0,0,5,1,0,0],
    [5,5,4,0,0,0,0],
    [0,0,0,2,4,5,0],
    [0,3,0,0,0,0,3]
]
CollaborativeFiltering(items=items, data=data, users=users).calculate_jaccard_similarity()

Calculating Jaccard Similarity


Unnamed: 0,HP1,HP2,HP3,TW,SW1,SW2,SW3
A,4,0,0,5,1,0,0
B,5,5,4,0,0,0,0
C,0,0,0,2,4,5,0
D,0,3,0,0,0,0,3


Eq(sim(A,B), r_{A} ∩ r_{B}/r_{A} ∪ r_{B})

Eq(sim(A,B), 1/5)

Eq(sim(A,B), 0.2)


---------------------------------------------



Eq(sim(A,C), r_{A} ∩ r_{C}/r_{A} ∪ r_{C})

Eq(sim(A,C), 2/4)

Eq(sim(A,C), 0.5)


---------------------------------------------



Eq(sim(A,D), r_{A} ∩ r_{D}/r_{A} ∪ r_{D})

Eq(sim(A,D), 0/5)

Eq(sim(A,D), 0.0)


---------------------------------------------



Eq(sim(B,C), r_{B} ∩ r_{C}/r_{B} ∪ r_{C})

Eq(sim(B,C), 0/6)

Eq(sim(B,C), 0.0)


---------------------------------------------



Eq(sim(B,D), r_{B} ∩ r_{D}/r_{B} ∪ r_{D})

Eq(sim(B,D), 1/4)

Eq(sim(B,D), 0.25)


---------------------------------------------



Eq(sim(C,D), r_{C} ∩ r_{D}/r_{C} ∪ r_{D})

Eq(sim(C,D), 0/5)

Eq(sim(C,D), 0.0)


---------------------------------------------



In [130]:
CollaborativeFiltering(items=items, data=data, users=users).calculate_cosine_similarity()

Calculating Cosine Similarity


Unnamed: 0,HP1,HP2,HP3,TW,SW1,SW2,SW3
A,4,0,0,5,1,0,0
B,5,5,4,0,0,0,0
C,0,0,0,2,4,5,0
D,0,3,0,0,0,0,3


Eq(sim(A,B), cos(r_{A}, r_{B}))

Eq(sim(A,B), r_{A}. r_{B}/|r_{A}| |r_{B}|)

Eq(sim(A,B), 0.38)


---------------------------------------------



Eq(sim(A,C), cos(r_{A}, r_{C}))

Eq(sim(A,C), r_{A}. r_{C}/|r_{A}| |r_{C}|)

Eq(sim(A,C), 0.32)


---------------------------------------------



Eq(sim(A,D), cos(r_{A}, r_{D}))

Eq(sim(A,D), r_{A}. r_{D}/|r_{A}| |r_{D}|)

Eq(sim(A,D), 0.0)


---------------------------------------------



Eq(sim(B,C), cos(r_{B}, r_{C}))

Eq(sim(B,C), r_{B}. r_{C}/|r_{B}| |r_{C}|)

Eq(sim(B,C), 0.0)


---------------------------------------------



Eq(sim(B,D), cos(r_{B}, r_{D}))

Eq(sim(B,D), r_{B}. r_{D}/|r_{B}| |r_{D}|)

Eq(sim(B,D), 0.44)


---------------------------------------------



Eq(sim(C,D), cos(r_{C}, r_{D}))

Eq(sim(C,D), r_{C}. r_{D}/|r_{C}| |r_{D}|)

Eq(sim(C,D), 0.0)


---------------------------------------------



In [133]:
CollaborativeFiltering(items=items, data=data, users=users).calculate_centered_cosine_similarity()

Calculating Cosine Similarity

 CF matrix


Unnamed: 0,HP1,HP2,HP3,TW,SW1,SW2,SW3
A,4,0,0,5,1,0,0
B,5,5,4,0,0,0,0
C,0,0,0,2,4,5,0
D,0,3,0,0,0,0,3



 CF matrix normalized by mean


Unnamed: 0,HP1,HP2,HP3,TW,SW1,SW2,SW3
A,0.67,0.0,0.0,1.67,-2.33,0.0,0.0
B,0.33,0.33,-0.67,0.0,0.0,0.0,0.0
C,0.0,0.0,0.0,-1.67,0.33,1.33,0.0
D,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Eq(sim(A,B), cos(r_{A}, r_{B}))

Eq(sim(A,B), r_{A}. r_{B}/|r_{A}| |r_{B}|)

sim(A,B) = 0.09


---------------------------------------------



Eq(sim(A,C), cos(r_{A}, r_{C}))

Eq(sim(A,C), r_{A}. r_{C}/|r_{A}| |r_{C}|)

sim(A,C) = -0.56


---------------------------------------------



Eq(sim(A,D), cos(r_{A}, r_{D}))

Eq(sim(A,D), r_{A}. r_{D}/|r_{A}| |r_{D}|)



sim(A,D) = nan


---------------------------------------------



Eq(sim(B,C), cos(r_{B}, r_{C}))

Eq(sim(B,C), r_{B}. r_{C}/|r_{B}| |r_{C}|)

sim(B,C) = 0.0


---------------------------------------------



Eq(sim(B,D), cos(r_{B}, r_{D}))

Eq(sim(B,D), r_{B}. r_{D}/|r_{B}| |r_{D}|)

sim(B,D) = nan


---------------------------------------------



Eq(sim(C,D), cos(r_{C}, r_{D}))

Eq(sim(C,D), r_{C}. r_{D}/|r_{C}| |r_{D}|)

sim(C,D) = nan


---------------------------------------------

