# Cosine Similarity

The algorithm measures how similar two entities are (user-user or item-item).

In [1]:
import pandas as pd
import numpy as np

## Model Explain

In [2]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

output_notebook()

In [3]:
# Sample the cosine curve at 100 evenly spaced points on [-pi, pi].
x = np.linspace(-np.pi, np.pi, 100)
y = np.cos(x)

p = figure(plot_width=600, plot_height=300, title='Cosine of Unit circle')
p.line(x=x, y=y)

# Restrict the view to x in [-pi/2, pi/2] with the y-axis starting at 0,
# i.e. the part of the curve where the cosine is non-negative.
p.y_range.start = 0.
p.x_range.start = -np.pi / 2
p.x_range.end = np.pi / 2
show(p)

## Apply Cosine to 2D-point

\begin{equation}
\begin{split}
cos(A - B) & = cos(A)cos(B) + sin(A)sin(B)\\
 & = (\frac{A_x}{\sqrt{A_x ^ 2 + A_y ^ 2}} * \frac{B_x}{\sqrt{B_x ^ 2 + B_y ^ 2}}) + (\frac{A_y}{\sqrt{A_x ^ 2 + A_y ^ 2}} * \frac{B_y}{\sqrt{B_x ^ 2 + B_y ^ 2}}) \\
 & = \frac{A_x B_x + A_y B_y}{\sqrt{A_x ^ 2 + A_y ^ 2}{\sqrt{B_x ^ 2 + B_y ^ 2}}}
\end{split}
\end{equation}

## Cosine of vector

\begin{equation}
cos(V_A, V_B) = \frac{\sum_{i}{A_i B_i}}{\sqrt{\sum_{i}{A_i ^ 2}}\sqrt{\sum_{i}{B_i ^ 2}}} \\
\text{where}\; A_i \;\text{is the i-th member of}\; V_A \;\text{and}\; B_i \;\text{is the i-th member of}\; V_B
\end{equation}

In [4]:
# Load the raw records and tag each one with a unary "rating" of 1.
# (Presumably one row per follower/member pair -- confirm against the CSV.)
ratings = pd.read_csv('../ratings.csv').assign(count=1)

# follower x member matrix: 1 where a record exists, 0 everywhere else.
rating_matrix = (
    ratings
    .pivot(index='follower_id', columns='member', values='count')
    .fillna(0)
)
rating_matrix.head(10)

member,Can,Cherprang,Izurina,Jaa,Jan,Jane,Jennis,Jib,Kaew,Kaimook,...,Music,Namneung,Namsai,Nink,Noey,Orn,Piam,Pun,Satchan,Tarwaan
follower_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
758518,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
989241,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3219851,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3546211,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3957551,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4476611,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4541451,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4581431,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5060291,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5202411,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Item-Item

### Normalize Data::User-wise L2 Norm

Reduce the effect of `DD`s (fans who oshi, i.e. love, many members) and boost the weight of fans who love only a few members.

\begin{equation*}
\sqrt{\sum_{i=0}^{n}{R_i^2}} \quad\quad \text{for}\; R_i \in \text{Rating of}\; I_u
\end{equation*}

In [5]:
from math import sqrt
from bokeh.palettes import Plasma

p = figure(plot_width=500, plot_height=450, title='L2-Norm on unary data')

x = np.arange(1, 10)

# One curve per i in 1..9: the first i positions take the value 1/sqrt(i) and
# the remaining ones 0 -- the L2-normalised form of a unary vector with i ones.
for i in x:
    p.line(y=[(1. / np.sqrt(i)) if j <= i else 0. for j in x], x=x, color=Plasma[10][i])

# p.line(x=x, y=(1. / np.sqrt(x)), color=Plasma[10][0])

p.y_range.start = 0

show(p)

In [6]:
# Per-user weight 1 / sqrt(row sum). rating_matrix only holds 0/1 values, so the
# row sum equals the squared L2 norm of the row.
user_norm = 1 / (rating_matrix).sum(axis=1) ** (0.5)
# Replace every positive cell with its user's weight (aligned on the row index);
# cells equal to 0 are left unchanged.
l2norm_rating = rating_matrix.mask(rating_matrix > 0, user_norm, axis=0)
l2norm_rating.iloc[2345:2358]

member,Can,Cherprang,Izurina,Jaa,Jan,Jane,Jennis,Jib,Kaew,Kaimook,...,Music,Namneung,Namsai,Nink,Noey,Orn,Piam,Pun,Satchan,Tarwaan
follower_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
99721980,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99843951,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0
99845163,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99865778,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99868180,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99880710,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99885582,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99900710,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99948923,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99958903,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Similarity Function :: Cosine Similarity

\begin{equation*}
\cos({\theta})=\frac{\sum_{u \in P}{R_{i,u} R_{j,u}}}{\sqrt{\sum_{u \in P}{R_{i,u}}^2}\sqrt{\sum_{u \in P}{R_{j,u}}^2}}\\
\text{When P is set of users}
\end{equation*}

In [7]:
# Items as rows: transpose of the user-normalised rating values.
# NOTE(review): this rebinds `ratings`, which held the raw CSV DataFrame earlier
# in the notebook, to a NumPy array.
ratings = l2norm_rating.values.transpose()
# Euclidean norm of every item row.
l2norm = np.sqrt(np.sum(ratings ** 2, axis=1))

# Pairwise dot products divided by the outer product of the item norms,
# i.e. the cosine similarity between every pair of items.
sim_values = (
    np.dot(ratings, ratings.transpose()) / 
    (l2norm * l2norm[np.newaxis].T)
)
item_sim_df = pd.DataFrame(sim_values, index=rating_matrix.columns, columns=rating_matrix.columns)

In [8]:
item_sim_df

member,Can,Cherprang,Izurina,Jaa,Jan,Jane,Jennis,Jib,Kaew,Kaimook,...,Music,Namneung,Namsai,Nink,Noey,Orn,Piam,Pun,Satchan,Tarwaan
member,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Can,1.0,0.027634,0.010673,0.027043,0.033927,0.019522,0.021931,0.011006,0.04144,0.021587,...,0.028931,0.04432,0.0384,0.013257,0.035738,0.025128,0.012719,0.03413,0.006843,0.046261
Cherprang,0.027634,1.0,0.037515,0.067094,0.161402,0.037378,0.066059,0.013933,0.156627,0.109067,...,0.28228,0.0923,0.078244,0.012351,0.10143,0.185265,0.025895,0.249634,0.023969,0.128286
Izurina,0.010673,0.037515,1.0,0.026361,0.037411,0.016193,0.016184,0.013238,0.033331,0.028479,...,0.048287,0.029449,0.023934,0.010639,0.024136,0.0286,0.018569,0.027704,0.013767,0.030038
Jaa,0.027043,0.067094,0.026361,1.0,0.097439,0.071486,0.059595,0.026486,0.113775,0.118397,...,0.102538,0.163259,0.167227,0.040588,0.100224,0.100514,0.036849,0.072928,0.026334,0.134082
Jan,0.033927,0.161402,0.037411,0.097439,1.0,0.050586,0.082087,0.020478,0.261984,0.141374,...,0.214094,0.167254,0.100709,0.023864,0.152507,0.140364,0.033563,0.175671,0.029952,0.184268
Jane,0.019522,0.037378,0.016193,0.071486,0.050586,1.0,0.028015,0.015605,0.045545,0.05055,...,0.046717,0.062986,0.065372,0.04958,0.044961,0.045761,0.024964,0.041345,0.009051,0.070521
Jennis,0.021931,0.066059,0.016184,0.059595,0.082087,0.028015,1.0,0.014299,0.090632,0.071019,...,0.086596,0.087557,0.062232,0.009142,0.07007,0.063912,0.017736,0.087634,0.015565,0.096228
Jib,0.011006,0.013933,0.013238,0.026486,0.020478,0.015605,0.014299,1.0,0.020686,0.025581,...,0.021527,0.029261,0.03017,0.020541,0.017274,0.013954,0.03814,0.013143,0.009356,0.027059
Kaew,0.04144,0.156627,0.033331,0.113775,0.261984,0.045545,0.090632,0.020686,1.0,0.157785,...,0.199538,0.198013,0.113517,0.023552,0.156947,0.172139,0.033698,0.176547,0.031583,0.242794
Kaimook,0.021587,0.109067,0.028479,0.118397,0.141374,0.05055,0.071019,0.025581,0.157785,1.0,...,0.166355,0.146993,0.127518,0.020899,0.114367,0.129975,0.033063,0.124024,0.022843,0.166804


In [9]:
item_sim_df['Jan'].sort_values(ascending=False)

member
Jan          1.000000
Kaew         0.261984
Music        0.214094
Tarwaan      0.184268
Mind         0.181907
Pun          0.175671
Namneung     0.167254
Cherprang    0.161402
Noey         0.152507
Kaimook      0.141374
Orn          0.140364
Kate         0.113338
Mobile       0.110738
Namsai       0.100709
Jaa          0.097439
Korn         0.096807
Jennis       0.082087
Jane         0.050586
Izurina      0.037411
Can          0.033927
Piam         0.033563
Satchan      0.029952
Miori        0.025922
Nink         0.023864
Maysa        0.022733
Jib          0.020478
Name: Jan, dtype: float64

### Load member images

In [10]:
# %load ../utility/member_display.py
from operator import itemgetter

import requests

# Upper bound, in pixels, for the width of a rendered member image.
IMAGE_MAX_WIDTH = 150
# Fetched when the cell runs; requires network access.
# NOTE(review): no timeout or HTTP status check is applied to this request.
members = requests.get('https://www.api.bnk48.com/api/members')

# Capitalised slug -> value of 'avatar_image' for every entry under 'members'
# in the JSON response.
member_images = {
    element['slug'].capitalize(): element['avatar_image']
    for element in members.json()['members']
}

def show_rank(ranks, n=5):
    """Render the first ``n`` ranked members as a row of HTML ``<img>`` tags.

    Parameters
    ----------
    ranks : sequence of (name, score) pairs
        Only ``ranks[:n]`` is rendered, in the order given. Every name must be
        a key of the module-level ``member_images`` mapping.
    n : int, default 5
        Number of leading entries to render.

    Returns
    -------
    str
        The ``<img>`` tags joined by single spaces, or ``''`` when there is
        nothing to render.

    Raises
    ------
    KeyError
        If a name has no entry in ``member_images``.
    """
    selected_items = ranks[:n]
    if not selected_items:
        # max() on an empty sequence would raise ValueError.
        return ''

    max_size = max(selected_items, key=itemgetter(1))[1]
    # Integer floor so the emitted CSS width is always a whole pixel count.
    min_width = IMAGE_MAX_WIDTH // 2

    def img_tag(name, size):
        # Width is proportional to the score relative to the best score and
        # never smaller than half of IMAGE_MAX_WIDTH. When the best score is 0
        # no ratio can be formed, so the minimum width is used.
        scaled = int(size / max_size * IMAGE_MAX_WIDTH) if max_size else min_width
        image_size = max(scaled, min_width)
        return f'''<img src="{member_images[name]}" 
            alt="{name} = {size * 100}%" 
            style="width: {image_size}px; display: inline-block;"/>'''

    return ' '.join([img_tag(*member) for member in selected_items])

## Top 5 Similarity to Saint'Jan

{{ show_rank(list(item_sim_df['Jan'].sort_values(ascending=False).iteritems())[1:], n=5) }}

## Top 5 Similarity to Pun

{{ show_rank(list(item_sim_df['Pun'].sort_values(ascending=False).iteritems())[1:], n=5) }}

### Rating Function

\begin{equation*}
\text{S}(u,i) = \frac{\sum_{j \in N} Sim_{i,j} r_{u,j}}{\sum_{j \in N}{|Sim_{i,j}|}} \quad \text{where N is the set of all items other than i, and}\; r_{u,j} = 1 \;\text{if user u likes item j, otherwise 0}
\end{equation*}

In [11]:
def predict(similarity_matrix, oshi_mems, target):
    """Score how strongly ``target`` relates to a collection of liked members.

    The score is the summed similarity between ``target`` and every member in
    ``oshi_mems``, divided by the summed similarity between ``target`` and all
    other members (the target's similarity with itself is excluded from the
    divisor).

    Parameters
    ----------
    similarity_matrix : pandas.DataFrame
        Square item-item similarity table, labelled by member on both axes.
    oshi_mems : iterable of labels
        Members the user already likes. Any iterable is accepted, including a
        set.
    target : label
        Member to score.

    Returns
    -------
    float
        The normalised score, or ``0.0`` when ``target`` has no similarity to
        any other member.

    Raises
    ------
    KeyError
        If ``target`` or one of ``oshi_mems`` is not in ``similarity_matrix``.
    """
    weight_vec = similarity_matrix[target]
    # pandas rejects set indexers (TypeError since pandas 2.0), so materialise
    # whatever iterable was passed as a list before using it with .loc.
    liked = list(oshi_mems)
    total = weight_vec.sum() - weight_vec.loc[target]
    if total == 0:
        # No similarity to any other member: avoid 0/0 -> NaN.
        return 0.0
    return weight_vec.loc[liked].sum() / total

In [12]:
predict(item_sim_df, ['Cherprang', 'Music'], 'Jan')

0.14114359560025197

In [13]:
def recommend(similarity_matrix, oshi_mems, n=5):
    """Return the ``n`` best-scoring members that are not already liked.

    Parameters
    ----------
    similarity_matrix : pandas.DataFrame
        Square item-item similarity table, labelled by member on both axes.
    oshi_mems : iterable of labels
        Members the user already likes; they are excluded from the result.
    n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list of (member, score) tuples
        Sorted by score, highest first. Members with equal scores keep the
        order in which they appear in ``similarity_matrix.index``.
    """
    # A list is forwarded to predict() because pandas does not accept a set as
    # a .loc indexer; the set is only used for membership tests.
    liked = list(oshi_mems)
    liked_lookup = set(liked)

    # Walk the index in its own order instead of iterating a set difference:
    # set iteration order for strings changes between interpreter runs, which
    # would reorder members whose scores tie.
    scores = [
        (member, predict(similarity_matrix, liked, member))
        for member in similarity_matrix.index
        if member not in liked_lookup
    ]

    return sorted(scores, key=lambda x: x[1], reverse=True)[:n]

### Recommendation
What if I like `Music`, `Kaew` and `Tarwaan`?

In [14]:
result = recommend(item_sim_df, {'Music', 'Kaew', 'Tarwaan'}, n=20)

In [15]:
result

[('Jan', 0.24821481838842668),
 ('Cherprang', 0.24547201251752609),
 ('Pun', 0.22635969677405124),
 ('Orn', 0.21428872115687819),
 ('Mind', 0.206317933938203),
 ('Kaimook', 0.20527530710096173),
 ('Jennis', 0.20332036511323096),
 ('Noey', 0.20267533094597537),
 ('Mobile', 0.20215426297696767),
 ('Namneung', 0.19890606531069013),
 ('Satchan', 0.19812932646146322),
 ('Izurina', 0.18208598846017296),
 ('Can', 0.17790867331049651),
 ('Miori', 0.172909345121632),
 ('Kate', 0.17049731358418688),
 ('Namsai', 0.16973726522191582),
 ('Korn', 0.16828387631569269),
 ('Jaa', 0.16055099254092248),
 ('Maysa', 0.15335008946197462),
 ('Jane', 0.15146674766729157)]

{{show_rank(result, n=5)}}

# User-User

## Remove users with too few ratings

In [16]:
# Followers whose row sums to exactly 1, i.e. who like a single member.
tan_oshis = rating_matrix[rating_matrix.sum(axis=1) == 1.].index
# Everyone else: the rows kept for the user-user model.
active_ratings = rating_matrix.drop(tan_oshis)
active_ratings

member,Can,Cherprang,Izurina,Jaa,Jan,Jane,Jennis,Jib,Kaew,Kaimook,...,Music,Namneung,Namsai,Nink,Noey,Orn,Piam,Pun,Satchan,Tarwaan
follower_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5202411,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8085222,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9366932,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9594042,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
9990712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10650962,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
12156692,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
12157412,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12791262,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12815372,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0


## Optimize with sparse matrix

In [31]:
# Share of cells in active_ratings that carry a value: the sum of all cells
# divided by the total number of cells.
active_ratio = active_ratings.values.sum() / active_ratings.size
f'Only {active_ratings.values.sum()} cells of {active_ratings.size} has value ({active_ratio * 100}%)'

'Only 95900.0 cells of 523380 has value (18.32320684779701%)'

Instead of keeping every value in memory, we can store only the values and positions of the active cells (data != 0).

In [18]:
from scipy.sparse import coo_matrix

# Column (member) and row (follower) labels of active_ratings, kept so that
# label-free sparse data can be turned back into a labelled DataFrame.
ITEM_INDEX = active_ratings.columns
USER_INDEX = active_ratings.index

def to_sparse(rating_df):
    """Return the values of ``rating_df`` as a SciPy COO matrix (labels are dropped)."""
    dense_values = rating_df.values
    return coo_matrix(dense_values)

def to_dataframe(sparse_ratings):
    """Densify ``sparse_ratings`` and label it with USER_INDEX rows and ITEM_INDEX columns."""
    dense = sparse_ratings.todense()
    return pd.DataFrame(dense, index=USER_INDEX, columns=ITEM_INDEX)

ratings = to_sparse(active_ratings)

In [19]:
# Item-wise scaling: every member column is divided by the square root of its
# column sum.
# NOTE(review): this rebinds `l2norm_rating`, which held the user-wise
# normalised matrix in the item-item section.
l2norm_rating = active_ratings / (active_ratings).sum(axis=0) ** (0.5)
# Largest scaled value per member, highest first.
l2norm_rating.max(axis=0).sort_values(ascending=False)

member
Jib          0.090536
Nink         0.082761
Maysa        0.079057
Satchan      0.058124
Miori        0.057260
Can          0.048280
Piam         0.042563
Jane         0.038925
Jaa          0.026153
Namsai       0.023525
Kate         0.022092
Korn         0.020712
Mind         0.019062
Namneung     0.018337
Kaimook      0.017689
Jennis       0.016772
Noey         0.015863
Mobile       0.015831
Tarwaan      0.014947
Izurina      0.014464
Orn          0.013251
Kaew         0.012548
Jan          0.012239
Music        0.009934
Pun          0.009380
Cherprang    0.007990
dtype: float64

## Normalization

Reduce effect of popular items and offer something niche but more relevant.

In [20]:
def normalize(matrix):
    """Scale every column of ``matrix`` to unit Euclidean (L2) length.

    Parameters
    ----------
    matrix : array-like or SciPy sparse matrix
        Anything accepted by ``scipy.sparse.coo_matrix``.

    Returns
    -------
    scipy.sparse.coo_matrix
        A new COO matrix whose stored values are divided by the L2 norm of
        their column. The argument itself is not modified.
    """
    matrix = coo_matrix(matrix)

    # The norms are computed from the argument itself. The previous version
    # read the notebook-global `ratings` here, so the result was only correct
    # when that global happened to be the matrix being normalised.
    # ravel() (rather than squeeze()) keeps a 1-D array for one-column input.
    norms = np.sqrt(np.asarray(matrix.power(2).sum(axis=0))).ravel()
    # A zero-norm column can only contain explicitly stored zeros; dividing
    # those by 1 keeps them at 0 instead of producing NaN.
    norms[norms == 0] = 1.0
    matrix.data = matrix.data / norms[matrix.col]
    return matrix

normalized = normalize(ratings)
normalized.data

array([ 0.00799029,  0.01223888,  0.01446392, ...,  0.04256283,
        0.00937986,  0.01494703])

## Similarity Matrix

In [21]:
# Euclidean norm of every user row, as an (n_users, 1) NumPy column so that the
# outer product below is plain broadcasting rather than np.matrix semantics.
l2norm = np.sqrt(np.asarray(normalized.power(2).sum(axis=1))).reshape(-1, 1)

# User-user cosine similarity: pairwise dot products of the rows divided by the
# outer product of the row norms. The sparse `.dot` method is used because
# np.dot is not aware of SciPy sparse matrices.
similarity = (
    normalized.dot(normalized.T) /
    (l2norm * l2norm.T)
)

In [22]:
# Maximum number of neighbours that contribute to one prediction.
MAX_PAIRS = 200
# Neighbours with a similarity below this value get a weight of 0.
MIN_SIMILARITY = .4

def predict(user, item, rating_matrix, similarity):
    """Predict ``user``'s score for ``item`` from the ratings of similar users.

    NOTE: this definition replaces the item-item
    ``predict(similarity_matrix, oshi_mems, target)`` defined earlier in the
    notebook.

    Parameters
    ----------
    user : label
        Follower id; must be present in the module-level ``USER_INDEX``.
    item : label
        Column of ``rating_matrix`` to score.
    rating_matrix : pandas.DataFrame
        User x item ratings, indexed by the labels in ``USER_INDEX``.
    similarity : 2-D array-like
        User-user similarities, with rows and columns in ``USER_INDEX`` order.

    Returns
    -------
    float
        Similarity-weighted mean of the neighbours' ratings for ``item``, or
        ``0.0`` when no neighbour has a usable similarity.
    """
    user_idx = USER_INDEX.get_loc(user)
    user_sims = np.squeeze(np.asarray(similarity[user_idx]))
    # Zero out similarities >= 1 (the user itself and any user with an
    # identical profile) and those below MIN_SIMILARITY, then keep the
    # MAX_PAIRS largest weights.
    user_sims = pd.Series(
        np.where((user_sims >= 1.) | (user_sims < MIN_SIMILARITY), 0., user_sims),
        index=USER_INDEX
    ).sort_values(ascending=False)[:MAX_PAIRS]

    total_weight = user_sims.sum()
    if total_weight == 0:
        # No neighbour passed the filter: avoid 0/0 -> NaN, which would also
        # break sorting of the scores downstream.
        return 0.0

    # Single .loc call (rows and column together) instead of chained indexing.
    item_ratings = rating_matrix.loc[user_sims.index, item]
    return (user_sims * item_ratings).sum() / total_weight

In [23]:
user = 892030302792568833 # Music, Kaew, Tarwaan
predict(user, 'Jan', active_ratings, similarity)

0.22026623943404225

In [24]:
def recommend(user, rating_matrix, similarity):
    """Rank every member ``user`` has not liked yet by predicted score, best first.

    Returns a list of ``(member, score)`` tuples covering all columns of
    ``rating_matrix`` whose value for ``user`` is below 1.
    """
    own_row = rating_matrix.loc[user]
    not_liked = own_row[own_row < 1.]

    ranked = []
    for member in not_liked.index:
        ranked.append((member, predict(user, member, rating_matrix, similarity)))

    # Stable descending sort on the score.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

In [25]:
result = recommend(user, active_ratings, similarity)
result

[('Cherprang', 0.49263833819171676),
 ('Pun', 0.2866058201146374),
 ('Jan', 0.22026623943404225),
 ('Orn', 0.08004589163569609),
 ('Izurina', 0.00962393291318067),
 ('Noey', 0.009397954315789568),
 ('Mobile', 0.004903216637632995),
 ('Jennis', 0.004803384052463453),
 ('Kaimook', 0.004706647625903431),
 ('Can', 0.0),
 ('Jaa', 0.0),
 ('Jane', 0.0),
 ('Jib', 0.0),
 ('Kate', 0.0),
 ('Korn', 0.0),
 ('Maysa', 0.0),
 ('Mind', 0.0),
 ('Miori', 0.0),
 ('Namneung', 0.0),
 ('Namsai', 0.0),
 ('Nink', 0.0),
 ('Piam', 0.0),
 ('Satchan', 0.0)]

## Top Recommendations for the User Who Liked {Music, Kaew, Tarwaan}
{{show_rank(result, n=5) }}

In [26]:
# Followers who like Mind and Kaimook and nobody else (row sum of exactly 2).
rating_matrix[(rating_matrix['Mind'] == 1.) & (rating_matrix['Kaimook'] == 1.)
              & (rating_matrix.sum(axis=1) == 2)]

member,Can,Cherprang,Izurina,Jaa,Jan,Jane,Jennis,Jib,Kaew,Kaimook,...,Music,Namneung,Namsai,Nink,Noey,Orn,Piam,Pun,Satchan,Tarwaan
follower_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
882027016354017280,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
905467375016742912,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
913836977480359936,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
958152522438344713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
recommend(905467375016742912, active_ratings, similarity)

[('Cherprang', 0.7041801425764659),
 ('Music', 0.5574931183620865),
 ('Pun', 0.4954418276757902),
 ('Jan', 0.4053811731475094),
 ('Kaew', 0.38982324667300833),
 ('Tarwaan', 0.2714654617639244),
 ('Mobile', 0.19321509524909217),
 ('Orn', 0.17409128646258845),
 ('Izurina', 0.13357926135092593),
 ('Jennis', 0.12355682700742511),
 ('Noey', 0.11096815916337688),
 ('Namneung', 0.03963094270706811),
 ('Namsai', 0.01418752459510672),
 ('Kate', 0.009315298830346219),
 ('Jaa', 0.004833302362300896),
 ('Can', 0.0),
 ('Jane', 0.0),
 ('Jib', 0.0),
 ('Korn', 0.0),
 ('Maysa', 0.0),
 ('Miori', 0.0),
 ('Nink', 0.0),
 ('Piam', 0.0),
 ('Satchan', 0.0)]

{{ show_rank(recommend(905467375016742912, active_ratings, similarity), n=5) }}

In [28]:
# Same neighbour selection as inside the user-user predict(): similarities
# >= 1 or below MIN_SIMILARITY are set to 0 and the MAX_PAIRS largest are kept.
# NOTE(review): this duplicates the body of predict(); the two copies must be
# kept in sync by hand.
user_idx = USER_INDEX.get_loc(905467375016742912)
user_sims = np.squeeze(np.asarray(similarity[user_idx]))
user_sims = pd.Series(
    np.where((user_sims >= 1.) | (user_sims < MIN_SIMILARITY), 0., user_sims),
    index=USER_INDEX
).sort_values(ascending=False)[:MAX_PAIRS]

In [29]:
active_ratings.loc[user_sims.index].sum(axis=0)

member
Can            0.0
Cherprang    144.0
Izurina       27.0
Jaa            1.0
Jan           83.0
Jane           0.0
Jennis        26.0
Jib            0.0
Kaew          80.0
Kaimook      157.0
Kate           2.0
Korn           0.0
Maysa          0.0
Mind         179.0
Miori          0.0
Mobile        40.0
Music        113.0
Namneung       8.0
Namsai         3.0
Nink           0.0
Noey          23.0
Orn           35.0
Piam           0.0
Pun          100.0
Satchan        0.0
Tarwaan       56.0
dtype: float64

In [30]:
active_ratings.sum(axis=0).sort_values(ascending=False) / len(active_ratings.index) * 100

member
Cherprang    77.809240
Pun          56.462991
Music        50.337804
Jan          33.164431
Kaew         31.549925
Orn          28.291108
Izurina      23.745653
Tarwaan      22.235469
Mobile       19.821162
Noey         19.741679
Jennis       17.660209
Kaimook      15.876801
Namneung     14.773969
Mind         13.671138
Korn         11.579732
Kate         10.178838
Namsai        8.976652
Jaa           7.262792
Jane          3.278689
Piam          2.742176
Can           2.131148
Miori         1.515152
Satchan       1.470442
Maysa         0.794834
Nink          0.725286
Jib           0.606061
dtype: float64