In [1]:
import numpy as np
import pandas as pd

In [47]:
# Ratings matrix: one row per user, one column per item, so R[user, item]
# is the rating that user gave that item.
# np.nan marks the single missing rating (user 0, item 4) to be predicted.
R = np.array(
    [
        [5, 3, 4, 4, np.nan],
        [3, 1, 2, 3, 3],
        [4, 3, 4, 3, 5],
        [3, 3, 1, 5, 4],
        [1, 5, 5, 2, 1],
    ]
)

User-based prediction: 

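$$ r_{ui}^* \triangleq \overline{r_u} + \frac{\sum_{v \in \textrm{p_users}} Corr(u,v)\,(r_{vi} - \overline{r_v})}{\sum_{v \in \textrm{p_users}} \left| Corr(u,v) \right|} $$

Here $\textrm{p_users}$ is the set of users $v \neq u$ whose correlation with $u$ exceeds the threshold $t$, and $\overline{r_v}$ is the mean of the ratings given by user $v$.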

Parameters:
<ol>
    <li> $u$: user of prediction.</li>
    <li> $i$: item whose rating from user $u$ is unknown.</li>
    <li> $\mathbf{R}$: matrix of ratings. 
        <ul>
        <li> <strong> Not the rate matrix in Markov jump processes</strong>. </li> 
        <li> <strong> $R_{ij}$ = rating from user $i$ on item $j$ </strong> </li>
        </ul>
    </li>
    <li> $t$: correlation threshold; only users whose correlation with $u$ exceeds $t$ are involved in the prediction.</li>
    <li> <code>method</code>: Pearson correlation (<code>'p'</code>, default) or cosine similarity (<code>'c'</code>). </li>
    <li> <code>corr</code>: If <code>True</code>, return the correlation matrix together with the prediction. (Default <code>False</code>). </li>
</ol>

In [85]:
def predict_u(u, i, R, t = 0, method = 'p', corr = False):
    """Predict the missing rating of user ``u`` for item ``i`` from similar users.

    The prediction is the mean rating of ``u`` plus the similarity-weighted
    average of how far each neighbour's rating of item ``i`` deviates from that
    neighbour's own mean rating.

    Parameters
    ----------
    u : int
        Row index of the user the prediction is made for.
    i : int
        Column index of the item; ``R[u, i]`` must be NaN (not yet rated).
    R : array-like, shape (n_users, n_items)
        Ratings matrix with ``R[user, item]``; NaN marks a missing rating.
    t : float, default 0
        Similarity threshold: only users whose similarity with ``u`` is
        strictly greater than ``t`` are used as neighbours.
    method : {'p', 'c'}, default 'p'
        ``'p'`` for Pearson correlation, ``'c'`` for cosine similarity. Both
        are computed over the items that a pair of users has both rated.
    corr : bool, default False
        If true, also return the user-user similarity matrix.

    Returns
    -------
    pred : float
        Predicted rating. Falls back to the mean rating of ``u`` when no
        neighbour qualifies.
    SS : numpy.ndarray, shape (n_users, n_users)
        Similarity matrix; only returned when ``corr`` is true.

    Raises
    ------
    AssertionError
        If ``R[u, i]`` already holds a rating.
    ValueError
        If ``method`` is neither ``'p'`` nor ``'c'``.
    """
    R = np.asarray(R, dtype=float)
    assert np.isnan(R[u, i]), "User {} has already rated item {} with rating {}!".format(u, i, R[u,i])
    n_users = R.shape[0]
    if method == 'p':
        # pandas correlates columns pairwise and skips NaNs, so users go in columns
        SS = pd.DataFrame(R.T).corr().values
    elif method == 'c':
        rated = ~np.isnan(R)
        SS = np.eye(n_users)
        # loop indices deliberately do not reuse `u` / `i`: the item index is
        # still needed after the similarity matrix has been built
        for a in range(n_users):
            for b in range(a + 1, n_users):
                both = rated[a] & rated[b]
                r_a, r_b = R[a, both], R[b, both]
                denom = np.linalg.norm(r_a) * np.linalg.norm(r_b)
                # similarity is undefined without co-rated items or with a zero vector
                SS[a, b] = SS[b, a] = (r_a @ r_b) / denom if denom > 0 else np.nan
    else:
        raise ValueError("method must be 'p' (Pearson) or 'c' (cosine), got {!r}".format(method))
    # neighbours: similar enough (NaN similarities compare False), ...
    neighbours = SS[u, :] > t
    # ... not the user itself (excluded by index, so a perfectly correlated
    # other user is still kept) ...
    neighbours[u] = False
    # ... and with an actual rating for item i, otherwise the sum becomes NaN
    neighbours &= ~np.isnan(R[:, i])
    p_users = np.flatnonzero(neighbours)
    # similarities between user u and the selected neighbours
    S = SS[u, p_users]
    m = np.nanmean(R, axis = 1)
    # normalise by the absolute weights so that negative similarities
    # (possible when t < 0) cannot cancel out or flip the sign
    weight = np.sum(np.abs(S))
    if weight > 0:
        pred = m[u] + np.sum(S*(R[p_users,i] - m[p_users]))/weight
    else:
        # no usable neighbour: the user's own mean is the best available guess
        pred = m[u]
    if not corr:
        return pred
    return pred, SS

In [86]:
# User-based prediction of the rating user 0 would give item 4 (the NaN in R),
# with cosine similarity and only neighbours whose similarity exceeds 0.9.
predict_u(0, 4, R, t=0.9, method='c')

4.902580043392168

In [105]:
def predict_i(u, i, R, t = 0, method = 'p', corr = False):
    """Predict the missing rating of user ``u`` for item ``i`` from similar items.

    The prediction is the mean rating of item ``i`` plus the similarity-weighted
    average of how far the ratings user ``u`` gave to neighbouring items deviate
    from those items' own mean ratings.

    Parameters
    ----------
    u : int
        Row index of the user the prediction is made for.
    i : int
        Column index of the item; ``R[u, i]`` must be NaN (not yet rated).
    R : array-like, shape (n_users, n_items)
        Ratings matrix with ``R[user, item]``; NaN marks a missing rating.
    t : float, default 0
        Similarity threshold: only items whose similarity with item ``i`` is
        strictly greater than ``t`` are used as neighbours.
    method : {'p', 'c'}, default 'p'
        ``'p'`` for Pearson correlation, ``'c'`` for cosine similarity. Both
        are computed over the users that have rated both items of a pair.
    corr : bool, default False
        If true, also return the item-item similarity matrix.

    Returns
    -------
    pred : float
        Predicted rating. Falls back to the mean rating of item ``i`` when no
        neighbour qualifies.
    SS : numpy.ndarray, shape (n_items, n_items)
        Similarity matrix; only returned when ``corr`` is true.

    Raises
    ------
    AssertionError
        If ``R[u, i]`` already holds a rating.
    ValueError
        If ``method`` is neither ``'p'`` nor ``'c'``.
    """
    R = np.asarray(R, dtype=float)
    assert np.isnan(R[u, i]), "User {} has already rated item {} with rating {}!".format(u, i, R[u,i])
    n_items = R.shape[1]
    if method == 'p':
        # pandas correlates columns pairwise and skips NaNs; items are the columns
        SS = pd.DataFrame(R).corr().values
    elif method == 'c':
        rated = ~np.isnan(R)
        SS = np.eye(n_items)
        for a in range(n_items):
            for b in range(a + 1, n_items):
                both = rated[:, a] & rated[:, b]
                c_a, c_b = R[both, a], R[both, b]
                denom = np.linalg.norm(c_a) * np.linalg.norm(c_b)
                # similarity is undefined without common raters or with a zero vector
                SS[a, b] = SS[b, a] = (c_a @ c_b) / denom if denom > 0 else np.nan
    else:
        raise ValueError("method must be 'p' (Pearson) or 'c' (cosine), got {!r}".format(method))
    # neighbours: items similar enough to item i (NaN similarities compare False), ...
    neighbours = SS[i, :] > t
    # ... not item i itself (excluded by index, so a perfectly correlated
    # other item is still kept) ...
    neighbours[i] = False
    # ... and actually rated by user u, otherwise the sum becomes NaN
    neighbours &= ~np.isnan(R[u, :])
    p_items = np.flatnonzero(neighbours)
    # similarities between item i and the selected neighbour items
    S = SS[i, p_items]
    m = np.nanmean(R, axis = 0)
    # normalise by the absolute weights so that negative similarities
    # (possible when t < 0) cannot cancel out or flip the sign
    weight = np.sum(np.abs(S))
    if weight > 0:
        pred = m[i] + np.sum(S*(R[u,p_items] - m[p_items]))/weight
    else:
        # no usable neighbour: the item's own mean is the best available guess
        pred = m[i]
    if not corr:
        return pred
    return pred, SS

In [106]:
# Item-based prediction of the rating user 0 would give item 4 (the NaN in R),
# with cosine similarity and only neighbour items whose similarity exceeds 0.9.
predict_i(0, 4, R, t=0.9, method='c')

4.466923907166172