# Filtragem Colaborativa

In [11]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  
import seaborn as sb  
from scipy.io import loadmat  
%matplotlib inline


## Conjunto de dados de classifiçãoes de Filmes

**Y** é uma matrix que contém o **número de filmes x número de usuários**.

**R** é uma matriz binária (0 ou 1) que indica **se um usuáio já avaliou um filme**

In [12]:
data = loadmat('data/ex8_movies.mat')  

Y = data['Y']  
R = data['R']  
Y.shape, R.shape  

((1682, 943), (1682, 943))

In [13]:
dataY = pd.DataFrame(Y)
dataY.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,933,934,935,936,937,938,939,940,941,942
0,5,4,0,0,4,4,0,0,0,4,...,2,3,4,0,4,0,0,5,0,0
1,3,0,0,0,3,0,0,0,0,0,...,4,0,0,0,0,0,0,0,0,5
2,4,0,0,0,0,0,0,0,0,0,...,0,0,4,0,0,0,0,0,0,0
3,3,0,0,0,0,0,5,0,0,4,...,5,0,0,0,0,0,2,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
dataR = pd.DataFrame(R)
dataR.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,933,934,935,936,937,938,939,940,941,942
0,1,1,0,0,1,1,0,0,0,1,...,1,1,1,0,1,0,0,1,0,0
1,1,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
2,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,1,0,0,0,0,0,1,0,0,1,...,1,0,0,0,0,0,1,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
media_avaliacoes_filmes = Y[1,R[1,:]].mean()
media_avaliacoes_filmes

2.5832449628844114

### Algoritmo de aprendizagem de Filtragem colaborativa

In [48]:
def custo_filtragem_colaborativa(parametros, Y, R, qtd_caracteristicas):  
    Y = np.matrix(Y)
    R = np.matrix(R)
    
    qtd_filmes = Y.shape[0]
    qtd_usuarios = Y.shape[1]
    
    
    # redimensionando o vetor de paramtros
    X = np.matrix(np.reshape(parametros[:qtd_filmes * qtd_caracteristicas], (qtd_filmes, qtd_caracteristicas)))  
    Theta = np.matrix(np.reshape(parametros[qtd_filmes * qtd_caracteristicas:], (qtd_usuarios, qtd_caracteristicas)))

    J = 0
    X_grad = np.zeros(X.shape)
    Theta_grad = np.zeros(Theta.shape)
    
    # Calculando o erro
    erro = np.multiply((X * Theta.T) - Y, R)
    erro_quadratico = np.power(erro, 2)
    J = (1. / 2) * np.sum(erro_quadratico)
        
    
    # Gradientes
    X_grad = (erro * Theta)
    Theta_grad = (erro.T * X)
    
    grad = np.concatenate((np.ravel(X_grad), np.ravel(Theta_grad)))
    
    return J, grad

In [51]:
usuarios = 4  
filmes = 5  
caracteristicas = 3

dados_parametros = loadmat('data/ex8_movieParams.mat')  

X = dados_parametros['X']  
Theta = dados_parametros['Theta']

X_sub = X[:filmes, :caracteristicas]  
Theta_sub = Theta[:usuarios, :caracteristicas]  
Y_sub = Y[:filmes, :usuarios]  
R_sub = R[:filmes, :usuarios]

parametros = np.concatenate((np.ravel(X_sub), np.ravel(Theta_sub)))
custo, grad = custo_filtragem_colaborativa(parametros, Y_sub, R_sub, caracteristicas)  

custo

22.224603725685675