In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
from lightfm.lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k, auc_score, reciprocal_rank
from scipy.sparse import coo_matrix, csr_matrix

from fastFM import datasets, als
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score, roc_curve, mean_squared_error
from sklearn.preprocessing import OneHotEncoder


%matplotlib inline
sns.set_context('notebook', font_scale=1.6)



In [137]:
class FormatDataFromCOO(object):
    def __init__(self):
        self.encoder = None
        
    def fit(self, coo_matrix):
        encoder = OneHotEncoder(handle_unknown='ignore', n_values=[coo_matrix.shape[0], coo_matrix.shape[1]])
        data = np.array(zip(coo_matrix.row, coo_matrix.col))
        self.encoder = encoder.fit(data)
    
    def transform(self, coo_matrix):
        data = np.array(zip(coo_matrix.row, coo_matrix.col))
        X = self.encoder.transform(data)
        y = coo_matrix.data
        y[y > 0] = 1
        return X, y
    
    def fit_transform(self, coo_matrix):
        self.fit(coo_matrix)
        X, y = self.transform(coo_matrix)
        return X, y        

## dummy data

In [108]:
test = coo_matrix(([1]*3, ([0, 1, 2], [0, 1, 2])), shape=(5,5))
print test.toarray()

[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [109]:
pp = FormatDataFromCOO()
pp.fit(test)
print pp.transform(test).toarray()

test2 = coo_matrix(([1]*3, ([0, 1, 2], [0, 1, 3])))
print '\n', pp.transform(test2).toarray()

[[ 1.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  1.  0.  0.]]

[[ 1.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  1.  0.]]


In [112]:
print pp.transform(test2).toarray()

[[ 1.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  1.  0.]]


## Movielens data

In [138]:
movielens = fetch_movielens(min_rating=4.0)
train = movielens['train']
test = movielens['test']

print 'train:\n-----\n{}'.format(repr(train))
display(pd.DataFrame(train.todense(), columns=movielens['item_labels']).iloc[:5, :5])
print '\ntest:\n-----\n{}'.format(repr(test))
display(pd.DataFrame(test.todense(), columns=movielens['item_labels']).iloc[:5, :5])

train:
-----
<943x1682 sparse matrix of type '<type 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>


Unnamed: 0,Toy Story (1995),GoldenEye (1995),Four Rooms (1995),Get Shorty (1995),Copycat (1995)
0,5,0,4,0,0
1,4,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0



test:
-----
<943x1682 sparse matrix of type '<type 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


Unnamed: 0,Toy Story (1995),GoldenEye (1995),Four Rooms (1995),Get Shorty (1995),Copycat (1995)
0,0,0,0,0,0
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,4,0,0,0,0


In [141]:
pp = FormatDataFromCOO()
X_train, y_train = pp.fit_transform(train)
X_test, y_test = pp.transform(test)

In [144]:
fm = als.FMClassification(n_iter=20, rank=40)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)

AssertionError: 

In [147]:
from fastFM import mcmc
fm = mcmc.FMRegression(n_iter=1000, rank=40, init_stdev=0.1)

In [148]:
y_pred = fm.fit_predict(X_train, y_train, X_test)
y_pred_proba = fm.fit_predict_proba(X_train, y_train, X_test)

AttributeError: 'FMRegression' object has no attribute 'fit_predict_proba'

In [150]:
roc_auc_score(y_test, y_pred)

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [48]:
print 'mse: {}'.format(mean_squared_error(y_test, y_pred))

mse: 0.240806451788


In [21]:
enc = OneHotEncoder()
enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]])

OneHotEncoder(categorical_features='all', dtype=<type 'float'>,
       handle_unknown='error', n_values='auto', sparse=True)

In [22]:
enc.n_values_

array([2, 3, 4])

In [23]:
enc.feature_indices_

array([0, 2, 5, 9])

In [24]:
enc.transform([[0, 1, 1]]).toarray()

array([[ 1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])