## Factorization Machines in Python
---
Reference paper: Steffen Rendle, "Factorization Machines" (ICDM 2010) — https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf

In [3]:
from pyfm import pylibfm
import numpy as np
from sklearn.feature_extraction import DictVectorizer

In [5]:
# Toy training set: one record per (user, item) interaction. The ids are kept
# as strings and the user's age as an integer.
train = [
    {"user": user_id, "item": item_id, "age": age}
    for user_id, item_id, age in (
        ("1", "5", 19),
        ("2", "43", 33),
        ("3", "20", 55),
        ("4", "10", 20),
    )
]

In [6]:
# DictVectorizer turns feature dicts into a numeric matrix: string values become
# one-hot "name=value" columns, numeric values are kept as a single column.
v = DictVectorizer()

In [7]:
# Learn the feature-to-column mapping from the training dicts and encode them in one step.
X = v.fit_transform(train)

In [11]:
# Convert to a dense array purely for display, so the encoded columns can be read off.
X.toarray()

array([[ 19.,   0.,   0.,   0.,   1.,   1.,   0.,   0.,   0.],
       [ 33.,   0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.],
       [ 55.,   0.,   1.,   0.,   0.,   0.,   0.,   1.,   0.],
       [ 20.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,   1.]])

In [14]:
# Column names, in the same order as the columns of the encoded matrix.
# NOTE(review): get_feature_names() was removed in scikit-learn 1.2; on newer
# versions call get_feature_names_out() instead.
v.get_feature_names()

['age',
 'item=10',
 'item=20',
 'item=43',
 'item=5',
 'user=1',
 'user=2',
 'user=3',
 'user=4']

In [16]:
# One label per encoded row; every toy example is labelled 1.0.
y = np.ones(X.shape[0])

In [17]:
# Quick check: expect one label per training row, all equal to 1.0.
print(y)

[ 1.  1.  1.  1.]


In [30]:
# Factorization machine set up for classification; num_iter=100 sets the number
# of training iterations, every other hyperparameter is left at its default.
fm = pylibfm.FM(num_iter=100, task='classification')

In [31]:
# Train on the encoded toy rows; the training log loss is printed after each epoch.
fm.fit(X,y)

Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training log loss: 0.37518
-- Epoch 2
Training log loss: 0.00898
-- Epoch 3
Training log loss: 0.00796
-- Epoch 4
Training log loss: 0.00715
-- Epoch 5
Training log loss: 0.00650
-- Epoch 6
Training log loss: 0.00595
-- Epoch 7
Training log loss: 0.00549
-- Epoch 8
Training log loss: 0.00510
-- Epoch 9
Training log loss: 0.00476
-- Epoch 10
Training log loss: 0.00446
-- Epoch 11
Training log loss: 0.00420
-- Epoch 12
Training log loss: 0.00397
-- Epoch 13
Training log loss: 0.00376
-- Epoch 14
Training log loss: 0.00357
-- Epoch 15
Training log loss: 0.00340
-- Epoch 16
Training log loss: 0.00325
-- Epoch 17
Training log loss: 0.00311
-- Epoch 18
Training log loss: 0.00298
-- Epoch 19
Training log loss: 0.00286
-- Epoch 20
Training log loss: 0.00275
-- Epoch 21
Training log loss: 0.00265
-- Epoch 22
Training log loss: 0.00255
-- Epoch 23
Training log loss: 0.00247
-- Epoch 24
Training log loss: 0.002

In [32]:
# Score a pairing that never occurs in the training data: user "1" and item "10"
# each appear there, but not together. The query must be encoded with the same
# fitted vectorizer so its columns line up with the training matrix.
fm.predict(v.transform({"user": "1", "item": "10", "age": 24}))

array([ 0.99992634])

In [37]:
# Encode the query first, then score it. predict() returns one score per input
# row, so take the single entry and round it to zero decimals for display.
query_features = v.transform({"user": "1", "item": "10", "age": 24})
result = fm.predict(query_features)[0]
print(round(result, 0))

1.0


## Example using a synthetic `make_classification` dataset from scikit-learn

In [48]:
from sklearn.datasets import make_classification

In [49]:
# Synthetic classification dataset: 1000 samples with 100 features each.
# random_state pins the generated data so that "Restart & Run All" reproduces
# the same samples; without it every run trains on a different dataset.
X, y = make_classification(
    n_samples=1000,
    n_features=100,
    n_clusters_per_class=1,
    random_state=42,
)

In [55]:
# DictVectorizer consumes one {feature: value} mapping per sample, so turn each
# row of X into {column_index: value}. enumerate() keys by position directly;
# building a value->index dict and inverting it would silently drop a column
# whenever two features in the same row happen to hold the same value.
data = [dict(enumerate(row)) for row in X]

In [61]:
# train_test_split is not imported by any earlier cell, so bring it into scope
# here; without this the cell raises NameError on a fresh kernel.
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.1, random_state=42)

In [62]:
# A fresh vectorizer for the synthetic data (this rebinds `v`).
v = DictVectorizer()
# Fit the feature mapping on the training dicts only ...
X_train = v.fit_transform(X_train)
# ... and reuse that fitted mapping for the test dicts so both share the same columns.
X_test = v.transform(X_test)
# NOTE(review): X_train / X_test are overwritten here (feature dicts in, encoded
# matrices out), so this cell is not idempotent; re-run the split cell before
# re-running this one.

In [63]:
# Classifier for the synthetic data: num_factors=50, 10 training epochs, and a
# small initial learning rate under the "optimal" learning-rate schedule.
fm = pylibfm.FM(
    num_factors=50,
    num_iter=10,
    verbose=True,
    task="classification",
    initial_learning_rate=0.0001,
    learning_rate_schedule="optimal",
)

In [64]:
# Train on the encoded training split only; X_test / y_test are not used here.
fm.fit(X_train,y_train)

Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training log loss: 1.95837
-- Epoch 2
Training log loss: 1.64273
-- Epoch 3
Training log loss: 1.38121
-- Epoch 4
Training log loss: 1.16588
-- Epoch 5
Training log loss: 0.98915
-- Epoch 6
Training log loss: 0.84433
-- Epoch 7
Training log loss: 0.72480
-- Epoch 8
Training log loss: 0.62565
-- Epoch 9
Training log loss: 0.54304
-- Epoch 10
Training log loss: 0.47396
