In [2]:
import numpy as np
import pandas as pd
import random

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Use the CUDA device when torch reports one is available; otherwise use the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

$$
\begin{equation}
\begin{aligned}
\hat{y}(x) &= w_0+\sum_{i=1}^nw_ix_i+\sum_{i=1}^n\sum_{j=i+1}^n\langle v_i,v_j\rangle x_ix_j \\
&=s_1+s_2
\end{aligned}
\end{equation}
$$

$$
\begin{equation}
\begin{aligned}
s_2 &=\sum_{i=1}^n\sum_{j=i+1}^n\langle v_i, v_j\rangle x_ix_j  \\
&=\frac{1}{2}\sum_{i=1}^n\sum_{j=1}^n\langle v_i,v_j\rangle x_ix_j-\frac{1}{2}\sum_{i=1}^n\langle v_i, v_i\rangle x_ix_i \\
&=\frac{1}{2}(\sum_{i=1}^n\sum_{j=1}^n\sum_{f=1}^k v_{i,f}v_{j,f}x_ix_j-\sum_{i=1}^n\sum_{f=1}^kv_{i,f}v_{i,f}x_ix_i) \\
&=\frac{1}{2}\sum_{f=1}^k((\sum_{i=1}^nv_{i,f}x_i)^2-\sum_{i=1}^nv_{i,f}^2x_i^2) \\
&=\frac{1}{2}(\text{out1}-\text{out2})
\end{aligned}
\end{equation}
$$

In [4]:
class FM(nn.Module):
    """Second-order factorization machine.

    The output is ``s1 + s2`` where ``s1 = linear(x)`` (bias plus first-order
    weights) and ``s2`` is the sum of all pairwise interactions
    ``<v_i, v_j> * x_i * x_j`` for ``i < j``, evaluated with the identity

        s2 = 1/2 * sum_f ((sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2)

    so no explicit double loop over feature pairs is needed.

    Args:
        n: Number of input features (size of the last dimension of ``x``).
        k: Length of each latent factor vector ``v_i``.

    Raises:
        TypeError: If ``n`` or ``k`` is left as ``None``.
    """

    def __init__(self, n=None, k=None):
        if n is None or k is None:
            raise TypeError("FM requires integer values for both n and k")
        # Zero-argument super() binds to this instance. The previous
        # `super(FM).__init__()` produced an unbound super object, so
        # nn.Module.__init__ never ran and registering the parameter below failed.
        super().__init__()
        # Row i of V is the latent vector v_i of feature i.
        self.V = nn.Parameter(torch.randn(n, k), requires_grad=True)
        # Holds the bias w_0 and the first-order weights w_i.
        self.linear = nn.Linear(n, 1)

    def forward(self, x):
        """Evaluate the factorization machine on ``x``.

        Args:
            x: Tensor whose last dimension has size ``n``. The reductions
                below run over dim 1, so a 2-D ``(batch, n)`` input gives a
                ``(batch, 1)`` output.

        Returns:
            Tensor ``s1 + s2`` as described in the class docstring.
        """
        out1 = torch.matmul(x, self.V).pow(2).sum(1, keepdim=True)        # sum_f (sum_i v_if x_i)^2
        out2 = torch.matmul(x.pow(2), self.V.pow(2)).sum(1, keepdim=True) # sum_f sum_i v_if^2 x_i^2
        s2 = 1/2. * (out1 - out2)
        s1 = self.linear(x)
        out = s1 + s2
        return out

In [5]:
# Read the MovieLens sample file (comma-separated despite the .txt extension)
# into a DataFrame; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='data/movielens_sample.txt')

In [6]:
# Names of the columns treated as sparse (categorical) inputs.
sparse_features = [
    'movie_id',
    'user_id',
    'gender',
    'age',
    'occupation',
    'zip',
]
# Name of the label column, kept as a one-element list.
target = [
    'rating',
]

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip
0,3299,235,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119
1,3630,3256,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005
2,517,105,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408
