In [None]:
#Libraries and packages
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import nltk
#import cPickle5 as cPickle
from collections import defaultdict
import sys, re
import pandas as pd
from tqdm.notebook import tqdm
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.models as models
from torchvision import datasets, transforms

#Surprise library
from surprise import accuracy
from surprise.model_selection.validation import cross_validate
from surprise.dataset import Dataset
from surprise.reader import Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise.model_selection import RandomizedSearchCV
from collections import defaultdict
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split as sktrain_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

from numba import njit
from tqdm import trange
from sklearn.base import BaseEstimator, ClassifierMixin


In [None]:
class FactorizationMachineClassifier(BaseEstimator, ClassifierMixin):
    """
    Second-order factorization machine fitted with per-sample SGD.

    For a sparse row ``x`` the model score is::

        w0 + sum_j w[j] * x[j]
           + 0.5 * sum_f ((sum_j v[f, j] * x[j]) ** 2
                          - sum_j (v[f, j] * x[j]) ** 2)

    Training minimises the squared error between this score and the
    target, so ``predict_proba`` returns raw scores rather than calibrated
    probabilities.

    Parameters
    ----------
    n_iter : int, default 10
        Number of passes over the training rows.
    n_factors : int, default 10
        Number of latent factors per feature.
    learning_rate : float, default 0.1
        SGD step size.
    reg_coef : float, default 0.01
        L2 penalty strength applied to the linear weights.
    reg_factors : float, default 0.01
        L2 penalty strength applied to the latent factors.
    random_state : int, default 1234
        Seed handed to ``np.random.seed`` before the factors are drawn.
    verbose : bool, default False
        Show a tqdm progress bar over the epochs.

    Attributes
    ----------
    intercept_ : float
        Learned bias term.
    coef_ : ndarray of shape (n_features,)
        Learned linear weights.
    feature_factors_ : ndarray of shape (n_factors, n_features)
        Learned latent factors.
    history_ : list of float
        Mean per-sample loss recorded for each epoch.
    """

    def __init__(self, n_iter = 10, n_factors = 10, learning_rate = 0.1, reg_coef = 0.01,
                 reg_factors = 0.01, random_state = 1234, verbose = False):
        self.n_iter = n_iter
        self.verbose = verbose
        self.reg_coef = reg_coef
        self.n_factors = n_factors
        self.reg_factors = reg_factors
        self.random_state = random_state
        self.learning_rate = learning_rate

    def fit(self, X, y):
        """
        Fit the model on a CSR sparse matrix ``X`` and targets ``y``.

        ``X`` must expose ``data``, ``indptr`` and ``indices``. ``y`` is
        converted to an int32 ndarray, so non-integer targets are truncated.
        Returns ``self``.
        """
        n_samples, n_features = X.shape
        self.coef_ = np.zeros(n_features)
        self.intercept_ = 0.0

        np.random.seed(self.random_state)
        self.feature_factors_ = np.random.normal(
            scale = 1 / np.sqrt(self.n_factors),
            size = (self.n_factors, n_features))

        # The compiled kernel needs a plain ndarray; asarray also accepts
        # lists and pandas Series, and astype always returns a fresh copy.
        y = np.asarray(y).astype(np.int32)

        loop = trange(self.n_iter) if self.verbose else range(self.n_iter)

        self.history_ = []
        for _ in loop:
            # coef_ and feature_factors_ are arrays and are updated in place
            # by the kernel. The intercept is a scalar, which the kernel can
            # only change locally, so it is returned and stored here.
            loss, self.intercept_ = _sgd_epoch(
                X.data, X.indptr, X.indices, y, n_samples,
                self.intercept_, self.coef_, self.feature_factors_,
                self.n_factors, self.learning_rate,
                self.reg_coef, self.reg_factors)
            self.history_.append(loss)

        return self

    def predict_proba(self, X):
        """Return the raw model score for every row of ``X``."""
        return self._predict(X)

    def _predict(self, X):
        """
        Vectorised model score for every row of the sparse matrix ``X``.

        ``.dot`` is used for the products because ``*`` means matrix
        multiplication only for ``scipy.sparse`` matrices, not sparse arrays.
        """
        linear_output = X.dot(self.coef_)
        v = self.feature_factors_.T
        term = X.dot(v) ** 2 - X.power(2).dot(v ** 2)
        factor_output = 0.5 * np.sum(term, axis = 1)
        return self.intercept_ + linear_output + factor_output

    def predict(self, X):
        """Return the scores of ``predict_proba`` truncated to integers."""
        # The ``np.int`` alias no longer exists in NumPy >= 1.24; the
        # builtin ``int`` selects the same dtype.
        return self.predict_proba(X).astype(int)


@njit
def _sgd_epoch(data, indptr, indices, y, n_samples,
               w0, w, v, n_factors, learning_rate, reg_w, reg_v):
    """
    Run one SGD pass over all rows of a CSR matrix.

    ``w`` and ``v`` are updated in place. ``w0`` is a scalar, so its
    updated value is returned to the caller.

    Returns
    -------
    (loss, w0) : the mean per-sample loss of the pass (each sample's loss
    is measured before that sample's update) and the updated intercept.
    """
    loss = 0.0
    for i in range(n_samples):
        pred, summed = _predict_instance(data, indptr, indices, w0, w, v, n_factors, i)

        loss += _log_loss(pred, y[i])
        # derivative of the squared error (pred - y) ** 2 w.r.t. pred
        loss_gradient = 2 * (pred - y[i])

        w0 -= learning_rate * loss_gradient

        for index in range(indptr[i], indptr[i + 1]):
            feature = indices[index]
            w[feature] -= learning_rate * (loss_gradient * data[index] + 2 * reg_w * w[feature])

        for factor in range(n_factors):
            for index in range(indptr[i], indptr[i + 1]):
                feature = indices[index]
                # factor sum over the other features of this row
                term = summed[factor] - v[factor, feature] * data[index]
                v_gradient = loss_gradient * data[index] * term
                v[factor, feature] -= learning_rate * (v_gradient + 2 * reg_v * v[factor, feature])

    loss /= n_samples
    return loss, w0


@njit
def _sgd_update(data, indptr, indices, y, n_samples, n_features,
                w0, w, v, n_factors, learning_rate, reg_w, reg_v):
    """
    Compute the loss of the current iteration and update
    gradients accordingly.

    Only the mean loss is returned. ``w`` and ``v`` are updated in place,
    but the updated intercept is discarded because ``w0`` is a scalar;
    call ``_sgd_epoch`` to obtain it. ``n_features`` is accepted but unused.
    """
    loss, _ = _sgd_epoch(data, indptr, indices, y, n_samples,
                         w0, w, v, n_factors, learning_rate, reg_w, reg_v)
    return loss


@njit
def _predict_instance(data, indptr, indices, w0, w, v, n_factors, i):
    """
    Predict a single row of a CSR matrix.

    Parameters
    ----------
    data, indptr, indices : CSR arrays of the feature matrix.
    w0 : intercept.
    w : linear weights, indexed by feature.
    v : latent factors, indexed as ``v[factor, feature]``.
    n_factors : number of latent factors.
    i : index of the row to score.

    Returns
    -------
    (pred, summed) : the model score for row ``i`` and, per factor, the sum
    of ``v[factor, feature] * value`` over the row's stored entries.
    """
    summed = np.zeros(n_factors)
    summed_squared = np.zeros(n_factors)

    # bias plus linear part
    pred = w0
    for index in range(indptr[i], indptr[i + 1]):
        feature = indices[index]
        pred += w[feature] * data[index]

    # pairwise interactions: 0.5 * ((sum of terms)^2 - sum of terms^2)
    for factor in range(n_factors):
        for index in range(indptr[i], indptr[i + 1]):
            feature = indices[index]
            term = v[factor, feature] * data[index]
            summed[factor] += term
            summed_squared[factor] += term * term

        pred += 0.5 * (summed[factor] * summed[factor] - summed_squared[factor])

    return pred, summed


@njit
def _log_loss(pred, y):
    """
    Squared error between a prediction and its target.

    Despite the name, no logarithm is involved: the value is (pred - y) ** 2.
    """
    residual = pred - y
    return residual ** 2