# Recommender using Factorization Machine

In [None]:
import datetime
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

## Load the data

In [None]:
# Load the transaction log; article ids are read as strings rather than parsed as numbers
transactions = pd.read_csv(
    "../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv",
    dtype={'article_id': str},
)
# Discard the sales channel and price columns
transactions = transactions.drop(columns=['sales_channel_id', 'price'])

# Keep only the transactions made on or after the start date
start_date = datetime.datetime(2020, 9, 15)  # I replaced 1 by 15 to shorten the data set length
transactions["t_dat"] = pd.to_datetime(transactions["t_dat"])
transactions = transactions[transactions["t_dat"].ge(start_date)]

# Keep only the articles that were bought more than 10 times in that window
article_bought_count = (
    transactions.groupby('article_id')['t_dat']
    .count()
    .reset_index(name='count')
)
popular_mask = article_bought_count['count'] > 10
most_bought_articles = article_bought_count.loc[popular_mask, 'article_id'].values
transactions = transactions[transactions['article_id'].isin(most_bought_articles)]

# Renumber the remaining rows from 0 and show the resulting size
transactions = transactions.reset_index(drop=True)
transactions.shape

## Load customers and get last bought article for each one

In [None]:
# Customers for which predictions will be made later (taken from the sample submission)
sample_submission_path = '../input/h-and-m-personalized-fashion-recommendations/sample_submission.csv'
customers = pd.read_csv(sample_submission_path)['customer_id'].values

# Most recent filtered transaction of each customer: order the rows newest-first
# within each customer, then keep only the first row per customer
newest_first = transactions.sort_values(by=['customer_id', 't_dat'], ascending=False)
last_bought_articles = newest_first.drop_duplicates(subset=['customer_id'], keep='first')

In [None]:
# Create an array with the last bought article for each customer in the whole customer set
def get_last_bought_article_per_customer(customers, last_bought_articles):
    """Look up the last bought article of every requested customer.

    Args:
        customers: iterable of customer ids to look up, in output order.
        last_bought_articles: DataFrame with ``customer_id`` and
            ``article_id`` columns. If a customer appears in several rows,
            the first of those rows is used.

    Returns:
        ``np.ndarray`` aligned with ``customers`` holding the article id of
        each customer, or ``None`` for customers absent from
        ``last_bought_articles``.
    """
    customer_ids = last_bought_articles['customer_id'].values
    article_ids = last_bought_articles['article_id'].values

    # Build the lookup table once instead of scanning the frame per customer.
    # The arrays are walked backwards so that, when a customer id is repeated,
    # the first row ends up in the dict (later writes overwrite earlier ones).
    article_by_customer = dict(zip(customer_ids[::-1], article_ids[::-1]))

    # dict.get returns None for customers without any row
    return np.array([article_by_customer.get(customer) for customer in customers])

In [None]:
# Retrieve the last bought article of every customer and cache the result on disk
last_articles = get_last_bought_article_per_customer(customers, last_bought_articles)
os.makedirs('data', exist_ok=True)  # np.save does not create missing directories
np.save('data/last_articles.npy', last_articles)
# The array can contain None entries (object dtype), so reloading it needs allow_pickle=True
# last_articles = np.load('data/last_articles.npy', allow_pickle=True)

## Compute the default recommendation

In [None]:
# Calculate a time-decaying popularity, so that items bought more recently weigh more in the popularity list.
# In simple words, item A bought 5 times on the first day of the train period ranks below item B bought 4 times on the last day of the train period.
reference_date = pd.Timestamp(2020, 9, 23)
# Whole days between each purchase and the reference date, computed for the full column at once
days_before_reference = (reference_date - transactions['t_dat']).dt.days
# Each purchase is weighted by the inverse of its age in days
# (a purchase dated on the reference day itself would get an infinite weight)
transactions['pop_factor'] = 1 / days_before_reference

# Sum the weights per article and keep the 12 articles with the highest total
transactions_by_article = transactions[['article_id', 'pop_factor']].groupby('article_id').sum().reset_index()
default_recommendation = transactions_by_article.sort_values(by='pop_factor', ascending=False)['article_id'].values[:12]

## Prepare data and features

In [None]:
# Order the rows by customer and, within a customer, by purchase date
transactions = transactions.sort_values(by=['customer_id', 't_dat'])

# Give every row the article of the row just above it; the first row wraps
# around and receives the article of the last row.
# NOTE(review): the shift runs over the whole table, so the first row of each
# customer receives the final article of the previous customer - confirm this is intended.
previous_article = transactions['article_id'].shift(1, fill_value=transactions['article_id'].iloc[-1])
transactions['last_bought_id'] = previous_article.values

# The date and the popularity weight are not used as model features
transactions = transactions.drop(columns=['t_dat', 'pop_factor'])

In [None]:
# Sorted distinct customer and article ids present in the transactions
customer_values = np.unique(transactions['customer_id'].values)
article_values = np.unique(transactions['article_id'].values)

# id -> position of the id in the sorted arrays above
customer_id2index = dict(zip(customer_values, range(len(customer_values))))
article_id2index = dict(zip(article_values, range(len(article_values))))

# position -> id (inverse of the mappings above)
customer_index2id = dict(enumerate(customer_values))
article_index2id = dict(enumerate(article_values))

In [None]:
# Replace every id column by its integer index column.
# The new columns are added in the order customer, article, last bought article,
# which is the layout the model's predict method relies on.
for index_column, id_column, id2index in (
    ('customer_index', 'customer_id', customer_id2index),
    ('article_index', 'article_id', article_id2index),
    ('last_bought_index', 'last_bought_id', article_id2index),
):
    transactions[index_column] = transactions[id_column].map(id2index)

# The raw ids are no longer needed once the indices exist
transactions = transactions.drop(columns=['customer_id', 'article_id', 'last_bought_id'])

In [None]:
# TODO: Add some features if we want

In [None]:
# Display the prepared transactions table
transactions

## The model

In [None]:
import time

In [None]:
class RecSys_FM:
    """Second-order factorization machine over one-hot encoded transaction columns.

    Every column of ``transactions`` is treated as a categorical field holding
    non-negative integer indices. The fields are laid out one after another in
    a single one-hot space: field ``j`` starts at ``feature_offsets[j]`` and
    spans ``feature_sizes[j]`` positions. ``predict`` expects the first three
    columns to be, in this order, the customer index, the article index and
    the index of the previously bought article.

    Training is stochastic gradient descent on the logistic loss with labels
    +1 (observed transaction) and -1 (negative sample). The negative samples
    are the observed rows with the article column shuffled across rows.
    """

    # Number of articles returned per customer by ``predict``
    N_RECOMMENDATIONS = 12

    def __init__(self, transactions, customer_id2index, customer_index2id, article_id2index, article_index2id, default_recommendation, num_components=20):
        """Store the data and randomly initialise the model parameters.

        Args:
            transactions: DataFrame of integer index columns (see class docstring).
            customer_id2index: dict mapping customer id -> customer index.
            customer_index2id: dict mapping customer index -> customer id.
            article_id2index: dict mapping article id -> article index.
            article_index2id: dict mapping article index -> article id.
            default_recommendation: sequence of article ids recommended when
                the factorization machine cannot be used for a customer.
            num_components: number of latent factors per one-hot position.
        """
        # Save the transactions
        self.transactions = transactions

        # Negative samples start as a copy; the article column is shuffled before each epoch
        self.negative_transactions = self.transactions.copy()

        # Save the default recommendation
        self.default_recommendation = default_recommendation

        # Save customer and article mapping functions
        self.customer_id2index = customer_id2index
        self.customer_index2id = customer_index2id
        self.article_id2index = article_id2index
        self.article_index2id = article_index2id

        # Size of each one-hot field (largest index + 1) and the position where each field starts
        self.max_feature_values = self.transactions.max().values
        self.feature_sizes = self.max_feature_values.astype(np.int64) + 1
        self.feature_offsets = np.concatenate(([0], np.cumsum(self.feature_sizes)[:-1]))
        self.dimensionality = int(self.feature_sizes.sum())

        # Initialize the biases and parameters
        self.global_bias = np.random.normal(scale=1, size=1)
        self.biases = np.random.normal(scale=1/self.dimensionality, size=self.dimensionality)
        self.params = np.random.normal(scale=1./self.dimensionality, size=(num_components, self.dimensionality))

        # Indices 0..n-1 address the observed rows, n..2n-1 the negative samples
        self.training_indices = np.arange(len(self.transactions)*2)

    def __sgd__(self, lr, reg_w, reg_v):
        """Run one pass of stochastic gradient descent over ``training_indices``.

        Args:
            lr: learning rate.
            reg_w: L2 regularisation strength of the per-position biases.
            reg_v: L2 regularisation strength of the latent factors.

        Returns:
            Mean logistic loss of the visited samples, each measured just
            before its own update (0.0 when there is nothing to visit).
        """
        # Shift every column into its own slice of the one-hot space once,
        # instead of rebuilding the positions row by row
        positive_positions = self.transactions.values + self.feature_offsets
        negative_positions = self.negative_transactions.values + self.feature_offsets
        n_positive = len(positive_positions)

        total_loss = 0.0
        for idx in self.training_indices:
            # Get the sample; the +1 / -1 labels match the logistic loss used below
            if idx < n_positive:
                sample_positions = positive_positions[idx]
                label = 1.0
            else:
                sample_positions = negative_positions[idx - n_positive]
                label = -1.0

            # Make a prediction
            prediction, summed = self.__predict__(sample_positions)

            # Accumulate the error for reporting
            total_loss += float(np.sum(self.__log_loss__(prediction, label)))

            # Derivative of log(1 + exp(-label * prediction)) w.r.t. the prediction,
            # i.e. -label / (1 + exp(label * prediction)), written so that exp cannot overflow
            error_gradient = -label * np.exp(-np.logaddexp(0.0, label * prediction))

            # Update biases and parameters
            self.global_bias -= lr * error_gradient

            self.biases[sample_positions] -= lr * (error_gradient + 2 * reg_w * self.biases[sample_positions])

            # For one-hot inputs the derivative of the pairwise term w.r.t. the factors
            # of an active position is the sum over the active positions minus its own factors
            factors = self.params[:, sample_positions]
            self.params[:, sample_positions] = factors - lr * (error_gradient * (summed[:, np.newaxis] - factors) + 2 * reg_v * factors)

        n_samples = len(self.training_indices)
        return total_loss / n_samples if n_samples else 0.0

    def __predict__(self, sample_positions):
        """Score one sample given the one-hot positions that are active in it.

        Returns:
            Tuple ``(score, summed)`` where ``summed`` is the per-factor sum of
            the latent vectors of the active positions (reused by the gradient).
        """
        # Compute the sum of the square component
        summed = np.sum(self.params[:, sample_positions], axis=1)
        summed_square = np.sum(self.params[:, sample_positions]**2, axis=1)

        # Return the prediction using the biases and parameters
        return self.global_bias + np.sum(self.biases[sample_positions]) + 0.5 * np.sum(summed**2 - summed_square), summed

    def __log_loss__(self, pred, real):
        """Logistic loss ``log(1 + exp(-pred * real))`` for a label ``real`` in {-1, +1}."""
        # logaddexp(0, x) == log(1 + exp(x)) without overflowing for large x
        return np.logaddexp(0.0, -pred * real)

    def fit(self, n_epochs=10, learning_rate=0.001, reg_w=0.01, reg_v=0.001):
        """Train the model.

        Args:
            n_epochs: number of passes over the observed and negative samples.
            learning_rate: SGD step size.
            reg_w: L2 regularisation strength of the biases.
            reg_v: L2 regularisation strength of the latent factors.

        Returns:
            List with the mean training loss of every epoch.
        """
        epoch_losses = []
        for epoch in range(n_epochs):
            print('Epoch:', epoch)
            # Shuffle negative sample articles
            self.__shuffle_negative_transactions__()

            # Shuffle the training indices
            np.random.shuffle(self.training_indices)

            # Run the SGD
            epoch_losses.append(self.__sgd__(learning_rate, reg_w, reg_v))
        return epoch_losses

    def __shuffle_negative_transactions__(self):
        """Shuffle the article column of the negative samples across rows."""
        # Assign a plain array: assigning a shuffled Series would be re-aligned
        # on the index and leave the column exactly as it was
        self.negative_transactions['article_index'] = np.random.permutation(self.negative_transactions['article_index'].values)

    def predict(self, customers, last_bought_articles):
        """Predict the articles to be recommended to each customer.

        Args:
            customers: sequence of customer ids.
            last_bought_articles: sequence aligned with ``customers`` holding
                the id of the article each customer bought last (or ``None``).

        Returns:
            DataFrame with a ``customer_id`` column and a ``prediction`` column
            of space-separated article ids, best first. Customers (or last
            bought articles) that were not part of the training data receive
            the default recommendation.
        """
        recommendations = []
        default_prediction = ' '.join(self.default_recommendation)

        # Use the same field layout as training
        customer_offset, article_offset, last_bought_offset = self.feature_offsets[:3]
        n_customers, n_articles, n_last_bought = self.feature_sizes[:3]

        # Slices of the bias vector and of the factor matrix that apply to articles
        article_slice = slice(article_offset, article_offset + n_articles)
        article_bias = self.biases[article_slice]
        article_params = self.params[:, article_slice].T
        article_square_params = article_params**2

        for customer, last_bought_article in zip(customers, last_bought_articles):
            customer_idx = self.customer_id2index.get(customer)
            last_article_idx = self.article_id2index.get(last_bought_article)

            if (customer_idx is None or last_article_idx is None
                    or customer_idx >= n_customers or last_article_idx >= n_last_bought):
                # The model has no parameters for this customer / last article
                recommendations.append(default_prediction)
                continue

            customer_pos = customer_offset + customer_idx
            last_bought_pos = last_bought_offset + last_article_idx

            # Score every article at once: each candidate sample activates the
            # customer, the candidate article and the last bought article
            bias_product = self.biases[customer_pos] + article_bias + self.biases[last_bought_pos]
            params_product = self.params[:, customer_pos] + article_params + self.params[:, last_bought_pos]
            params_product_square = self.params[:, customer_pos]**2 + article_square_params + self.params[:, last_bought_pos]**2

            predictions = self.global_bias + bias_product + 0.5 * np.sum(params_product**2 - params_product_square, axis=1)

            # Keep the highest scoring articles, best first
            recommended_indexes = predictions.argsort()[::-1][:self.N_RECOMMENDATIONS]

            # Keep the recommendations for this customer
            recommendations.append(' '.join([self.article_index2id[item_idx] for item_idx in recommended_indexes]))

        return pd.DataFrame({
            'customer_id': customers,
            'prediction': recommendations,
        })

In [None]:
# Build the factorization machine on the prepared transactions with 20 latent factors
recsys = RecSys_FM(
    transactions=transactions,
    customer_id2index=customer_id2index,
    customer_index2id=customer_index2id,
    article_id2index=article_id2index,
    article_index2id=article_index2id,
    default_recommendation=default_recommendation,
    num_components=20,
)

In [None]:
# Train the model with the default hyper-parameters
recsys.fit()

# Cache the learned parameters on disk; np.save does not create missing
# directories, so make sure the target folder exists first
os.makedirs('data', exist_ok=True)
np.save('data/params.npy', recsys.params)
np.save('data/biases.npy', recsys.biases)
np.save('data/global_bias.npy', recsys.global_bias)
# To reuse previously saved parameters instead of training, load them back:
# recsys.params = np.load('data/params.npy')
# recsys.biases = np.load('data/biases.npy')
# recsys.global_bias = np.load('data/global_bias.npy')

In [None]:
# Make a prediction for each customer: the result is a DataFrame with one
# space-separated list of recommended article ids per customer id
predictions = recsys.predict(customers, last_articles)

In [None]:
# Preview the first rows of the predictions
predictions.head()

In [None]:
# Write the predictions to the submission file, leaving out the DataFrame index
predictions.to_csv('submission.csv', index=False)

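References:

- S. Rendle, "Factorization Machines" (2010): https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf

- Factorization machine walkthrough (ethen8181 machine-learning notes): http://ethen8181.github.io/machine-learning/recsys/factorization_machine/factorization_machine.html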