H&M RECOMMENDATION SYSTEM USING THE FACTORIZTION METHOD

IMPORT PACKAGES

In [2]:
import pandas as pd
import numpy as np
import datetime
from tqdm import tqdm

THE MODEL CLASS

In [3]:
# Create a class for recommendation system
class Recommend:
    def __init__(self, transactions, customer_id2index, customer_index2id, article_id2index, article_index2id,
                 default_recommendation, num_components=20):

        self.transactions = transactions  # Save the transactions
        self.negative_transactions = self.transactions.copy()  # Generate negative samples
        self.default_recommendation = default_recommendation  # Save the default recommendation
        self.customer_id2index = customer_id2index  # Save customer and article mapping functions
        self.customer_index2id = customer_index2id
        self.article_id2index = article_id2index
        self.article_index2id = article_index2id
        self.max_feature_values = self.transactions.max().values  # Compute the length of the one-hot features
        self.dimensionality = self.max_feature_values.sum() + len(self.max_feature_values)
        self.global_bias = np.random.normal(scale=1, size=1)  # Initialize the biases and parameters
        self.biases = np.random.normal(scale=1 / self.dimensionality, size=self.dimensionality)
        self.params = np.random.normal(scale=1. / self.dimensionality, size=(num_components, self.dimensionality))
        self.training_indices = np.arange(len(self.transactions) * 2)  # Create the list of indices

    # Stochastic gradient descent
    def __sgd__(self, lr, reg_w, reg_v):
        for idx in self.training_indices:
            # Get the sample from transactions
            if idx < len(self.transactions):
                sample = self.transactions.iloc[idx]
                bought = 1
            else:
                sample = self.negative_transactions.iloc[idx - len(self.transactions)]
                bought = 0

            # Get the encoding positions for the current sample
            sample_positions = []
            accum = 0
            for idx, col in enumerate(sample.index):
                sample_positions.append(sample[col])
                accum += self.max_feature_values[idx]
            sample_positions = np.array(sample_positions)

            # Make a prediction
            prediction, summed = self.__predict__(sample_positions)

            # Compute the error
            self.__log_loss__(prediction, bought)

            # Compute the gradient error
            error_gradient = -bought / (np.exp(bought * prediction) + 1.0)

            # Update biases and parameters
            self.global_bias -= lr * error_gradient

            self.biases[sample_positions] -= lr * (error_gradient + 2 * reg_w * self.biases[sample_positions])

            self.params[:, sample_positions] -= lr * (error_gradient * (
                    summed[:, np.newaxis] * self.params[:, sample_positions]) + 2 * reg_v * self.params[:,
                                                                                            sample_positions])

    # Make a prediction
    def __predict__(self, sample_positions):
        # Compute the sum of the square component
        summed = np.sum(self.params[:, sample_positions], axis=1)
        summed_square = np.sum(self.params[:, sample_positions] ** 2, axis=1)

        # Return the prediction using the biases and parameters
        return self.global_bias + np.sum(self.biases[sample_positions]) + 0.5 * np.sum(
            summed ** 2 - summed_square), summed

    # Log loss error
    def __log_loss__(self, pred, real):
        return np.log(np.exp(-pred * real) + 1.0)

    # Train the model
    def fit(self, n_epochs=10, learning_rate=0.001, reg_w=0.01, reg_v=0.001):
        for epoch in range(n_epochs):
            print('Epoch:', epoch)
            # Shuffle negative sample articles
            self.__shuffle_negative_transactions__()

            # Shuffle the training indices
            np.random.shuffle(self.training_indices)

            # Run the SGD
            self.__sgd__(learning_rate, reg_w, reg_v)

    # Shuffle negative samples
    def __shuffle_negative_transactions__(self):
        self.negative_transactions['article_index'] = self.negative_transactions['article_index'].sample(frac=1)

    # Predict the articles for each user
    def predict(self, customers, last_bought_articles):
        recommendations = []

        # Create the articles matrix
        len_articles = len(self.article_index2id)
        articles = np.eye(len_articles)

        # Compute the matrix product between articles and the bias vector that apply to articles
        len_customers = len(self.customer_index2id)
        article_bias = np.dot(articles, self.biases[len_customers:len_customers + len_articles])

        # Compute the matrix product between articles and the vectors from params that apply to articles
        article_params = np.dot(articles, self.params[:, len_customers:len_customers + len_articles].T)

        # Compute the matrix product between articles and the vectors from params to the square that apply to articles
        article_square_params = np.dot(articles, self.params[:, len_customers:len_customers + len_articles].T ** 2)

        for customer, last_bought_article in zip(customers, last_bought_articles):
            # If the customer is not in the trained ones return the default recommendation
            if customer not in self.customer_id2index.keys():
                recommendations.append(' '.join(default_recommendation))

            # Else use the factorization machine
            else:
                customer_idx = self.customer_id2index[customer]
                last_bought_idx = self.article_id2index[last_bought_article] + len_customers + len_articles

                # Make a prediction for each article using the one hot matrix
                bias_product = self.biases[customer_idx] + article_bias + self.biases[last_bought_idx]
                params_product = self.params[:, customer_idx] + article_params + self.params[:, last_bought_idx]
                params_product_square = self.params[:, customer_idx] ** 2 + article_square_params + self.params[:,
                                                                                                last_bought_idx] ** 2

                predictions = self.global_bias + bias_product + 0.5 * np.sum(
                    params_product ** 2 - params_product_square, axis=1)

                # Sort the predictions and keep the 12 higher
                recommended_indexes = predictions.argsort()[-12:]

                # Keep the recommendations for this customer
                recommendations.append(' '.join([self.article_index2id[item_idx] for item_idx in recommended_indexes]))

        return pd.DataFrame({
            'customer_id': customers,
            'article_id recommendations': recommendations,
        })

LOAD TRANSACTIONS DATA

In [4]:
# Load data set
transactions = pd.read_csv(r"C:\Users\Baski\Desktop\SeniorProject\transactions_train.csv", dtype={'article_id': str})
transactions.drop(['sales_channel_id', 'price'], inplace=True, axis=1)


In [5]:
# Filter transactions by date (approximately one week start-end date)
start_date = datetime.datetime(2020, 9, 14)
transactions["t_dat"] = pd.to_datetime(transactions["t_dat"])
transactions = transactions.loc[transactions["t_dat"] >= start_date]

In [6]:
# Filter transactions by number of an article has been bought
article_bought_count = transactions[['article_id', 't_dat']].groupby('article_id').count().reset_index().rename(
    columns={'t_dat': 'count'})
most_bought_articles = article_bought_count[article_bought_count['count'] > 10]['article_id'].values
transactions = transactions[transactions['article_id'].isin(most_bought_articles)]
transactions = transactions.reset_index(drop=True)

LOAD CUSTOMERS DATA

In [7]:
# Make the predictions for 1000 customers
customers = pd.read_csv(r"C:\Users\Baski\Desktop\SeniorProject\sample_submission.csv", nrows=1000).customer_id.values

# Last bought articles for each customer
last_bought_articles = transactions.sort_values(['customer_id', 't_dat'], ascending=False).drop_duplicates(
    ['customer_id'], keep='first')


In [8]:
# Create an array with the last bought article for each customer
def get_last_bought_article_per_customer(customers, last_bought_articles):
    last_articles = []
    transaction_customers = last_bought_articles.customer_id.values
    for customer in tqdm(customers):
        if customer in transaction_customers:
            last_articles.append(
                last_bought_articles[last_bought_articles['customer_id'] == customer].article_id.values[0])
        else:
            last_articles.append(None)
    return np.array(last_articles)

In [9]:
# Get the last bought articles
last_articles = get_last_bought_article_per_customer(customers, last_bought_articles)

100%|██████████| 1000/1000 [00:01<00:00, 604.41it/s]


CALCULATE DEFAULT RECOMMENDATION

In [10]:
# Calculate time decaying popularity
transactions['pop_factor'] = transactions['t_dat'].apply(lambda x: 1 / (datetime.datetime(2020, 9, 23) - x).days)
transactions_by_article = transactions[['article_id', 'pop_factor']].groupby('article_id').sum().reset_index()
default_recommendation = transactions_by_article.sort_values(by='pop_factor', ascending=False)['article_id'].values[:12]

PREPARE DATA AND FEATURES

In [11]:
# Sort the transactions by customer and date and assign the last item bought to each transaction
transactions = transactions.sort_values(['customer_id', 't_dat'], axis=0)
transactions['last_bought_id'] = pd.concat(
    [pd.Series([transactions['article_id'].values[-1]]), transactions['article_id']])[:-1].values
transactions.drop(['t_dat', 'pop_factor'], inplace=True, axis=1)

# Get the users and articles
customer_values = np.unique(transactions.customer_id.values)
article_values = np.unique(transactions.article_id.values)

# Map customer and article ids to indices
customer_id2index = {c: i for i, c in enumerate(customer_values)}
article_id2index = {a: i for i, a in enumerate(article_values)}
customer_index2id = {i: c for c, i in customer_id2index.items()}
article_index2id = {i: a for a, i in article_id2index.items()}

# Assign the customer and article indices to the transactions and drop the ids
transactions['customer_index'] = transactions.customer_id.map(customer_id2index)
transactions['article_index'] = transactions.article_id.map(article_id2index)
transactions['last_bought_index'] = transactions.last_bought_id.map(article_id2index)
transactions.drop(['customer_id', 'article_id', 'last_bought_id'], inplace=True, axis=1)

TRAIN THE MODEL

In [12]:
# Initiate the class for recommendation system
recSystem = Recommend(transactions, customer_id2index, customer_index2id, article_id2index, article_index2id,
                      default_recommendation, num_components=20)

# Train the model
recSystem.fit()

Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9


MAKE ITEM PREDICTIONS 

In [13]:
# Make a prediction for each customer
predictions = recSystem.predict(customers, last_articles)
predictions.head()

Unnamed: 0,customer_id,article_id recommendations
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0924243002 0924243001 0918522001 0751471001 08...
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0924243002 0924243001 0918522001 0751471001 08...
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0888229007 0923569002 0809320001 0214844002 06...
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0924243002 0924243001 0918522001 0751471001 08...
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0924243002 0924243001 0918522001 0751471001 08...


EXTRACT PREDICTIONS TO A .CSV FILE

In [14]:
# Extract predictions to a .csv file
predictions.to_csv('recommendation.csv', index=False)