In [None]:
# default_exp transforms.matrix

# Matrix
> Implementation of utilities for transforming data into matrix formats.

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.nb_imports import *
from fastcore.test import *

In [None]:
#export
import numpy as np
from scipy.sparse import csr_matrix

## generate rating matrix

In [None]:
#export
def generate_rating_matrix(user_seq, num_users, num_items, n):
    """
    Converts user-item sequences into a sparse rating (interaction count) matrix.

    For every user, the last `n` items of the sequence are dropped (they are
    held out for valid/test samples) and each remaining item adds 1 to the
    cell (user, item). An item that appears several times in the kept part of
    a sequence is counted that many times, because `csr_matrix` sums
    duplicate coordinates.

    Args:
        user_seq (list): a list of lists; the inner list at position `u` is the
            sequence of item ids of user `u`
        num_users (int): number of rows of the returned matrix
        num_items (int): number of columns of the returned matrix
        n (int): number of items to ignore from the end of each inner list.
            `n == 0` keeps every item.
    Returns:
        csr_matrix: user-item rating matrix of shape (num_users, num_items)
    Raises:
        ValueError: raised by scipy when a user index or item id does not fit
            into the requested shape.
    """
    row = []
    col = []
    for user_id, item_list in enumerate(user_seq):
        # `item_list[:-0]` is `item_list[:0]`, i.e. empty, so n == 0 must be
        # handled explicitly to mean "hold nothing out".
        kept_items = item_list if n == 0 else item_list[:-n]
        row.extend([user_id] * len(kept_items))
        col.extend(kept_items)

    row = np.array(row)
    col = np.array(col)
    # One unit of rating per kept (user, item) occurrence.
    data = np.ones(len(row), dtype=int)
    return csr_matrix((data, (row, col)), shape=(num_users, num_items))

Tests

In [None]:
def test_generate_rating_matrix(num_users=3, num_items=8, n=2, neg_case=False):
    """
    Tests the `generate_rating_matrix` method
    """
    user_seq = [
                [0,2,1,4],
                [1,2,5,7],
                [0,7,4,4,6,1]
    ]
    if neg_case:
        user_seq[0,2] = -1

    result = generate_rating_matrix(user_seq, num_users, num_items, n)
    return result.todense().astype('int32')

In [None]:
# Default hold-out (n=2): only the leading items of each sequence are counted,
# and the repeated item 4 of the third user accumulates to 2.
expected = np.array([
    [1, 0, 1, 0, 0, 0, 0, 0],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [1, 0, 0, 0, 2, 0, 0, 1],
])
output = test_generate_rating_matrix(num_users=3, num_items=8)
test_eq(output, expected)

In [None]:
# Asking for more rows than there are sequences pads the matrix with an
# all-zero row for the user that has no interactions.
expected = np.array([
    [1, 0, 1, 0, 0, 0, 0, 0],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [1, 0, 0, 0, 2, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 0],
])
output = test_generate_rating_matrix(num_users=4, num_items=8)
test_eq(output, expected)

In [None]:
# `test_fail` passes when the callable raises; `msg` is only the text reported
# if no exception occurs, so each call below is arranged so that the named
# error is the one that actually fires.

# Negative case: writing into the fixture with a tuple index raises TypeError
# before the matrix is built.
test_fail(lambda: test_generate_rating_matrix(num_users=4, num_items=8, neg_case=True),
          msg='list indices must be integers or slices, not tuple')

# Fewer rows than users in the fixture: the row index of the third user does
# not fit, so this call must be wrapped instead of run bare.
test_fail(lambda: test_generate_rating_matrix(num_users=2, num_items=8, neg_case=False),
          msg='row index exceeds matrix dimensions')

# Fewer columns than the largest kept item id (7). `neg_case` stays False so
# the failure comes from the column bound and not from the fixture write.
test_fail(lambda: test_generate_rating_matrix(num_users=3, num_items=5, neg_case=False),
          msg='column index exceeds matrix dimensions')

In [None]:
#hide
# Install and load the watermark extension, then print the author, the
# last-updated date/time, machine details and the versions of imported packages.
!pip install -q watermark
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d

Author: Sparsh A.

Last updated: 2021-12-18 06:58:48

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.104+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

pandas : 1.1.5
numpy  : 1.19.5
IPython: 5.5.0

