# Demo: Iterative Approach to ML-based Item-wise Collaborative Filtering

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(1, '../resype')
%load_ext autoreload
%autoreload 2 

## Prepare data

In [2]:
# Toy ratings written one row per user (columns are the movies M1..M4);
# np.nan marks a movie the user has not rated.
movies_df = pd.DataFrame.from_dict(
    {
        'U1': [2, np.nan, 5, 5],
        'U2': [np.nan, 1, 4, 4],
        'U3': [3, np.nan, 1, np.nan],
        'U4': [np.nan, 5, 1, np.nan],
        'U5': [1, 1, 4, np.nan],
        'U6': [5, np.nan, np.nan, 1],
    },
    orient='index',
    columns=['M1', 'M2', 'M3', 'M4'],
)
movies_df

Unnamed: 0,M1,M2,M3,M4
U1,2.0,,5.0,5.0
U2,,1.0,4.0,4.0
U3,3.0,,1.0,
U4,,5.0,1.0,
U5,1.0,1.0,4.0,
U6,5.0,,,1.0


In [3]:
# Reshape the wide user x movie table into long-format transactions:
# one (user_id, item_id, rating) row per observed rating, missing ratings dropped.
movie_transactions = (
    movies_df
    .rename_axis('user_id')
    .reset_index()
    .melt(id_vars=['user_id'], var_name='item_id', value_name='rating')
    .dropna()
    .reset_index(drop=True)
)

In [4]:
# Show the long-format transactions (columns: user_id, item_id, rating).
movie_transactions

Unnamed: 0,user_id,item_id,rating
0,U1,M1,2.0
1,U3,M1,3.0
2,U5,M1,1.0
3,U6,M1,5.0
4,U2,M2,1.0
5,U4,M2,5.0
6,U5,M2,1.0
7,U1,M3,5.0
8,U2,M3,4.0
9,U3,M3,1.0


## Load resype

In [5]:
# `collab_filtering` is found through the '../resype' entry added to sys.path
# in the first cell.
# NOTE(review): the variable name `re` is also the name of the stdlib regex
# module; it is kept because later cells reference it, but a rename
# (e.g. `recsys`) across the notebook would avoid the clash.
from collab_filtering import CollabFilteringModel
re = CollabFilteringModel(movie_transactions)

In [6]:
# Build the user x item utility matrix from the transactions; the displayed
# result has user_id as index, item_id as columns and NaN where no rating exists.
utility_matrix = re.construct_utility_matrix()
utility_matrix

item_id,M1,M2,M3,M4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
U1,2.0,,5.0,5.0
U2,,1.0,4.0,4.0
U3,3.0,,1.0,
U4,,5.0,1.0,
U5,1.0,1.0,4.0,
U6,5.0,,,1.0


## Train iterative model using `train_model_iterative`

#### Create model object (load from sklearn)

In [7]:
# Base regressor handed to the iterative trainer. The fixed random_state keeps
# the forest reproducible across reruns.
from sklearn.ensemble import RandomForestRegressor
rs_model1 = RandomForestRegressor(random_state=202109)

#### Train model

In [8]:
# The utility matrix as stored on the model; the displayed values match the
# frame returned by construct_utility_matrix() above.
re.utility_matrix

item_id,M1,M2,M3,M4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
U1,2.0,,5.0,5.0
U2,,1.0,4.0,4.0
U3,3.0,,1.0,
U4,,5.0,1.0,
U5,1.0,1.0,4.0,
U6,5.0,,,1.0


In [9]:
# Run the iterative trainer directly on the utility matrix.
# With return_models=True the call returns three values: the imputed matrix,
# the training metrics and the trained models (the unit tests further down
# check that only two values come back with return_models=False).
utility_matrix_imputed, metrics, trained_model = re.train_model_iterative(
    re.utility_matrix, rs_model1, return_models=True)

#### Predictions (mean-centered ratings)

In [10]:
# Imputed utility matrix with no missing entries. The displayed values are
# centred on each user's mean rating (e.g. U1's rating of 2 appears as -2.0),
# not on the original rating scale.
utility_matrix_imputed

item_id,M1,M2,M3,M4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
U1,-2.0,-1.45,1.0,1.0
U2,-1.45,-2.0,1.0,1.0
U3,1.0,1.14,-1.0,-1.16
U4,1.22,2.0,-2.0,-1.16
U5,-1.0,-1.0,2.0,0.91
U6,2.0,1.14,-1.15,-2.0


#### Convert to scale of original ratings

In [11]:
# Add each user's mean observed rating (row mean of the original utility
# matrix, NaNs skipped) back to every entry of that user's row.
# The result is only displayed; it is not assigned to a variable.
utility_matrix_imputed.add(re.utility_matrix.mean(axis=1), axis=0)

item_id,M1,M2,M3,M4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
U1,2.0,2.55,5.0,5.0
U2,1.55,1.0,4.0,4.0
U3,3.0,3.14,1.0,0.84
U4,4.22,5.0,1.0,1.84
U5,1.0,1.0,4.0,2.91
U6,5.0,4.14,1.85,1.0


## Train iterative model using `fit`

#### Create model object (load from sklearn)

No new model is created here: the `rs_model1` regressor defined above is reused.

#### Train model

In [12]:
# Same iterative training through the high-level `fit` API, reusing rs_model1.
# The results are read back from attributes on the model in the next cells
# (utility_matrix_preds, trained_models).
re.fit(rs_model1, method='iterative', return_models=True)

#### Prediction

In [13]:
# Predictions stored on the model by fit(); the displayed values match the
# matrix returned by train_model_iterative earlier in the notebook.
re.utility_matrix_preds

item_id,M1,M2,M3,M4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
U1,-2.0,-1.45,1.0,1.0
U2,-1.45,-2.0,1.0,1.0
U3,1.0,1.14,-1.0,-1.16
U4,1.22,2.0,-2.0,-1.16
U5,-1.0,-1.0,2.0,0.91
U6,2.0,1.14,-1.15,-2.0


#### Trained models

In [14]:
# Dictionary of regressors stored by fit(return_models=True): one entry per
# item column, keyed by item id.
re.trained_models

{'M1': RandomForestRegressor(random_state=202109),
 'M2': RandomForestRegressor(random_state=202109),
 'M3': RandomForestRegressor(random_state=202109),
 'M4': RandomForestRegressor(random_state=202109)}

## Unit tests

In [15]:
import unittest
import pandas as pd
from pandas._testing import assert_index_equal
from pandas._testing import assert_frame_equal


class TestTrainIterativeModel(unittest.TestCase):
    from collab_filtering import CollabFilteringModel
    movies_df = pd.DataFrame({'M1': [2, None, 3, None, 1, 5],
                              'M2': [None, 1, None, 5, 1, None],
                              'M3': [5, 4, 1, 1, 4, None],
                              'M4': [5, 4, None, None, None, 1]},
                             index=[f'U{i+1}' for i in range(6)])
    movie_transactions = pd.melt(
        movies_df.reset_index(), id_vars=['index'],
        value_vars=movies_df.columns).dropna().reset_index(drop=True)
    movie_transactions.columns = ["user_id", 'item_id', 'rating']

    def test_initialize_models_itemwise(self):
        re_test = CollabFilteringModel(movie_transactions)
        um = re_test.construct_utility_matrix()
        rf = RandomForestRegressor(random_state=202109)
        self.assertEqual(len(
            re_test.initialize_models_itemwise(um, rf)), um.shape[1])
        self.assertListEqual(sorted(um.columns),
                             sorted(re_test.initialize_models_itemwise(
                                 um, rf, suffix="").keys()))
        self.assertEqual(sorted(um.columns)[0]+'model',
                         sorted(re_test.initialize_models_itemwise(
                             um, rf).keys())[0])

    def test_initialize_models_userwise(self):
        re_test = CollabFilteringModel(movie_transactions)
        um = re_test.construct_utility_matrix()
        rf = RandomForestRegressor(random_state=202109)
        self.assertEqual(len(
            re_test.initialize_models_userwise(um, rf)), um.shape[0])
        self.assertListEqual(sorted(um.index),
                             sorted(re_test.initialize_models_userwise(
                                 um, rf, suffix="").keys()))
        self.assertEqual(sorted(um.index)[0]+'model',
                         sorted(re_test.initialize_models_userwise(
                             um, rf).keys())[0])

    def test_eval_convergence_criterion(self):
        from sklearn.metrics import mean_squared_error
        re_test = CollabFilteringModel(movie_transactions)
        um = re_test.construct_utility_matrix()
        pred_curr = [0, 0, 1]
        pred_prev = [1, 0, 1]
        pred_curr2 = [0, 0, 0.5]

        self.assertAlmostEqual(mean_squared_error(pred_curr, pred_prev),
                               re.eval_convergence_criterion(
                                   pred_curr, pred_prev, stopping_criterion='mse')[0])
        self.assertFalse(re.eval_convergence_criterion(
            pred_curr, pred_prev, stopping_criterion='mse')[1])
        self.assertFalse(re.eval_convergence_criterion(
            pred_curr, pred_prev, stopping_criterion='mse',
            mse_threshold=0.1)[1])
        self.assertTrue(re.eval_convergence_criterion(
            pred_curr, pred_prev, stopping_criterion='mse',
            mse_threshold=0.4)[1])
        self.assertTrue(re.eval_convergence_criterion(
            pred_curr, pred_curr2, stopping_criterion='mse',
            mse_threshold=0.1)[1])
        self.assertTrue(re.eval_convergence_criterion(
            pred_curr, pred_curr2, stopping_criterion='mse')[1])
        self.assertFalse(re.eval_convergence_criterion(
            pred_curr, pred_prev,  scaled=True, rating_max=1)[1])
        self.assertTrue(re.eval_convergence_criterion(
            pred_curr, pred_prev,  scaled=True, rating_max=5)[1])
        self.assertEqual(0, re.eval_convergence_criterion(
            pred_curr, pred_curr, stopping_criterion='stdev_abs',
            stdev_threshold=0.5)[0])
        self.assertTrue(re.eval_convergence_criterion(
            pred_curr, pred_prev, stopping_criterion='stdev_abs',
            stdev_threshold=0.5)[1])
        self.assertFalse(re.eval_convergence_criterion(
            pred_curr, pred_prev, stopping_criterion='stdev_abs',
            stdev_threshold=0.4)[1])
        self.assertFalse(re.eval_convergence_criterion(
            pred_curr, pred_prev, stopping_criterion='stdev_abs',
            stdev_threshold=0.1, scaled=True,
            scaling_method='max', rating_max=1)[1])

    def test_train_model_iterative(self):
        re_test = CollabFilteringModel(movie_transactions)
        um = re_test.construct_utility_matrix()
        rf = RandomForestRegressor(random_state=202109)

        self.assertEqual(len(re_test.train_model_iterative(um, rf)), 3)
        self.assertEqual(
            len(re_test.train_model_iterative(um, rf, return_models=True)), 3)
        self.assertEqual(
            len(re_test.train_model_iterative(um, rf, return_models=True)[2]),
            um.shape[1])
        self.assertEqual(
            len(re_test.train_model_iterative(um, rf, return_models=False)), 2)
        self.assertEqual(um.shape,
                         re_test.train_model_iterative(
                             um, rf, return_models=False)[0].shape)
        self.assertEqual(um.shape,
                         re_test.train_model_iterative(
                             um, rf, return_models=True)[0].shape)

    def test_fit(self):
        re_test = CollabFilteringModel(movie_transactions)
        um = re_test.construct_utility_matrix()
        rf = RandomForestRegressor(random_state=202109)
        re_test.fit(rf, method='iterative')
        um_preds = re_test.utility_matrix_preds
        self.assertFalse(um_preds.isnull().any().any())
        
        rf = RandomForestRegressor(random_state=202109)
        re_test.fit(rf, method='iterative', return_models=True)
        um_preds = re_test.utility_matrix_preds
        self.assertFalse(um_preds.isnull().any().any())
        self.assertEqual(len(re_test.trained_models), um.shape[1])

# Run the tests inside the notebook:
#   argv=['']  -> give unittest an argv of its own instead of sys.argv,
#                 which holds the kernel's command-line arguments
#   exit=False -> do not call sys.exit() when the run finishes
# The trailing ';' keeps the TestProgram repr out of the cell output.
unittest.main(argv=[''], verbosity=2, exit=False);

test_eval_convergence_criterion (__main__.TestTrainIterativeModel) ... ok
test_fit (__main__.TestTrainIterativeModel) ... ok
test_initialize_models_itemwise (__main__.TestTrainIterativeModel) ... ok
test_initialize_models_userwise (__main__.TestTrainIterativeModel) ... ok
test_train_model_iterative (__main__.TestTrainIterativeModel) ... ok

----------------------------------------------------------------------
Ran 5 tests in 7.376s

OK


<unittest.main.TestProgram at 0x7f26f01024f0>