# Demo: ML-based Collaborative Filtering on Utility Matrix with Reduced Dimensions

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(1, '../resype')
%load_ext autoreload
%autoreload 2 

## Prepare data

In [2]:
# Fix the seed of NumPy's legacy global RNG so the synthetic ratings are reproducible.
np.random.seed(202109)

# Rating scale: the values 1-5, plus NaN standing in for "no rating given".
rating_vals = np.append(np.arange(1, 6), np.nan)

# Ten users and one hundred items, each identified by a 0-based integer id.
userids = np.arange(10)
itemids = np.arange(100)

# One uniform draw from the rating scale for every (user, item) pair.
random_ratings = np.random.choice(rating_vals, size=userids.size * itemids.size)

In [3]:
# Long-format ratings table with one row per (user, item) pair:
#   user_id - each user id repeated once per item (0, 0, ..., 1, 1, ...)
#   item_id - the full item id range cycled once per user (0..99, 0..99, ...)
#   rating  - the matching random draw (NaN = not rated)
# drop_duplicates() compares whole rows; every (user_id, item_id) pair occurs
# exactly once by construction, so it removes nothing for this synthetic data.
transactions = pd.DataFrame({
    'user_id': np.repeat(userids, len(itemids)),
    'item_id': np.tile(itemids, len(userids)),
    'rating': random_ratings,
}).drop_duplicates()

In [4]:
# Display the long-format ratings table (user_id, item_id, rating); NaN ratings mark unrated pairs.
transactions

Unnamed: 0,user_id,item_id,rating
0,0,0,2.0
1,0,1,
2,0,2,
3,0,3,5.0
4,0,4,4.0
...,...,...,...
995,9,95,
996,9,96,2.0
997,9,97,
998,9,98,5.0


## Load resype

In [5]:
from resype import Resype

In [6]:
# Wrap the transactions table in a Resype object; every later step in this notebook goes through it.
# NOTE(review): `re` is also the name of the standard-library regex module, so running
# `import re` later in this kernel would rebind the name and break the cells below.
re = Resype(transactions)

In [7]:
# Build the utility matrix from the transactions given to Resype. Per the output below it has
# one row per user_id, one column per item_id, and NaN wherever no rating exists.
# NOTE(review): the training cell reads `re.utility_matrix`, so the method presumably also
# stores the result on the instance — confirm against the resype source.
utility_matrix = re.construct_utility_matrix()
utility_matrix

item_id,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2.0,,,5.0,4.0,4.0,3.0,4.0,4.0,3.0,...,4.0,,1.0,1.0,,4.0,5.0,5.0,1.0,5.0
1,1.0,3.0,5.0,2.0,,5.0,3.0,3.0,1.0,,...,4.0,4.0,3.0,,2.0,1.0,4.0,1.0,3.0,4.0
2,2.0,1.0,2.0,4.0,5.0,2.0,2.0,1.0,1.0,5.0,...,5.0,2.0,5.0,4.0,2.0,4.0,3.0,,2.0,2.0
3,5.0,5.0,4.0,5.0,2.0,4.0,1.0,3.0,1.0,4.0,...,4.0,2.0,3.0,2.0,4.0,3.0,4.0,4.0,1.0,5.0
4,4.0,1.0,2.0,3.0,2.0,2.0,,5.0,,4.0,...,1.0,,,4.0,,3.0,3.0,2.0,,
5,1.0,5.0,,4.0,2.0,4.0,3.0,,2.0,,...,2.0,4.0,1.0,1.0,1.0,5.0,4.0,1.0,3.0,3.0
6,4.0,2.0,2.0,4.0,1.0,5.0,1.0,3.0,3.0,,...,1.0,5.0,5.0,3.0,2.0,2.0,4.0,3.0,2.0,
7,,3.0,2.0,2.0,,5.0,5.0,5.0,4.0,5.0,...,1.0,2.0,4.0,3.0,2.0,3.0,1.0,1.0,2.0,
8,1.0,,2.0,2.0,1.0,2.0,2.0,2.0,,3.0,...,5.0,2.0,3.0,2.0,2.0,4.0,3.0,1.0,1.0,
9,2.0,1.0,3.0,3.0,4.0,2.0,,,2.0,2.0,...,5.0,1.0,4.0,2.0,3.0,,2.0,,5.0,2.0


## Train model using `train_model_svd`

#### Create model object (load from sklearn)

In [8]:
# Untrained regressor that is handed to `re.train_model_svd` in the training cell below.
# `random_state` is fixed so the forest itself is reproducible from run to run.
from sklearn.ensemble import RandomForestRegressor
rs_model1 = RandomForestRegressor(random_state=202109)

#### Train model

In [9]:
# Re-seed NumPy's global RNG right before training so the result does not depend on how
# many random draws earlier cells consumed.
# NOTE(review): whether `train_model_svd` actually draws from NumPy's global RNG is not
# visible in this notebook — confirm against the resype source.
np.random.seed(202109)

In [10]:
# Train on the utility matrix stored on the Resype object and keep the returned matrix.
# NOTE(review): `d=10` is presumably the number of reduced (SVD) dimensions, and
# `return_models=False` presumably means only the imputed matrix is returned rather than
# the fitted models as well — confirm both against the resype documentation.
utility_matrix_imputed = re.train_model_svd(
    re.utility_matrix, rs_model1, d=10, return_models=False)

#### Prediction

In [11]:
# Show the imputed matrix. In the columns displayed, cells that were NaN in the utility
# matrix now hold model estimates, while the observed ratings are unchanged.
utility_matrix_imputed

item_id,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2.0,2.99,3.01,5.0,4.0,4.0,3.0,4.0,4.0,3.0,...,4.0,2.5,1.0,1.0,2.81,4.0,5.0,5.0,1.0,5.0
1,1.0,3.0,5.0,2.0,2.31,5.0,3.0,3.0,1.0,3.82,...,4.0,4.0,3.0,2.07,2.0,1.0,4.0,1.0,3.0,4.0
2,2.0,1.0,2.0,4.0,5.0,2.0,2.0,1.0,1.0,5.0,...,5.0,2.0,5.0,4.0,2.0,4.0,3.0,1.9,2.0,2.0
3,5.0,5.0,4.0,5.0,2.0,4.0,1.0,3.0,1.0,4.0,...,4.0,2.0,3.0,2.0,4.0,3.0,4.0,4.0,1.0,5.0
4,4.0,1.0,2.0,3.0,2.0,2.0,2.33,5.0,2.44,4.0,...,1.0,2.76,2.63,4.0,2.18,3.0,3.0,2.0,1.75,3.61
5,1.0,5.0,3.09,4.0,2.0,4.0,3.0,2.86,2.0,3.55,...,2.0,4.0,1.0,1.0,1.0,5.0,4.0,1.0,3.0,3.0
6,4.0,2.0,2.0,4.0,1.0,5.0,1.0,3.0,3.0,3.21,...,1.0,5.0,5.0,3.0,2.0,2.0,4.0,3.0,2.0,3.61
7,2.71,3.0,2.0,2.0,2.91,5.0,5.0,5.0,4.0,5.0,...,1.0,2.0,4.0,3.0,2.0,3.0,1.0,1.0,2.0,3.66
8,1.0,2.91,2.0,2.0,1.0,2.0,2.0,2.0,2.23,3.0,...,5.0,2.0,3.0,2.0,2.0,4.0,3.0,1.0,1.0,3.05
9,2.0,1.0,3.0,3.0,4.0,2.0,2.56,3.32,2.0,2.0,...,5.0,1.0,4.0,2.0,3.0,3.53,2.0,2.3,5.0,2.0


## Train iterative model using `fit`

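#### Create model object (load from sklearn)

`RandomForestRegressor` was already imported in the previous section; a fresh instance (`rs_model2`) is created at the top of the training cell below.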

#### Train model

In [12]:
# A second, untrained forest with the same `random_state` as `rs_model1`, so this run starts
# from a fresh estimator rather than reusing the one passed to `train_model_svd`.
rs_model2 = RandomForestRegressor(random_state=202109)
# NOTE(review): `method='svd'` and `d=10` presumably select the SVD-reduced variant with 10
# dimensions, mirroring the `train_model_svd` call above — confirm against the resype docs.
# The predictions are read from `re.utility_matrix_preds` in the next cell.
re.fit(rs_model2, method='svd', d=10)

#### Prediction

In [13]:
# Show the predictions stored on the Resype object after `fit`. In the columns displayed,
# observed ratings are unchanged and the previously missing cells hold model estimates.
re.utility_matrix_preds

item_id,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2.0,3.05,3.0,5.0,4.0,4.0,3.0,4.0,4.0,3.0,...,4.0,2.54,1.0,1.0,2.8,4.0,5.0,5.0,1.0,5.0
1,1.0,3.0,5.0,2.0,2.41,5.0,3.0,3.0,1.0,3.83,...,4.0,4.0,3.0,2.06,2.0,1.0,4.0,1.0,3.0,4.0
2,2.0,1.0,2.0,4.0,5.0,2.0,2.0,1.0,1.0,5.0,...,5.0,2.0,5.0,4.0,2.0,4.0,3.0,1.93,2.0,2.0
3,5.0,5.0,4.0,5.0,2.0,4.0,1.0,3.0,1.0,4.0,...,4.0,2.0,3.0,2.0,4.0,3.0,4.0,4.0,1.0,5.0
4,4.0,1.0,2.0,3.0,2.0,2.0,2.35,5.0,2.4,4.0,...,1.0,2.73,2.63,4.0,2.18,3.0,3.0,2.0,1.75,3.58
5,1.0,5.0,3.09,4.0,2.0,4.0,3.0,2.84,2.0,3.54,...,2.0,4.0,1.0,1.0,1.0,5.0,4.0,1.0,3.0,3.0
6,4.0,2.0,2.0,4.0,1.0,5.0,1.0,3.0,3.0,3.19,...,1.0,5.0,5.0,3.0,2.0,2.0,4.0,3.0,2.0,3.62
7,2.63,3.0,2.0,2.0,2.94,5.0,5.0,5.0,4.0,5.0,...,1.0,2.0,4.0,3.0,2.0,3.0,1.0,1.0,2.0,3.67
8,1.0,2.88,2.0,2.0,1.0,2.0,2.0,2.0,2.23,3.0,...,5.0,2.0,3.0,2.0,2.0,4.0,3.0,1.0,1.0,3.12
9,2.0,1.0,3.0,3.0,4.0,2.0,2.56,3.33,2.0,2.0,...,5.0,1.0,4.0,2.0,3.0,3.51,2.0,2.35,5.0,2.0
