# Getting Started

## Introduction
ReSyPE (pronounced *recipe*) is a Python library built for both practitioners and researchers who wish to quickly develop and deploy ML-based recommender systems.

The library provides an end-to-end pipeline that includes:

1. Loading transaction, user feature, and item feature datasets
2. Interchangeable methods for user and item clustering
3. Modular framework for machine learning models
4. Iterative and decomposition-based techniques


## Installation

`pip install resype`


## Load the Data

In [26]:
import pandas as pd
import numpy as np
from resype.resype import Resype

%load_ext autoreload
%autoreload 2 

In [27]:
# load transaction list
# Keep only the three columns the recommender needs, draw a small demo sample,
# and rename the columns to the user_id / item_id / rating names used below.
# The sample is seeded: without `random_state` every re-run picks different
# rows, so the outputs of later cells no longer match each other.
transaction_list = (
    pd.read_csv("sample_data/ratings.csv")[['userId', 'movieId', 'rating']]
    .sample(20, random_state=42)
    .rename(columns={"userId": "user_id", "movieId": "item_id"})
)

## Preprocess

In [28]:
# Wrap the sampled transactions in a Resype object, then preview the first
# rows of the transaction list it exposes.
# NOTE(review): the name `re` would shadow Python's standard-library `re`
# module if that module were imported in this notebook — consider a more
# descriptive name.
re = Resype(transaction_list)
re.transaction_list.head()

Unnamed: 0,user_id,item_id,rating
44904,298,6297,2.0
18858,122,778,5.0
83217,527,1270,4.0
55914,369,1625,4.5
54024,356,6370,4.5


In [29]:
# construct utility matrix
# The displayed result is a user_id x item_id table of ratings; user/item
# pairs that have no rating in the sample appear as empty cells.
re.construct_utility_matrix()

item_id,552,778,956,1046,1193,1270,1380,1438,1498,1625,1752,1962,2947,3052,4024,6182,6297,6370,6755,61024
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
18,,,,,,,,,,,,,,4.5,,,,,,
58,3.0,,,,,,,,,,,,,,,,,,,
108,,,,,,,,,,,,,,,1.0,,,,,
122,,5.0,,,,,,,,,,,,,,,,,,
200,,,,,,,,1.0,,,,,,,,,,,,
288,,,,,,,,,,,,4.0,,,,,,,,
290,,,,,,,,,,,,,5.0,,,,,,,
298,,,,,,,,,,,,,,,,,2.0,,,
301,,,,,4.5,,,,,,,,,,,,,,,
356,,,,,,,,,,,,,,,,,,4.5,,


## Train on Unclustered Matrix

### Iterative Approach

In [24]:
# import sklearn model
from sklearn.ensemble import RandomForestRegressor

# Random forests are stochastic (bootstrap samples, feature sub-sampling).
# Pinning the seed removes the estimator's own run-to-run randomness so the
# notebook can be re-executed with comparable results.
model = RandomForestRegressor(random_state=42)

In [6]:
# fit and predict
# Fit with the 'iterative' method using the `model` estimator, then preview
# the first rows of the predicted utility matrix.
re.fit(model, method='iterative')
re.utility_matrix_preds.head()

item_id,58,366,368,1183,1282,1608,1961,2915,3111,3481,3791,4007,6851,8528,36519,41569,60684,74458,81834,89492
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
68,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# recommend
user_list = [0, 1, 2] # indices
top_n = 10
re.get_rec(user_list, top_n)

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5,rank_6,rank_7,rank_8,rank_9,rank_10
0,0,19.0,18.0,1.0,2.0,3.0,4.0,5.0,6.0,8.0,9.0
1,1,19.0,18.0,1.0,2.0,3.0,4.0,6.0,7.0,8.0,9.0
2,2,19.0,18.0,1.0,2.0,4.0,5.0,6.0,7.0,8.0,9.0


### SVD Approach

In [8]:
# fit and predict
# A new estimator instance is created for this run; it is seeded so the
# estimator's own randomness does not vary between executions.
model = RandomForestRegressor(random_state=42)

# Fit with the 'svd' method, then preview the first rows of the predicted
# utility matrix.
re.fit(model, method='svd')
re.utility_matrix_preds.head()

item_id,58,366,368,1183,1282,1608,1961,2915,3111,3481,3791,4007,6851,8528,36519,41569,60684,74458,81834,89492
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
19,0.0,0.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,-0.333333,0.166667,0.0,0.0,0.5,0.0,0.0,0.0
51,0.0,0.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,-0.333333,0.166667,0.0,0.0,0.5,0.0,0.0,0.0
57,0.0,0.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,-0.333333,0.166667,0.0,0.0,0.5,0.0,0.0,0.0
68,0.0,0.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,-0.333333,0.166667,0.0,0.0,0.5,0.0,0.0,0.0
73,0.0,0.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,-0.333333,0.166667,0.0,0.0,0.5,0.0,0.0,0.0


In [9]:
# recommend
# Request the top `top_n` ranked entries for each entry of `user_list`, now
# based on the predictions from the 'svd' fit.
# NOTE(review): as the displayed output shows, the rank_* values stay within
# 0-19, so they look like column positions rather than item ids — confirm
# against Resype.get_rec.
user_list = [0, 1, 2] # indices
top_n = 10
re.get_rec(user_list, top_n)

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5,rank_6,rank_7,rank_8,rank_9,rank_10
0,0,16.0,13.0,11.0,19.0,8.0,1.0,3.0,4.0,5.0,6.0
1,1,16.0,13.0,11.0,19.0,8.0,1.0,3.0,4.0,6.0,7.0
2,2,16.0,13.0,11.0,19.0,8.0,1.0,4.0,5.0,6.0,7.0


## Train on Clustered Matrix

In [30]:
from sklearn.cluster import KMeans

# K-means initialisation is randomised; a fixed seed keeps the cluster labels
# stable between runs so the cluster-level tables below stay comparable.
km_users = KMeans(n_clusters=10, random_state=42)
km_items = KMeans(n_clusters=10, random_state=42)

# Cluster users and items with the respective estimator. Each call returns
# three values; judging by the names they are presumably the fitted model, an
# id-to-cluster mapping, and the utility matrix annotated with cluster labels
# — confirm against the Resype documentation.
user_model, user_cluster_map, util_matrix_w_users = re.cluster_users(km_users)
item_model, item_cluster_map, util_matrix_w_items = re.cluster_items(km_items)

In [33]:
# Run the cluster assignment step (presumably applies the cluster labels
# computed by cluster_users / cluster_items — confirm), then aggregate the
# utility matrix with "mean" for both the user and the item aggregation.
# The displayed result is indexed by u_cluster with one column per i_cluster.
re.cluster_assignment()
re.utility_matrix_agg(u_agg="mean", i_agg="mean")

i_cluster,0,1,2,3,4,5,6,7,8,9
u_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.318182,0.190083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363636
1,0.0,0.363636,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0
3,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0
7,0.0,0.409091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0


In [32]:
from sklearn.ensemble import RandomForestRegressor

# Seeded so the estimator's own randomness does not change between runs.
model1 = RandomForestRegressor(random_state=42)

# Fit on the cluster-level matrix with the 'iterative' method.
# NOTE(review): the meaning of n_synth_data and p is not visible in this
# notebook — see the Resype.fit documentation before tuning them.
re.fit(model_object=model1, method='iterative', n_synth_data=5, p=0.1)

# Display the full predicted matrix (u_cluster rows x i_cluster columns).
re.utility_matrix_preds

i_cluster,0,1,2,3,4,5,6,7,8,9
u_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.236465,0.122222,-0.081717,-0.076999,-0.111626,-0.081717,-0.081717,-0.081717,0.007163,0.176267
1,0.008199,0.261514,-0.052666,-0.041354,-0.056148,-0.092515,0.00094,0.014821,-0.061421,-0.05399
2,-0.555556,-0.555556,0.571901,-0.555556,-0.555556,4.444444,-0.555556,-0.072757,-0.555556,-0.555556
3,-0.583333,-0.580611,4.416667,0.54157,-0.416576,-0.35384,-0.583333,-0.496313,-0.556115,-0.583333
4,-0.547222,-0.547222,0.236498,4.452778,-0.547222,-0.547222,-0.392926,-0.547222,-0.547222,-0.547222
5,-0.547222,-0.506998,-0.547222,-0.547222,4.452778,-0.547222,-0.547222,-0.547222,0.334069,-0.547222
6,-0.35053,-0.327081,-0.391414,0.215503,-0.391616,-0.391224,-0.392831,-0.364714,3.202361,-0.378914
7,-0.044773,0.364318,-0.044773,-0.044773,-0.078297,-0.078604,0.034298,-0.060403,-0.044773,-0.044773
8,-0.515,-0.479531,-0.515,-0.515,-0.515,-0.002752,-0.040606,3.985,-0.178517,-0.515
9,-0.4925,-0.4925,-0.451798,-0.4925,-0.4925,-0.432633,4.0075,-0.018071,-0.4925,-0.450795


In [34]:
# predict top item clusters per user
# Same get_rec call as in the unclustered sections; after the clustered fit
# the rank_* columns refer to item clusters rather than individual items.
user_list = [0, 1, 2] # index
top_n = 5 # top n clusters
re.get_rec(user_list, top_n)

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5
0,0,8.0,3.0,7.0,6.0,5.0
1,1,7.0,0.0,6.0,3.0,2.0
2,2,2.0,7.0,9.0,8.0,6.0


In [None]:
# predict top items per user
# TODO: this cell has no code yet — the walkthrough currently stops at
# cluster-level recommendations.
