# Getting Started

## Introduction
ReSyPE (pronounced *recipe*) is a Python library built for both practitioners and researchers who wish to quickly develop and deploy ML-based recommender systems.

The library provides an end-to-end pipeline that includes:

1. Loading transaction, user feature, and item feature datasets
2. Interchangeable methods for user and item clustering
3. Modular framework for machine learning models
4. Iterative and decomposition-based techniques


## Installation

`pip install resype`


## Load the Data

In [1]:
import pandas as pd
import numpy as np
from resype.resype import Resype

# Load IPython's autoreload extension; mode 2 asks IPython to reload imported
# modules before each cell runs (per the IPython autoreload documentation),
# so edits to the resype package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2 

In [2]:
# load transaction list
# Keep only the three columns used below and rename them to the
# user_id / item_id / rating names used in the rest of this notebook.
# The explicit rename mapping does not depend on column order, and the fixed
# random_state makes the 20-row sample (and every table derived from it)
# reproducible across Restart & Run All.
transaction_list = (
    pd.read_csv("sample_data/ratings.csv")[['userId', 'movieId', 'rating']]
    .sample(20, random_state=42)
    .rename(columns={'userId': 'user_id', 'movieId': 'item_id'})
)

## Preprocess

In [3]:
# Build the Resype object from the sampled transactions and preview the
# transaction list it exposes.
# NOTE(review): `re` is also the name of the standard-library regex module;
# a later `import re` in this notebook would rebind this variable.
re = Resype(transaction_list)
re.transaction_list.head(3)

Unnamed: 0,user_id,item_id,rating
43114,288,42015,2.5
1573,16,260,3.0
60919,393,79132,5.0


In [4]:
# construct utility matrix
# In the output below, user_id is the row index and item_id labels the columns;
# cells are empty wherever the sampled transactions contain no rating.
re.construct_utility_matrix()
re.utility_matrix.head(3)

item_id,50,169,223,260,527,589,1249,2100,3948,4236,4896,5505,6893,27416,33646,42015,70305,71838,79132
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
16,,,,3.0,,,,,,,,,,,,,,,
68,,,,,,,,,,,,,3.5,,,,,,
111,,,,,,,,,,,,,,,4.5,,,,


## Train on Unclustered Matrix

### Iterative Approach

In [5]:
# import sklearn Model
from sklearn.ensemble import RandomForestRegressor

# Seed the forest so the predictions and recommendations below are
# reproducible across Restart & Run All.
model = RandomForestRegressor(random_state=42)

In [6]:
# fit and predict
# Fit with the 'iterative' method using the regressor created above, then
# preview the predictions exposed as re.utility_matrix_preds.
re.fit(model, method='iterative')
re.utility_matrix_preds.head(3)

item_id,50,169,223,260,527,589,1249,2100,3948,4236,4896,5505,6893,27416,33646,42015,70305,71838,79132
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
16,0.0,0.0,0.0,0.0,0.0,-0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
68,0.0,0.0,0.0,0.0,0.0,-0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111,0.0,0.0,0.0,0.0,0.0,-0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# recommend
# user_list holds positional indices, not user_ids: compare the user_id column
# of the output with the first rows of the utility matrix shown earlier.
user_list = [0, 1, 2] # indices
top_n = 10  # number of ranked columns (rank_1 .. rank_10) in the output
re.get_rec(user_list, top_n)
re.df_rec

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5,rank_6,rank_7,rank_8,rank_9,rank_10
0,16,4236.0,71838.0,169.0,223.0,527.0,1249.0,2100.0,3948.0,79132.0,4896.0
1,68,4236.0,71838.0,169.0,223.0,260.0,527.0,1249.0,2100.0,3948.0,79132.0
2,111,4236.0,71838.0,169.0,223.0,260.0,527.0,1249.0,2100.0,3948.0,79132.0


### SVD Approach

In [9]:
# fit and predict
# Use a fresh, seeded regressor so the 'svd' fit does not reuse the estimator
# already passed to the iterative fit and stays reproducible across runs.
model = RandomForestRegressor(random_state=42)
re.fit(model, method='svd')
re.utility_matrix_preds.head(3)

item_id,50,169,223,260,527,589,1249,2100,3948,4236,4896,5505,6893,27416,33646,42015,70305,71838,79132
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
16,0.0,0.0,0.0,0.0,0.0,-0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
68,0.0,0.0,0.0,0.0,0.0,-0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111,0.0,0.0,0.0,0.0,0.0,-0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# recommend
# Same call as for the iterative approach; re.df_rec now reflects the
# predictions produced by the 'svd' fit above.
user_list = [0, 1, 2] # positional indices, not user_ids
top_n = 10  # number of ranked columns (rank_1 .. rank_10) in the output
re.get_rec(user_list, top_n)
re.df_rec

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5,rank_6,rank_7,rank_8,rank_9,rank_10
0,16,4236.0,71838.0,169.0,223.0,527.0,1249.0,2100.0,3948.0,79132.0,4896.0
1,68,4236.0,71838.0,169.0,223.0,260.0,527.0,1249.0,2100.0,3948.0,79132.0
2,111,4236.0,71838.0,169.0,223.0,260.0,527.0,1249.0,2100.0,3948.0,79132.0


## Train on Clustered Matrix

In [11]:
from sklearn.cluster import KMeans

# Seed both clusterers: KMeans initialisation is random, so without a fixed
# random_state the cluster labels (and every table derived from them) can
# change between runs.
km_users = KMeans(n_clusters=10, random_state=42)
km_items = KMeans(n_clusters=10, random_state=42)

# Each call returns three values, unpacked here as (model, cluster map,
# utility matrix with cluster labels) -- meanings inferred from the variable
# names; TODO confirm against the Resype documentation.
user_model, user_cluster_map, util_matrix_w_users = re.cluster_users(km_users)
item_model, item_cluster_map, util_matrix_w_items = re.cluster_items(km_items)

In [12]:
# Assign users and items to their clusters, then aggregate the utility matrix
# with "mean" as the aggregation for both the user and the item axis.
# NOTE: after this cell re.utility_matrix is indexed by u_cluster / i_cluster
# (see the output below) rather than by user_id / item_id.
re.cluster_assignment()
re.utility_matrix_agg(u_agg="mean", i_agg="mean")
re.utility_matrix.head(3)

i_cluster,0,1,2,3,4,5,6,7,8,9
u_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,3.75,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.339506,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
from sklearn.ensemble import RandomForestRegressor

# Seeded regressor so the cluster-level predictions are reproducible.
model1 = RandomForestRegressor(random_state=42)
# n_synth_data and p are options of Resype.fit; their exact meaning is not
# visible in this notebook -- see the Resype documentation.
re.fit(model_object=model1, method='iterative', n_synth_data=5, p=0.1)
re.utility_matrix_preds.head(3)

i_cluster,0,1,2,3,4,5,6,7,8,9
u_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,-0.429167,-0.399127,0.301079,3.320833,-0.372719,-0.293759,-0.429167,-0.429167,-0.429167,-0.131331
1,-0.033307,0.303104,-0.036403,-0.036403,-0.05639,-0.057958,-0.036403,-0.036403,-0.036403,-0.036403
2,-0.45,-0.372461,3.534402,-0.45,-0.45,-0.45,-0.00973,-0.45,-0.310778,-0.277154


In [16]:
# predict top item clusters per user index
# The extra re.user_assignment argument is passed only in this clustered
# workflow; the rank_* columns in the output hold item-cluster ids.
user_list = [0, 1, 2] # index
top_n = 5 # top n clusters
re.get_rec(user_list, top_n, re.user_assignment)

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5
0,0,0.0,9.0,8.0,7.0,6.0
1,1,0.0,9.0,8.0,7.0,6.0
2,2,4.0,5.0,6.0,3.0,9.0


In [17]:
# predict top items per user_id
# The rank_* columns in the output hold item ids, one row per user_id.
top_n = 5 # top n items
re.get_rec_item(top_n)

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5
0,16,527.0,71838.0,589.0,4236.0,33646.0
1,68,527.0,71838.0,589.0,4236.0,33646.0
2,111,79132.0,223.0,4896.0,50.0,71838.0
