# Book Reading Propensity Model

In [1]:
from propensity_utils import *

## Set the Parameters

In [2]:
# Seed shared by the run, plus the hyperparameter bundle that is handed to
# Propensity(algo_params=...) further down.
random_state = 42
algo_params = dict(
    n_factors=128,
    n_epochs=100,
    lr_all=0.005,
    reg_all=0.1,
    random_state=random_state,
)

## Train/Test Data Paths

In [3]:
# Input files, given relative to the notebook's working directory:
# the held-out test split and the combined train/validation split.
test_path = './data/goodreads_2016_test.csv'
train_val_path = './data/goodreads_2016_train_val.csv'

## Propensity Class and Its Methods

### Instantiate a new model

In [4]:
# Build the model object used by every later cell: SVD as the algorithm class,
# the hyperparameters set above, and the train/validation file as training data.
propensity = Propensity(
    train_data_path=train_val_path,
    algo_params=algo_params,
    algo_class=SVD,
)

### Evaluate the model via cross-validation on the train set

In [5]:
%%time
# 3-fold cross-validation; the recorded run reports RMSE, MAE and FCP per fold
# together with fit/test times.
propensity.cross_validate(cv=3)

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.7964  0.7947  0.7951  0.7954  0.0007  
MAE (testset)     0.6087  0.6074  0.6082  0.6081  0.0005  
FCP (testset)     0.6166  0.6186  0.6190  0.6181  0.0010  
Fit time          122.56  149.55  149.43  140.51  12.69   
Test time         28.16   30.76   31.61   30.18   1.46    
CPU times: user 7min 1s, sys: 1min 47s, total: 8min 48s
Wall time: 9min 51s


### Train the model and evaluate it on the test set

In [6]:
%%time
# Train, then score against the held-out test file; the recorded run
# (verbose=True) prints test-set RMSE, MAE and FCP.
propensity.train_and_test(testset_path=test_path, verbose=True)

Evaluating the model performance on the test set
RMSE: 0.8599
MAE:  0.6681
FCP:  0.5877
CPU times: user 2min 23s, sys: 44.2 s, total: 3min 7s
Wall time: 3min 39s


### Estimate the confidence on the train set

In [7]:
%%time
# Estimate the confidence value with 3 CV folds. The recorded run logs one C per
# fold and the cell displays a single float (~0.704), which appears to be the
# mean of the per-fold values — confirm in propensity_utils.
propensity.estimate_confidence(n_cv_folds=3, verbose=True)

Estimating the Confidence on the Train Set...
Fold  1: C = 0.70444. Time elapsed:  6.12 minutes
Fold  2: C = 0.70346. Time elapsed:  6.45 minutes
Fold  3: C = 0.70477. Time elapsed:  8.96 minutes
************************************************************
Estimated Confidence (on a 3-fold CV): 0.70
************************************************************
CPU times: user 10min 43s, sys: 6min 30s, total: 17min 14s
Wall time: 21min 35s


0.7042216144751977

### Train the model

In [8]:
%%time
# Train the model that the inference cells below call into.
# NOTE(review): presumably fits on the file passed as train_data_path at
# construction — confirm in propensity_utils.
propensity.train_model()

CPU times: user 2min 23s, sys: 1min 9s, total: 3min 33s
Wall time: 4min 21s


### Make a prediction

In [9]:
%%time
user_id, item_id = '8842281e1d1347389f2ab93d60773d4d', 76620
res = propensity.infer_propensity_for_pair(user_id, item_id, verbose=False)
res

CPU times: user 336 µs, sys: 1.33 ms, total: 1.67 ms
Wall time: 8.24 ms


(1, 0.7042216144751977)

In [13]:
# Batch inference over the test file; preview only the first rows.
# NOTE(review): this cell's execution count (13) jumps ahead of the previous
# cell (9) — re-run with Restart & Run All to confirm the output reproduces.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV was saved with its index — verify and drop it if unintended.
res_df = propensity.infer_propensity_from_df(
    test_path,
    verbose=False,
)
res_df.head()

Unnamed: 0,user_id,book_id,would_recommend_pred,confidence
0,8842281e1d1347389f2ab93d60773d4d,29058155,1,0.704222
1,8842281e1d1347389f2ab93d60773d4d,76620,1,0.704222
2,01ec1a320ffded6b2dd47833f2c8e4fb,30853358,1,0.704222
3,4b3636a043e5c99fa27ac897ccfa1151,34084,0,0.704222
4,afc070543f19028dc7e7f084a0079f72,18131,1,0.704222
