In [1]:
# !pip install scikit-surprise
# !conda install -c conda-forge scikit-surprise

In [94]:
import pandas as pd
import re
import numpy as np
import random


pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 200)

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

sns.set(palette="magma_r")

In [115]:
my_seed = 55
random.seed(my_seed)
np.random.seed(my_seed)

In [3]:
df_users = pd.read_csv("../..//data/processed/users.csv")
df_items = pd.read_csv("../..//data/processed/items.csv")


In [4]:
pivot = df_items.groupby(["user_id", "store_id", "product_category_id"])[["product_id"]].count().reset_index()
pivot["items"] = pivot["store_id"] + " " + [str(e).strip() for e in pivot["product_category_id"]]
pivot = pivot.drop(["store_id", "product_category_id"], axis= 1)

In [5]:
pivot.head()

Unnamed: 0,user_id,product_id,items
0,1485369350003,2,dsw 210
1,1485369350003,1,hm 124
2,1485369350003,1,jcrewfactory 111
3,1485369350003,1,jcrewfactory 114
4,1485369350003,5,loft 111


In [103]:
"""
This function creates a sparse matrix and a simple group by for user - product combinations
When store_cat is set to True, it uses product category and store as proxy for product 
Note : change this to create sparse matrices instead of returning pandas dataframes
"""
def get_user_prod_matrix(df, store_cat = True):
    df_items["store_cat"] = df_items.apply(lambda x : x["store_id"] + " - "+ str(x["product_category_id"]), axis=1)
    
    if store_cat:
        sparse = pd.pivot_table(df_items, index="user_id", columns="store_cat", values="product_id", aggfunc="count")
    else:
        sparse = pd.crosstab(index = df_items["user_id"], columns = df_items["product_id"], values="product_id", aggfunc="count")
    
    grp = sparse.stack().dropna().reset_index()       
    grp = grp.rename(columns={0:"rating"} )
    return (grp, sparse)

In [104]:
x, y = get_user_prod_matrix(df_items)
# print(x.head())
x2, y2 = get_user_prod_matrix(df_items, False)
# print(x2.head())


In [173]:
from surprise import Reader
from surprise import SVD, SVDpp
from surprise import KNNBasic
from surprise import accuracy
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate, GridSearchCV

In [204]:
x=x.fillna(0)
reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(x,reader)

tset1 = data.build_full_trainset()
anti1 = tset1.build_anti_testset()

trainset, testset = train_test_split(data, test_size=.25, random_state=10)
algo = SVD(n_factors=5,random_state=10)
algo = algo.fit(trainset)
predict = algo.test(testset)
accuracy.rmse(predict)

RMSE: 4.0688


4.068773512816049

In [107]:
cv = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
algo.test(testset)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    6.8289  8.3608  8.4316  4.6840  3.8117  6.4234  1.8866  
MAE (testset)     2.1747  2.1785  2.1460  2.0837  1.9424  2.1051  0.0881  
Fit time          0.05    0.03    0.04    0.03    0.03    0.04    0.01    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


[Prediction(uid=1490404459978, iid='victoriassecret - 141', r_ui=2.0, est=5, details={'was_impossible': False}),
 Prediction(uid=1497978280616, iid='necessaryclothing - 320', r_ui=1.0, est=1.540877158058814, details={'was_impossible': False}),
 Prediction(uid=1516248416260, iid='sephora - 500', r_ui=4.0, est=4.168304322383113, details={'was_impossible': False}),
 Prediction(uid=1528340569558, iid='kohls - 320', r_ui=1.0, est=1.650160247489883, details={'was_impossible': False}),
 Prediction(uid=1541778813846, iid='piperlime - 144', r_ui=1.0, est=1.5458646197505763, details={'was_impossible': False}),
 Prediction(uid=1542061238350, iid='6pmcom - 210', r_ui=1.0, est=1.9168836490140986, details={'was_impossible': False}),
 Prediction(uid=1534200663687, iid='express - 114', r_ui=3.0, est=1.6308818572189565, details={'was_impossible': False}),
 Prediction(uid=1534459103461, iid='poshmark - 111', r_ui=1.0, est=2.8209632334332797, details={'was_impossible': False}),
 Prediction(uid=1541215258

In [185]:
# x2=x2.fillna(0)
reader2 = Reader(rating_scale=(1,5))
data2 = Dataset.load_from_df(x2,reader)
trainset2, testset2 = train_test_split(data2, test_size=.25, random_state=10)
algo2 = SVD(n_factors=5, random_state=10)
algo2 = algo.fit(trainset2)
predict2 = algo.test(testset2)
accuracy.rmse(predict2)

RMSE: 0.1443


0.14427418217587726

In [126]:
cv2 = cross_validate(algo2, data2, measures=['RMSE', 'MAE'], cv=5, verbose=True)
algo.test(testset2)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.1418  0.1227  0.1664  0.1255  0.1490  0.1411  0.0160  
MAE (testset)     0.0351  0.0319  0.0375  0.0346  0.0354  0.0349  0.0018  
Fit time          0.14    0.09    0.09    0.09    0.09    0.10    0.02    
Test time         0.01    0.01    0.01    0.01    0.01    0.01    0.00    


[Prediction(uid=1549462213888, iid='62715111', r_ui=1.0, est=1.0246504155487735, details={'was_impossible': False}),
 Prediction(uid=1495121490933, iid='2a20aac0', r_ui=1.0, est=1.004060699176792, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='2af16ad9', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='becbc828', r_ui=1.0, est=1.0269742400852886, details={'was_impossible': False}),
 Prediction(uid=1504447154564, iid='2a6b830e', r_ui=1.0, est=1.017064698237499, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='79adfc3f', r_ui=1.0, est=1.00202234932828, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='2477213a', r_ui=1.0, est=1.0246200916152466, details={'was_impossible': False}),
 Prediction(uid=1534445874747, iid='2a80a6be', r_ui=1.0, est=1.023144770464831, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='124bcf1b', r_ui=1.0, est=1.016579813212234, detail

In [59]:
cv

{'test_rmse': array([10.05974865,  6.99188515,  3.95033637,  6.09526087,  4.51125928]),
 'test_mae': array([2.36225249, 2.23099655, 1.91466   , 2.07412915, 1.90249346]),
 'fit_time': (0.058667659759521484,
  0.03324127197265625,
  0.03481316566467285,
  0.035155296325683594,
  0.03383517265319824),
 'test_time': (0.0041201114654541016,
  0.004255056381225586,
  0.0044689178466796875,
  0.004040956497192383,
  0.003859996795654297)}

In [132]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [1e-4, 0.001, 0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, refit=True)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

gs.test(testset)

6.453836127419716
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


[Prediction(uid=1490404459978, iid='victoriassecret - 141', r_ui=2.0, est=5, details={'was_impossible': False}),
 Prediction(uid=1497978280616, iid='necessaryclothing - 320', r_ui=1.0, est=1.940837866943865, details={'was_impossible': False}),
 Prediction(uid=1516248416260, iid='sephora - 500', r_ui=4.0, est=3.4610570784795076, details={'was_impossible': False}),
 Prediction(uid=1528340569558, iid='kohls - 320', r_ui=1.0, est=2.729565602163965, details={'was_impossible': False}),
 Prediction(uid=1541778813846, iid='piperlime - 144', r_ui=1.0, est=1.8622072906817158, details={'was_impossible': False}),
 Prediction(uid=1542061238350, iid='6pmcom - 210', r_ui=1.0, est=2.0197342239577045, details={'was_impossible': False}),
 Prediction(uid=1534200663687, iid='express - 114', r_ui=3.0, est=1.961447820210484, details={'was_impossible': False}),
 Prediction(uid=1534459103461, iid='poshmark - 111', r_ui=1.0, est=2.3683757858992434, details={'was_impossible': False}),
 Prediction(uid=1541215258

In [164]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [1e-4, 0.001, 0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, refit="mae")

gs.fit(data2)

# best RMSE score
print(gs.best_score['rmse'])
print(gs.best_score['mae'])



# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])
print(gs.best_params['mae'])



gs.test(testset2)

0.14176671440400299
0.034732395115637534
{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.6}
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}


[Prediction(uid=1549462213888, iid='62715111', r_ui=1.0, est=1.0179548627209334, details={'was_impossible': False}),
 Prediction(uid=1495121490933, iid='2a20aac0', r_ui=1.0, est=1.008653904100433, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='2af16ad9', r_ui=1.0, est=1.0136092182117067, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='becbc828', r_ui=1.0, est=1.023112518242016, details={'was_impossible': False}),
 Prediction(uid=1504447154564, iid='2a6b830e', r_ui=1.0, est=1.0085244333300885, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='79adfc3f', r_ui=1.0, est=1.0052913355413415, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='2477213a', r_ui=1.0, est=1.0237811710392333, details={'was_impossible': False}),
 Prediction(uid=1534445874747, iid='2a80a6be', r_ui=1.0, est=1.0180500026904022, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='124bcf1b', r_ui=1.0, est=1.025

In [165]:
gs.best_estimator["rmse"]

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11075e550>

In [166]:
gs.best_estimator["mae"]

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1a232ac668>

In [167]:
gs.best_score

{'rmse': 0.14176671440400299, 'mae': 0.034732395115637534}

In [168]:
gs.best_params

{'rmse': {'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.6},
 'mae': {'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}}

In [172]:
gs.best_estimator["rmse"].fit(trainset2).test(testset2)

[Prediction(uid=1549462213888, iid='62715111', r_ui=1.0, est=1.0067277535362404, details={'was_impossible': False}),
 Prediction(uid=1495121490933, iid='2a20aac0', r_ui=1.0, est=1.0117773990935435, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='2af16ad9', r_ui=1.0, est=1.0150817240603396, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='becbc828', r_ui=1.0, est=1.0202545982133924, details={'was_impossible': False}),
 Prediction(uid=1504447154564, iid='2a6b830e', r_ui=1.0, est=1.009773171990581, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='79adfc3f', r_ui=1.0, est=1.0150817240603396, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='2477213a', r_ui=1.0, est=1.0202545982133924, details={'was_impossible': False}),
 Prediction(uid=1534445874747, iid='2a80a6be', r_ui=1.0, est=1.0103357821037335, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='124bcf1b', r_ui=1.0, est=1.02

In [170]:
gs.best_estimator["mae"].fit(trainset2).test(testset2)

[Prediction(uid=1549462213888, iid='62715111', r_ui=1.0, est=1.0052220414872048, details={'was_impossible': False}),
 Prediction(uid=1495121490933, iid='2a20aac0', r_ui=1.0, est=1.0119111604238475, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='2af16ad9', r_ui=1.0, est=1.006342046573792, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='becbc828', r_ui=1.0, est=1.0203022609964436, details={'was_impossible': False}),
 Prediction(uid=1504447154564, iid='2a6b830e', r_ui=1.0, est=1.009192057945203, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='79adfc3f', r_ui=1.0, est=1.006342046573792, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='2477213a', r_ui=1.0, est=1.0203022609964436, details={'was_impossible': False}),
 Prediction(uid=1534445874747, iid='2a80a6be', r_ui=1.0, est=1.0139961649781664, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='124bcf1b', r_ui=1.0, est=1.0203

In [163]:
gs.cv_results

{'split0_test_rmse': array([0.15195546, 0.15201922, 0.1513006 , 0.15132328, 0.15137133,
        0.15146497, 0.15144536, 0.15136976, 0.15200688, 0.15186813,
        0.15132639, 0.15138415, 0.15164988, 0.15134703, 0.15135058,
        0.15147762]),
 'split1_test_rmse': array([0.1326742 , 0.1330479 , 0.13260595, 0.13217257, 0.13218805,
        0.13245215, 0.13247015, 0.13249342, 0.13270432, 0.13218708,
        0.13250124, 0.13250508, 0.1324097 , 0.13210773, 0.13247249,
        0.13234713]),
 'split2_test_rmse': array([0.14209846, 0.14227852, 0.14187969, 0.14201871, 0.14179959,
        0.14188676, 0.14185828, 0.14173885, 0.14203093, 0.14208615,
        0.14175386, 0.14161662, 0.14179877, 0.14170688, 0.14178625,
        0.14168862]),
 'mean_test_rmse': array([0.14224271, 0.14244854, 0.14192875, 0.14183819, 0.14178632,
        0.14193463, 0.14192459, 0.14186734, 0.14224737, 0.14204712,
        0.1418605 , 0.14183529, 0.14195278, 0.14172055, 0.14186977,
        0.14183779]),
 'std_test_rmse': 

In [179]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [1e-4, 0.001, 0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3, refit=True, n_jobs=-1)

gs.fit(data2)

# best RMSE score
print(gs.best_score['rmse'])
print(gs.best_score['mae'])



# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])
print(gs.best_params['mae'])



gs.test(testset2)

0.1417781740828751
0.03471592616520962
{'n_epochs': 10, 'lr_all': 0.001, 'reg_all': 0.6}
{'n_epochs': 5, 'lr_all': 0.001, 'reg_all': 0.4}


[Prediction(uid=1549462213888, iid='62715111', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1495121490933, iid='2a20aac0', r_ui=1.0, est=1.0146404422753463, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='2af16ad9', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='becbc828', r_ui=1.0, est=1.023243753953332, details={'was_impossible': False}),
 Prediction(uid=1504447154564, iid='2a6b830e', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='79adfc3f', r_ui=1.0, est=1.0150872315475716, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='2477213a', r_ui=1.0, est=1.0137699478568842, details={'was_impossible': False}),
 Prediction(uid=1534445874747, iid='2a80a6be', r_ui=1.0, est=1.0170936149459577, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='124bcf1b', r_ui=1.0, est=1.0172492521106327, details={'was_impossible': False}),

In [180]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [1e-4, 0.001, 0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, refit=True, n_jobs=-1)

gs.fit(data2)

# best RMSE score
print(gs.best_score['rmse'])
print(gs.best_score['mae'])



# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])
print(gs.best_params['mae'])



gs.test(testset2)

0.14065022422763077
0.03458817993650201
{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.6}
{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.4}


[Prediction(uid=1549462213888, iid='62715111', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1495121490933, iid='2a20aac0', r_ui=1.0, est=1.0130130322809616, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='2af16ad9', r_ui=1.0, est=1.0117285325486207, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='becbc828', r_ui=1.0, est=1.0210944735668999, details={'was_impossible': False}),
 Prediction(uid=1504447154564, iid='2a6b830e', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1515379240540, iid='79adfc3f', r_ui=1.0, est=1.0803196398607906, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='2477213a', r_ui=1.0, est=1.0206801678629278, details={'was_impossible': False}),
 Prediction(uid=1534445874747, iid='2a80a6be', r_ui=1.0, est=1, details={'was_impossible': False}),
 Prediction(uid=1537259845783, iid='124bcf1b', r_ui=1.0, est=1.0203255492697847, details={'was_impossible': False})

In [181]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [1e-4, 0.001, 0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, refit=True, n_jobs=-1)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])
print(gs.best_score['mae'])



# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])
print(gs.best_params['mae'])



gs.test(testset)

6.441544952452877
2.1453335605100476
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}


[Prediction(uid=1490404459978, iid='victoriassecret - 141', r_ui=2.0, est=5, details={'was_impossible': False}),
 Prediction(uid=1497978280616, iid='necessaryclothing - 320', r_ui=1.0, est=1.9359429099073124, details={'was_impossible': False}),
 Prediction(uid=1516248416260, iid='sephora - 500', r_ui=4.0, est=3.437024463627714, details={'was_impossible': False}),
 Prediction(uid=1528340569558, iid='kohls - 320', r_ui=1.0, est=2.635346538975453, details={'was_impossible': False}),
 Prediction(uid=1541778813846, iid='piperlime - 144', r_ui=1.0, est=1.8647071435982658, details={'was_impossible': False}),
 Prediction(uid=1542061238350, iid='6pmcom - 210', r_ui=1.0, est=2.008345050389305, details={'was_impossible': False}),
 Prediction(uid=1534200663687, iid='express - 114', r_ui=3.0, est=2.015228298120966, details={'was_impossible': False}),
 Prediction(uid=1534459103461, iid='poshmark - 111', r_ui=1.0, est=2.4869792599619, details={'was_impossible': False}),
 Prediction(uid=1541215258673,

In [205]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [1e-4, 0.001, 0.002, 0.005],
            'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3, refit=True, n_jobs=-1)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])
print(gs.best_score['mae'])



# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])
print(gs.best_params['mae'])



gs.test(anti1)

6.367531928780601
2.1333235474854453
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


[Prediction(uid=1485369350003, iid='abercrombiefitch - 111', r_ui=2.736004714201532, est=3.1634558614982526, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='anthropologie - 141', r_ui=2.736004714201532, est=3.61794588525788, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='asos - 111', r_ui=2.736004714201532, est=3.0135622556265673, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='asos - 430', r_ui=2.736004714201532, est=2.940082626205258, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='athleta - 160', r_ui=2.736004714201532, est=4.086858540626914, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='baublebar - 410', r_ui=2.736004714201532, est=3.3895774817674837, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='bonobos - 111', r_ui=2.736004714201532, est=3.123343279335318, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='bonobo

In [202]:
xx = data.df


In [203]:
xx[xx.user_id == 1485369350003]

Unnamed: 0,user_id,store_cat,rating
0,1485369350003,dsw - 210,2.0
1,1485369350003,hm - 124,1.0
2,1485369350003,jcrewfactory - 111,1.0
3,1485369350003,jcrewfactory - 114,1.0
4,1485369350003,loft - 111,5.0
5,1485369350003,loft - 112,3.0
6,1485369350003,loft - 114,15.0
7,1485369350003,loft - 123,6.0
8,1485369350003,loft - 124,14.0
9,1485369350003,loft - 141,6.0


In [208]:
tset1.global_mean

2.736004714201532

In [209]:
anti2 = tset1.build_anti_testset(fill=0)

In [213]:
gs.test(anti2)

[Prediction(uid=1485369350003, iid='abercrombiefitch - 111', r_ui=0.0, est=3.1634558614982526, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='anthropologie - 141', r_ui=0.0, est=3.61794588525788, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='asos - 111', r_ui=0.0, est=3.0135622556265673, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='asos - 430', r_ui=0.0, est=2.940082626205258, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='athleta - 160', r_ui=0.0, est=4.086858540626914, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='baublebar - 410', r_ui=0.0, est=3.3895774817674837, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='bonobos - 111', r_ui=0.0, est=3.123343279335318, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='bonobos - 112', r_ui=0.0, est=3.1204218705659046, details={'was_impossible': False}),
 Prediction(uid=14

In [217]:
test1 = tset1.build_testset()

In [218]:
gs.test(test1)

[Prediction(uid=1485369350003, iid='dsw - 210', r_ui=2.0, est=2.65308386627518, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='hm - 124', r_ui=1.0, est=2.902641734295128, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='jcrewfactory - 111', r_ui=1.0, est=2.8064369017860145, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='jcrewfactory - 114', r_ui=1.0, est=2.83497653049924, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='loft - 111', r_ui=5.0, est=4.491460566227094, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='loft - 112', r_ui=3.0, est=3.2943118224111467, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='loft - 114', r_ui=15.0, est=4.546807721038849, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='loft - 123', r_ui=6.0, est=3.394562676780339, details={'was_impossible': False}),
 Prediction(uid=1485369350003, iid='loft -