In [1]:
import pandas as pd
import sys
sys.path.insert(0, "../..")
from skpref.random_utility import BradleyTerry
from skpref.task import ChoiceTask
from sklearn.model_selection import train_test_split
import pickle
from collections import Counter
from skpref.model_selection import GridSearchCV

In [2]:
# Load the two pickled inputs used throughout the notebook: the observed
# choices and the per-product attribute table.
# NOTE: pickle.load executes arbitrary code from the file, so only use it on
# data files that come from a trusted source.
with open('../examples/data/product_choices.pickle', 'rb') as choices_file:
    choice_data = pickle.load(choices_file)

with open('../examples/data/product_info.pickle', 'rb') as products_file:
    product_data = pickle.load(products_file)

In [3]:
# Preview the first rows of the choice table: each row holds the chosen
# product id(s) in `choice` and the offered product ids in `alternatives`.
choice_data.head()

Unnamed: 0,STORE_ID,BASKET_ID,choice,alternatives
0,286,29483925362,[6396581],"[5582789, 5584007, 951703, 6396581]"
1,286,29773175255,[5584007],"[5582789, 5584007, 951703, 6396581]"
2,286,30673440294,[5584007],"[5582789, 5584007, 951703, 6396581]"
3,286,32269485930,[951703],"[5582789, 5584007, 951703, 6396581]"
4,286,40876776136,[5582789],"[5582789, 5584007, 951703, 6396581]"


In [4]:
# Preview the first rows of the product attribute table (keyed by PRODUCT_ID).
product_data.head()

Unnamed: 0,PRODUCT_ID,prod_size,price,price_per_size
0,849098,7.0,1.02195,0.145993
1,951703,4.0,0.568998,0.142249
2,5578643,6.0,0.393425,0.065571
3,5582789,6.0,0.392923,0.065487
4,5584007,6.0,0.393564,0.065594


In [5]:
# Hold out 10% of the choice observations for testing; random_state is fixed
# so the split is reproducible across runs.
train, test = train_test_split(choice_data, random_state=1, test_size=0.1)

In [6]:
# Wrap each split in a ChoiceTask. Both tasks are configured identically:
# the 'alternatives' and 'choice' columns of the primary table, the two
# product covariates taken from product_data, and a link on PRODUCT_ID.
# NOTE(review): presumably secondary_to_primary_link tells ChoiceTask which
# primary-table columns hold ids matching PRODUCT_ID -- confirm in skpref docs.
products_bought_train = ChoiceTask(
    train,
    'alternatives',
    'choice',
    features_to_use=['price_per_size', 'prod_size'],
    secondary_table=product_data,
    secondary_to_primary_link={"PRODUCT_ID": ['alternatives', 'choice']},
)

products_bought_test = ChoiceTask(
    test,
    'alternatives',
    'choice',
    features_to_use=['price_per_size', 'prod_size'],
    secondary_table=product_data,
    secondary_to_primary_link={"PRODUCT_ID": ['alternatives', 'choice']},
)

In [7]:
# Fit a Bradley-Terry model on the training task.
# NOTE(review): method='BFGS' and alpha=1e-5 are presumably the optimiser and
# the regularisation strength -- confirm against the BradleyTerry docs.
mybt = BradleyTerry(method='BFGS', alpha=1e-5)
mybt.fit_task(products_bought_train)

In [8]:
# Find the most popular product ids and compare them with the model ranking.
#
# Each entry of `choice_data.choice` is a collection of product ids (an
# observation may contain more than one chosen product), so the ids are
# flattened while counting. Feeding a generator straight into Counter avoids
# building an intermediate list by repeated concatenation, which is quadratic
# in the number of observations. Counts and tie order (first seen first) are
# the same as counting one id at a time.
# Note that popularity is computed over all of choice_data, not only `train`.
counter = Counter(
    product_id
    for chosen_products in choice_data.choice.values
    for product_id in chosen_products
)

print('Most popular product_ids based on amounts purchased')
print(counter.most_common())
print("Bradley Terry rank of items given the covariates")
print(mybt.rank_entities(ascending=False))

Most popular product_ids based on amounts purchased
[(5584007, 626), (5582789, 503), (5585727, 439), (5586076, 331), (951703, 266), (5584100, 248), (849098, 219), (5578643, 210), (6396581, 156), (5591746, 2)]
Bradley Terry rank of items given the covariates
[5584007 5582789  951703 5585727 5586076  849098 5578643 5584100 6396581
 5591746]


In [23]:
# Display the held-out split to inspect which observations will be predicted.
# NOTE(review): this renders the whole frame; `test.head()` would keep the
# output short if only a preview is needed.
test

Unnamed: 0,STORE_ID,BASKET_ID,choice,alternatives
1204,367,40618321983,[5584007],"[5582789, 5584007, 5586076, 5578643, 5584100, ..."
50,292,41573721394,[5584007],"[5582789, 5584007, 5586076, 5578643, 5584100, ..."
812,346,31623965995,"[5584007, 5585727]","[5582789, 5584007, 5586076, 951703, 6396581, 5..."
1197,367,35688388981,"[5584007, 5585727]","[5582789, 5584007, 5586076, 5578643, 5584100, ..."
920,361,29035745176,[5586076],"[5582789, 5584007, 5586076, 5578643, 5584100, ..."
1606,401,30033682934,[5584007],"[5582789, 5584007, 5586076, 5578643, 5584100, ..."
540,327,40186485704,[5585727],"[5584007, 5586076, 5584100, 951703, 6396581, 8..."
397,319,31769262113,[5582789],"[5582789, 5584007, 5586076, 5584100, 951703, 6..."
970,362,41466426671,[951703],"[5582789, 5584007, 5586076, 5578643, 5584100, ..."
936,361,31687291076,[6396581],"[5582789, 5584007, 5586076, 5578643, 5584100, ..."


In [22]:
# Predicted probabilities on the test task for outcome=5584007, the product id
# that tops both the popularity count and the model ranking printed earlier.
# NOTE(review): presumably returns the probability of that product being
# chosen in each observation -- confirm against the skpref docs.
mybt.predict_proba_task(products_bought_test, outcome=5584007)

In [10]:
# Predicted choices on the test task; the recorded output is an int64 array
# of product ids.
mybt.predict_choice_task(products_bought_test)

array([5584007, 5584007, 5584007, ...,  951703,  849098, 5585727],
      dtype=int64)

In [9]:
# NOTE(review): the recorded output for this cell is
# "ValueError: cannot label index with a null key", so as saved this call
# fails and the notebook will not survive Restart & Run All. The cause is not
# visible here -- investigate or remove the cell before sharing.
mybt.predict_task(products_bought_test)

ValueError: cannot label index with a null key

# Example using Grid Search

In [None]:
# Rebuild both tasks without covariates (features_to_use=None). This rebinds
# products_bought_train / products_bought_test, replacing the feature-based
# tasks created earlier in the notebook.
products_bought_train = ChoiceTask(
    train,
    'alternatives',
    'choice',
    features_to_use=None,
)
products_bought_test = ChoiceTask(
    test,
    'alternatives',
    'choice',
    features_to_use=None,
)

# Hyper-parameter grid: every combination of alpha and method is tried.
to_tune = {
    'alpha': [0.1, 0.5, 1],
    'method': ['BFGS', 'Newton-CG'],
}

# Grid search with cv=3 over a fresh BradleyTerry estimator, fitted on the
# training task; inspect_results() is called last so its value is displayed.
gs_bt = GridSearchCV(BradleyTerry(), to_tune, cv=3)
gs_bt.fit_task(products_bought_train)
gs_bt.inspect_results()

In [None]:
# Rank the products using the fitted grid-search object, in descending order.
# NOTE(review): presumably delegates to the best estimator found -- confirm.
gs_bt.rank_entities(ascending=False)

In [None]:
# Predicted probabilities on the covariate-free test task from the fitted
# grid-search object (no `outcome` argument is passed here).
gs_bt.predict_proba_task(products_bought_test)