# Bradley Terry example notebook
This notebook fits a simple Bradley Terry model without covariates on the 2016 NBA season and predicts the encounters of the 2017 season.

In [1]:
import sys
sys.path.insert(0, "../..")
from skpref.random_utility import BradleyTerry
from skpref.task import PairwiseComparisonTask
from skpref.base import ClassificationReducer
import pandas as pd
import numpy as np
from skpref.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

In [2]:
NBA_file_loc = 'data/NBA_matches.csv'

In [3]:
NBA_results = pd.read_csv(NBA_file_loc)
season_split = 2016
train_data = NBA_results[NBA_results.season_start == season_split].copy()
test_data = NBA_results[NBA_results.season_start == season_split+1].copy()

In [4]:
NBA_results_task_train = PairwiseComparisonTask(
    primary_table=train_data,
    primary_table_alternatives_names=['team1', 'team2'],
    primary_table_target_name ='team1_wins',
    target_column_correspondence='team1',
    features_to_use=None
)

NBA_results_task_predict = PairwiseComparisonTask(
    primary_table=test_data,
    primary_table_alternatives_names=['team1', 'team2'],
    primary_table_target_name ='team1_wins',
    target_column_correspondence='team1',
    features_to_use=None
)

## Let's fit a reduction to logistic regression first

In [5]:
my_log_red = ClassificationReducer(LogisticRegression())
my_log_red.fit_task(NBA_results_task_train)
my_log_red.predict_task(NBA_results_task_predict)



array([0, 0, 0, ..., 1, 1, 0], dtype=int64)

In [6]:
# All this learns so far is the home team advantage, since its the only covariate in the test_data table
my_log_red.predict_proba_task(NBA_results_task_predict)

array([[0.58323397, 0.41676603],
       [0.58323397, 0.41676603],
       [0.58323397, 0.41676603],
       ...,
       [0.41684703, 0.58315297],
       [0.41684703, 0.58315297],
       [0.58323397, 0.41676603]])

## Now let's fit a Bradley Terry model

In [7]:
mybt = BradleyTerry(method='BFGS', alpha=1e-5)
mybt.fit_task(NBA_results_task_train)
mybt.rank_entities(ascending=False)

['Golden State Warriors',
 'San Antonio Spurs',
 'Houston Rockets',
 'Boston Celtics',
 'Los Angeles Clippers',
 'Utah Jazz',
 'Cleveland Cavaliers',
 'Toronto Raptors',
 'Washington Wizards',
 'Oklahoma City Thunder',
 'Memphis Grizzlies',
 'Atlanta Hawks',
 'Portland Trail Blazers',
 'Milwaukee Bucks',
 'Indiana Pacers',
 'Miami Heat',
 'Chicago Bulls',
 'Denver Nuggets',
 'Detroit Pistons',
 'Charlotte Hornets',
 'New Orleans Pelicans',
 'Dallas Mavericks',
 'Sacramento Kings',
 'Minnesota Timberwolves',
 'New York Knicks',
 'Orlando Magic',
 'Philadelphia 76ers',
 'Los Angeles Lakers',
 'Phoenix Suns',
 'Brooklyn Nets']

In [8]:
NBA_results_task_predict.primary_table.head()

Unnamed: 0,team1,team2,season_start,team1_wins,team_1_home
7380,Atlanta Hawks,Dallas Mavericks,2017,1,0
7381,Atlanta Hawks,Charlotte Hornets,2017,0,0
7382,Atlanta Hawks,Brooklyn Nets,2017,0,0
7383,Atlanta Hawks,Miami Heat,2017,0,0
7384,Atlanta Hawks,Chicago Bulls,2017,0,0


In [9]:
NBA_results_task_predict.primary_table.tail()

Unnamed: 0,team1,team2,season_start,team1_wins,team_1_home
9835,Washington Wizards,Houston Rockets,2017,0,0
9836,Washington Wizards,Cleveland Cavaliers,2017,0,0
9837,Washington Wizards,Atlanta Hawks,2017,0,1
9838,Washington Wizards,Boston Celtics,2017,1,1
9839,Washington Wizards,Orlando Magic,2017,0,0


In [10]:
mybt.predict_proba_task(NBA_results_task_predict)

array([0.60677663, 0.58070679, 0.77310272, ..., 0.57782378, 0.44533727,
       0.7343335 ])

In [11]:
mybt.predict_choice_task(NBA_results_task_predict)

array(['Atlanta Hawks', 'Atlanta Hawks', 'Atlanta Hawks', ...,
       'Washington Wizards', 'Boston Celtics', 'Washington Wizards'],
      dtype=object)

In [12]:
mybt.predict_task(NBA_results_task_predict)

array([1, 1, 1, ..., 1, 0, 1])

## Run model with team salary budget

In [13]:
NBA_team_salary_budget = pd.read_csv('data/team_salary_budgets.csv')
NBA_team_salary_budget.head()

Unnamed: 0,team,season_start,salary
0,Atlanta Hawks,2014,58337671
1,Atlanta Hawks,2015,71378126
2,Atlanta Hawks,2016,95957250
3,Atlanta Hawks,2017,99375302
4,Boston Celtics,2014,59418142


In [14]:
NBA_results_task_train = PairwiseComparisonTask(
    primary_table=train_data,
    primary_table_alternatives_names=['team1', 'team2'],
    primary_table_target_name ='team1_wins',
    target_column_correspondence='team1',
    features_to_use=['salary'],
    secondary_table=NBA_team_salary_budget,
    secondary_to_primary_link={
        'team': ['team1', 'team2'],
        'season_start': 'season_start'
    })

NBA_results_task_predict = PairwiseComparisonTask(
    primary_table=test_data,
    primary_table_alternatives_names=['team1', 'team2'],
    primary_table_target_name ='team1_wins',
    target_column_correspondence='team1',
    features_to_use=['salary'],
    secondary_table=NBA_team_salary_budget,
    secondary_to_primary_link={
        'team': ['team1', 'team2'],
        'season_start': 'season_start'
    })

# Let's fit a reduction to logistic regression with the salary covariate

In [15]:
my_log_red = ClassificationReducer(LogisticRegression())
my_log_red.fit_task(NBA_results_task_train)
my_log_red.predict_task(NBA_results_task_predict)



array([1, 0, 1, ..., 1, 1, 1], dtype=int64)

In [16]:
# All this learns so far is the home team advantage, since its the only covariate in the test_data table
my_log_red.predict_proba_task(NBA_results_task_predict)

array([[0.44748774, 0.55251226],
       [0.56864298, 0.43135702],
       [0.48680567, 0.51319433],
       ...,
       [0.40573964, 0.59426036],
       [0.46504682, 0.53495318],
       [0.39150446, 0.60849554]])

# Bradley Terry model with salary covariate

In [17]:
mybt = BradleyTerry(method='BFGS', alpha=1e-5)
mybt.fit_task(NBA_results_task_train)
mybt.rank_entities(ascending=False)

array(['Golden State Warriors', 'San Antonio Spurs', 'Houston Rockets',
       'Utah Jazz', 'Boston Celtics', 'Oklahoma City Thunder',
       'Washington Wizards', 'Toronto Raptors', 'Los Angeles Clippers',
       'Denver Nuggets', 'Atlanta Hawks', 'Indiana Pacers',
       'Chicago Bulls', 'Cleveland Cavaliers', 'Memphis Grizzlies',
       'Miami Heat', 'Milwaukee Bucks', 'Charlotte Hornets',
       'Minnesota Timberwolves', 'Portland Trail Blazers',
       'New Orleans Pelicans', 'Sacramento Kings', 'Detroit Pistons',
       'Dallas Mavericks', 'Philadelphia 76ers', 'New York Knicks',
       'Phoenix Suns', 'Los Angeles Lakers', 'Orlando Magic',
       'Brooklyn Nets'], dtype=object)

In [18]:
mybt.predict_proba_task(NBA_results_task_predict)

array([0.69081236, 0.47898395, 0.74603095, ..., 0.65408203, 0.42937596,
       0.82114576])

In [19]:
mybt.predict_choice_task(NBA_results_task_predict)

array(['Atlanta Hawks', 'Charlotte Hornets', 'Atlanta Hawks', ...,
       'Washington Wizards', 'Boston Celtics', 'Washington Wizards'],
      dtype=object)

In [20]:
mybt.predict_task(NBA_results_task_predict)

array([1, 0, 1, ..., 1, 0, 1])

# Example using GridSearchCV()

In [21]:
to_tune = {'alpha': [1, 2, 4], 'method': ['BFGS']}
gs_bt = GridSearchCV(BradleyTerry(), to_tune,  cv=3, scoring='neg_log_loss')
gs_bt.fit_task(NBA_results_task_train)
gs_bt.inspect_results()

The model with the best parameters was:
BradleyTerry(alpha=2, initial_params=None, max_iter=None, method='BFGS',
       tol=1e-05)
With a score of -0.6265008194657992
All the trials results summarised in descending score
   alpha method  mean_test_score
1      2   BFGS        -0.626501
0      1   BFGS        -0.626742
2      4   BFGS        -0.628853


In [22]:
# Showing that sklearn.metrics works also
to_tune = {'alpha': [1, 2, 4], 'method': ['BFGS']}
gs_bt = GridSearchCV(BradleyTerry(), to_tune,  cv=3, scoring=f1_score)
gs_bt.fit_task(NBA_results_task_train)
gs_bt.inspect_results()

The model with the best parameters was:
BradleyTerry(alpha=4, initial_params=None, max_iter=None, method='BFGS',
       tol=1e-05)
With a score of 0.6337744652191033
All the trials results summarised in descending score
   alpha method  mean_test_score
2      4   BFGS         0.633774
1      2   BFGS         0.631136
0      1   BFGS         0.630085


In [23]:
to_tune = {'C': [0.5, 1, 2, 4, 8], 'solver': ['saga'], 'penalty': ['l1','l2'], 'fit_intercept': [True, False]}
gs_lr = GridSearchCV(ClassificationReducer(LogisticRegression()), to_tune,  cv=3, scoring='neg_log_loss')
gs_lr.fit_task(NBA_results_task_train)
gs_lr.inspect_results()

The model with the best parameters was:
ClassificationReducer(model=LogisticRegression(C=4, class_weight=None, dual=False, fit_intercept=False,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='saga',
          tol=0.0001, verbose=0, warm_start=False))
With a score of -0.6865146258304833
All the trials results summarised in descending score
    model__C  model__fit_intercept model__penalty model__solver  \
15       4.0                 False             l2          saga   
11       2.0                 False             l2          saga   
9        2.0                  True             l2          saga   
0        0.5                  True             l1          saga   
17       8.0                  True             l2          saga   
12       4.0                  True             l1          saga   
4        1.0                  True             l1          saga   
3        0.5                 False        

In [24]:
gs_lr.predict_task(NBA_results_task_predict)

array([1, 0, 1, ..., 1, 1, 1], dtype=int64)

In [25]:
gs_lr.predict_proba_task(NBA_results_task_predict)

array([[0.44759906, 0.55240094],
       [0.56879714, 0.43120286],
       [0.4869326 , 0.5130674 ],
       ...,
       [0.40586469, 0.59413531],
       [0.46519783, 0.53480217],
       [0.39162283, 0.60837717]])

In [26]:
gs_bt.predict_proba_task(NBA_results_task_predict)

array([0.66884072, 0.47279872, 0.70427284, ..., 0.64116639, 0.44976495,
       0.78764947])

In [27]:
gs_bt.rank_entities(ascending=False)

array(['Golden State Warriors', 'San Antonio Spurs', 'Houston Rockets',
       'Utah Jazz', 'Boston Celtics', 'Oklahoma City Thunder',
       'Washington Wizards', 'Toronto Raptors', 'Los Angeles Clippers',
       'Denver Nuggets', 'Atlanta Hawks', 'Indiana Pacers',
       'Chicago Bulls', 'Cleveland Cavaliers', 'Memphis Grizzlies',
       'Miami Heat', 'Milwaukee Bucks', 'Charlotte Hornets',
       'Minnesota Timberwolves', 'Portland Trail Blazers',
       'Detroit Pistons', 'New Orleans Pelicans', 'Sacramento Kings',
       'Philadelphia 76ers', 'Dallas Mavericks', 'New York Knicks',
       'Phoenix Suns', 'Los Angeles Lakers', 'Orlando Magic',
       'Brooklyn Nets'], dtype=object)