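# Bradley-Terry example notebook
This notebook fits a simple Bradley-Terry model on the 2016 NBA season and predicts the outcomes of the games of the 2017 season. The model is first fitted without covariates, then with each team's salary budget as a covariate, and finally its hyperparameters are tuned with `GridSearchCV`.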

In [1]:
import sys
sys.path.insert(0, "..")
from random_utility import BradleyTerry
from task import ChoiceTask
import pandas as pd
import numpy as np
from model_selection import GridSearchCV

In [2]:
# Location of the match-results CSV (relative path, read with pd.read_csv below).
NBA_file_loc = 'data/NBA_matches.csv'

In [3]:
# Load every recorded match, then keep one season for fitting and the
# following season for evaluation.
NBA_results = pd.read_csv(NBA_file_loc)
season_split = 2016

is_train_season = NBA_results['season_start'] == season_split
is_test_season = NBA_results['season_start'] == season_split + 1

# Explicit copies so each split is its own frame, independent of NBA_results.
train_data = NBA_results.loc[is_train_season].copy()
test_data = NBA_results.loc[is_test_season].copy()

In [4]:
def _make_plain_task(matches):
    """Wrap a table of matches in a ChoiceTask that uses no covariates.

    The alternatives are the 'team1' and 'team2' columns and the target is
    'team1_wins'. target_column_correspondence='team1' presumably tells the
    task that the target refers to team1 -- confirm against ChoiceTask.
    """
    return ChoiceTask(
        primary_table=matches,
        primary_table_alternatives_names=['team1', 'team2'],
        primary_table_target_name='team1_wins',
        target_column_correspondence='team1',
        features_to_use=None,
    )


# Same task definition for both seasons; only the underlying table differs.
NBA_results_task_train = _make_plain_task(train_data)
NBA_results_task_predict = _make_plain_task(test_data)

In [5]:
# Fit the covariate-free model on the training season, then list the teams
# in descending order of the fitted ranking.
bt_options = {'method': 'BFGS', 'alpha': 1e-5}
mybt = BradleyTerry(**bt_options)
mybt.fit_task(NBA_results_task_train)
mybt.rank_entities(ascending=False)

['Golden State Warriors',
 'San Antonio Spurs',
 'Houston Rockets',
 'Boston Celtics',
 'Los Angeles Clippers',
 'Utah Jazz',
 'Cleveland Cavaliers',
 'Toronto Raptors',
 'Washington Wizards',
 'Oklahoma City Thunder',
 'Memphis Grizzlies',
 'Atlanta Hawks',
 'Portland Trail Blazers',
 'Milwaukee Bucks',
 'Indiana Pacers',
 'Miami Heat',
 'Chicago Bulls',
 'Denver Nuggets',
 'Detroit Pistons',
 'Charlotte Hornets',
 'New Orleans Pelicans',
 'Dallas Mavericks',
 'Sacramento Kings',
 'Minnesota Timberwolves',
 'New York Knicks',
 'Orlando Magic',
 'Philadelphia 76ers',
 'Los Angeles Lakers',
 'Phoenix Suns',
 'Brooklyn Nets']

In [6]:
# Peek at the first rows of the table held by the prediction task (the 2017 season).
NBA_results_task_predict.primary_table.head()

Unnamed: 0,team1,team2,season_start,team1_wins,team_1_home
7380,Atlanta Hawks,Dallas Mavericks,2017,1,0
7381,Atlanta Hawks,Charlotte Hornets,2017,0,0
7382,Atlanta Hawks,Brooklyn Nets,2017,0,0
7383,Atlanta Hawks,Miami Heat,2017,0,0
7384,Atlanta Hawks,Chicago Bulls,2017,0,0


In [7]:
# ...and at the last rows of the same table.
NBA_results_task_predict.primary_table.tail()

Unnamed: 0,team1,team2,season_start,team1_wins,team_1_home
9835,Washington Wizards,Houston Rockets,2017,0,0
9836,Washington Wizards,Cleveland Cavaliers,2017,0,0
9837,Washington Wizards,Atlanta Hawks,2017,0,1
9838,Washington Wizards,Boston Celtics,2017,1,1
9839,Washington Wizards,Orlando Magic,2017,0,0


In [8]:
# One predicted probability per 2017 match.
# NOTE(review): the displayed values are consistent with P(team1 wins) -- confirm against predict_proba_task.
mybt.predict_proba_task(NBA_results_task_predict)

array([0.60677663, 0.58070679, 0.77310272, ..., 0.57782378, 0.44533727,
       0.7343335 ])

In [9]:
# Predicted choice for each 2017 match, returned as team names.
mybt.predict_choice_task(NBA_results_task_predict)

array(['Atlanta Hawks', 'Atlanta Hawks', 'Atlanta Hawks', ...,
       'Washington Wizards', 'Boston Celtics', 'Washington Wizards'],
      dtype=object)

In [10]:
# Predictions encoded as 0/1.
# NOTE(review): the output looks like 1 where team1 is the predicted winner -- confirm against predict_task.
mybt.predict_task(NBA_results_task_predict)

array([1, 1, 1, ..., 1, 0, 1])

## Run model with team salary budget

In [11]:
# Salary budget table with one row per team and season (see the preview below).
salary_budget_file_loc = 'data/team_salary_budgets.csv'
NBA_team_salary_budget = pd.read_csv(salary_budget_file_loc)
NBA_team_salary_budget.head()

Unnamed: 0,team,season_start,salary
0,Atlanta Hawks,2014,58337671
1,Atlanta Hawks,2015,71378126
2,Atlanta Hawks,2016,95957250
3,Atlanta Hawks,2017,99375302
4,Boston Celtics,2014,59418142


In [12]:
def _make_salary_task(matches):
    """Wrap a table of matches in a ChoiceTask that uses 'salary' as a feature.

    The salary comes from NBA_team_salary_budget. The link maps its 'team'
    column to 'team1'/'team2' and its 'season_start' column to the matches'
    'season_start' -- presumably the join keys; confirm against ChoiceTask.
    """
    return ChoiceTask(
        primary_table=matches,
        primary_table_alternatives_names=['team1', 'team2'],
        primary_table_target_name='team1_wins',
        target_column_correspondence='team1',
        features_to_use=['salary'],
        secondary_table=NBA_team_salary_budget,
        secondary_to_primary_link={'team': ['team1', 'team2'], 'season_start': 'season_start'},
    )


# Rebuild both tasks, this time with the salary covariate attached.
NBA_results_task_train = _make_salary_task(train_data)
NBA_results_task_predict = _make_salary_task(test_data)

In [13]:
# Refit with the same settings on the salary-aware training task and list the
# teams in descending order of the new ranking.
bt_salary_options = {'method': 'BFGS', 'alpha': 1e-5}
mybt = BradleyTerry(**bt_salary_options)
mybt.fit_task(NBA_results_task_train)
mybt.rank_entities(ascending=False)

array(['Golden State Warriors', 'San Antonio Spurs', 'Houston Rockets',
       'Utah Jazz', 'Boston Celtics', 'Oklahoma City Thunder',
       'Washington Wizards', 'Toronto Raptors', 'Los Angeles Clippers',
       'Denver Nuggets', 'Atlanta Hawks', 'Indiana Pacers',
       'Chicago Bulls', 'Cleveland Cavaliers', 'Memphis Grizzlies',
       'Miami Heat', 'Milwaukee Bucks', 'Charlotte Hornets',
       'Minnesota Timberwolves', 'Portland Trail Blazers',
       'New Orleans Pelicans', 'Sacramento Kings', 'Detroit Pistons',
       'Dallas Mavericks', 'Philadelphia 76ers', 'New York Knicks',
       'Phoenix Suns', 'Los Angeles Lakers', 'Orlando Magic',
       'Brooklyn Nets'], dtype=object)

In [14]:
# Predicted probabilities for the 2017 matches from the model fitted with the salary feature.
mybt.predict_proba_task(NBA_results_task_predict)

array([0.69081236, 0.47898395, 0.74603095, ..., 0.65408203, 0.42937596,
       0.82114576])

In [15]:
# Predicted choice (team name) for each 2017 match from the salary-feature model.
mybt.predict_choice_task(NBA_results_task_predict)

array(['Atlanta Hawks', 'Charlotte Hornets', 'Atlanta Hawks', ...,
       'Washington Wizards', 'Boston Celtics', 'Washington Wizards'],
      dtype=object)

In [16]:
# 0/1 predictions for the 2017 matches from the salary-feature model.
mybt.predict_task(NBA_results_task_predict)

array([1, 0, 1, ..., 1, 0, 1])

## Example using GridSearchCV()

In [17]:
# Search grid: three candidate alpha values, with method held at 'BFGS'.
to_tune = dict(alpha=[1, 2, 4], method=['BFGS'])

# cv=3 is presumably 3-fold cross-validation -- confirm against GridSearchCV.
base_bt = BradleyTerry()
gs_bt = GridSearchCV(base_bt, to_tune, cv=3)
gs_bt.fit_task(NBA_results_task_train)

# Report the best parameters and the per-trial scores.
gs_bt.inspect_results()

The model with the best parameters was:
BradleyTerry(alpha=2, initial_params=None, max_iter=None, method='BFGS',
       tol=1e-05)
With a score of -0.6265008194657992
All the trials results summarised in descending score
   alpha method  mean_test_score
1      2   BFGS        -0.626501
0      1   BFGS        -0.626742
2      4   BFGS        -0.628853


In [18]:
# Predicted probabilities for the 2017 matches from the fitted grid search.
# NOTE(review): presumably this delegates to the best model found -- confirm against GridSearchCV.
gs_bt.predict_proba_task(NBA_results_task_predict)

array([0.67872102, 0.47571081, 0.72269123, ..., 0.64673093, 0.4409164 ,
       0.80274851])

In [19]:
# Team ranking from the fitted grid search, in descending order.
gs_bt.rank_entities(ascending=False)

array(['Golden State Warriors', 'San Antonio Spurs', 'Houston Rockets',
       'Utah Jazz', 'Boston Celtics', 'Oklahoma City Thunder',
       'Washington Wizards', 'Toronto Raptors', 'Los Angeles Clippers',
       'Denver Nuggets', 'Atlanta Hawks', 'Indiana Pacers',
       'Chicago Bulls', 'Cleveland Cavaliers', 'Memphis Grizzlies',
       'Miami Heat', 'Milwaukee Bucks', 'Charlotte Hornets',
       'Minnesota Timberwolves', 'Portland Trail Blazers',
       'New Orleans Pelicans', 'Detroit Pistons', 'Sacramento Kings',
       'Dallas Mavericks', 'Philadelphia 76ers', 'New York Knicks',
       'Phoenix Suns', 'Los Angeles Lakers', 'Orlando Magic',
       'Brooklyn Nets'], dtype=object)