In [1]:
import json
import numpy as np
import os
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split

In [2]:
# Seed passed to the train/test split; it is also stored in the results dict and
# embedded in the names of the output files.
_RANDOM_STATE = 1

# Data

In [3]:
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# The context manager guarantees the file handle is closed once the data is loaded.
with open('data/comparisons_berlin.p', 'rb') as comparisons_file:
    comparisons = pickle.load(comparisons_file)
print(comparisons.shape)

(7281, 12)


Split data into training and testing

In [4]:
# Hold out 15% of the comparisons for testing; the split is seeded by _RANDOM_STATE.
X_train, X_test = train_test_split(
    comparisons,
    test_size=0.15,
    random_state=_RANDOM_STATE,
)

# Original Elo

In [5]:
class OriginalELo(object):
    """Classic Elo rating tracker for pairwise comparisons between items.

    Ratings live in `items_elo` (item -> rating). Items can be registered up
    front with `initialize_items` / `initialize_elos`; an item first seen in
    `add_comparison` is registered on the fly at `starting_elo`.
    """

    def __init__(self, k_factor, elo_width, starting_elo):
        """Store the hyper-parameters and create empty rating state.

        Args:
            k_factor: Upper bound on the rating points exchanged in one comparison.
            elo_width: Scale of the logistic curve; a rating gap of `elo_width`
                corresponds to 10:1 expected odds.
            starting_elo: Rating given to an item before it has any comparison.
        """
        self.k_factor = k_factor
        self.elo_width = elo_width
        self.starting_elo = starting_elo
        self.items = set()       # identifiers of every known item
        self.items_elo = dict()  # item identifier -> current rating

    def initialize_items(self, items):
        """Replace the set of known items with the unique values of `items`."""
        self.items = set(items)

    def initialize_elos(self):
        """Set the rating of every known item to `starting_elo`."""
        for item in self.items:
            self.items_elo[item] = self.starting_elo

    def expected_result(self, elo_a, elo_b):
        """Return the expected probability that the item rated `elo_a` beats the one rated `elo_b`."""
        expect_a = 1.0/(1+10**((elo_b - elo_a)/self.elo_width))
        return expect_a

    def update_elo(self, winner_elo, loser_elo, tie=False):
        """Return the updated `(winner_elo, loser_elo)` pair after one comparison.

        Args:
            winner_elo: Current rating of the item listed as winner.
            loser_elo: Current rating of the item listed as loser.
            tie: When True the comparison is scored as a draw (0.5 for each
                side), so the higher-rated item gives points to the lower-rated one.

        The update is zero-sum: whatever the first item gains, the second loses.
        """
        # Actual score achieved by the first item: 1 for a win, 0.5 for a draw.
        actual_score = .5 if tie else 1

        expected_win = self.expected_result(winner_elo, loser_elo)
        change_in_elo = self.k_factor * (actual_score - expected_win)

        return winner_elo + change_in_elo, loser_elo - change_in_elo

    def _current_elo(self, item):
        """Return the rating of `item`, registering it at `starting_elo` if it is unseen."""
        if item not in self.items_elo:
            self.items.add(item)
            self.items_elo[item] = self.starting_elo
        return self.items_elo[item]

    def add_comparison(self, w_item, l_item, tie=False):
        """Process one comparison and store the updated ratings of both items.

        Args:
            w_item: Identifier of the winning item.
            l_item: Identifier of the losing item.
            tie: Whether the comparison ended in a draw.

        Items without a rating yet are registered at `starting_elo` instead of
        raising `KeyError`.
        """
        current_winner_elo = self._current_elo(w_item)
        current_loser_elo = self._current_elo(l_item)

        updated_winner_elo, updated_loser_elo = self.update_elo(
            current_winner_elo, current_loser_elo, tie=tie)

        self.items_elo[w_item] = updated_winner_elo
        self.items_elo[l_item] = updated_loser_elo
        

## Initialize Elo

In [6]:
# Elo hyper-parameters: initial rating, logistic scale, and maximum points per comparison.
starting_elo, elo_width, k_factor = 1500, 400, 32

elo = OriginalELo(k_factor, elo_width, starting_elo)

### Initialize Elo items

In [7]:
# Register every image that appears on either side of any comparison.
winner_ids = list(comparisons.Winner.values)
loser_ids = list(comparisons.Loser.values)
elo.initialize_items(winner_ids + loser_ids)

### Initialize item ratings

In [8]:
# Give every registered item the starting rating before any comparison is processed.
elo.initialize_elos()

## Training

### Compute ratings

In [9]:
# Replay the training comparisons in frame order, updating both ratings each time.
for w_item, l_item, tie_flag in zip(X_train.Winner, X_train.Loser, X_train.Tie):
    elo.add_comparison(w_item, l_item, tie=bool(tie_flag))

### Show ratings

In [10]:
# One record per rated image: its rating, its id, and the path of its image file.
scores = [
    {
        'score': rating,
        'image': image_id,
        'image_path': os.path.join('images', 'berlin', image_id + '.jpg'),
    }
    for image_id, rating in elo.items_elo.items()
]
scores_df = pd.DataFrame(scores).set_index('image', drop=False)

## Metrics

In [11]:
def compute_probabilities(elo_a, elo_b, allow_ties=False, width=None):
    """
    Expected probabilities of item A winning, losing, and (optionally) drawing against item B.

    Reference for draws formula: `Mathematical Model of Ranking Accuracy and Popularity Promotion`
    https://www.researchgate.net/publication/309662241_Mathematical_Model_of_Ranking_Accuracy_and_Popularity_Promotion

    Args:
        elo_a: Rating of item A.
        elo_b: Rating of item B.
        allow_ties: When True, also return a draw probability and remove half of
            it from each of the win and loss probabilities.
        width: Logistic scale to use. Defaults to the module-level `elo_width`,
            which keeps existing calls unchanged.

    Returns:
        `(p_win, p_los)`, or `(p_win, p_los, p_tie)` when `allow_ties` is True.

    Note:
        With `allow_ties=True` the adjusted probability of the lower-rated item
        becomes negative for large rating gaps (for example a gap of 800 with a
        width of 400), so callers taking a logarithm should guard against that.
    """
    # Fall back to the notebook-wide scale only when no explicit width is given.
    scale = elo_width if width is None else width

    p_win = 1. / (1+10**((-elo_a + elo_b)/scale))
    p_los = 1. / (1+10**((elo_a - elo_b)/scale))

    if not allow_ties:
        return p_win, p_los

    # Bell-shaped draw probability that peaks when both ratings are equal.
    p_tie = (1 / (np.sqrt(2 * np.pi) * np.e)) * np.exp(-1 * (( (elo_a-elo_b)/(scale/2) )**2) / (2*np.e**2))
    p_win = p_win - 0.5 * p_tie
    p_los = p_los - 0.5 * p_tie

    return p_win, p_los, p_tie

In [12]:
def compute_logloss(df, allow_ties=False):
    """Log-probability of each observed outcome in `df` under the current ratings.

    Ratings are read from the module-level `elo` object.

    Args:
        df: Comparisons with `Winner`, `Loser` and `score` columns. A `score`
            of -1 or 1 marks a decisive comparison; any other value is treated
            as a tie.
        allow_ties: When True, probabilities come from the three-outcome model,
            so tie rows are scored with the draw probability. When False
            (default) the two-outcome model is used, which cannot score ties.

    Returns:
        list: One log-probability per row, in row order. Callers negate the
        mean to obtain the log loss.

    Raises:
        ValueError: If a tie row is found while `allow_ties` is False.
    """
    log_loss = []
    for i, row in df.iterrows():
        probabilities = compute_probabilities(
            elo.items_elo[row.Winner], elo.items_elo[row.Loser], allow_ties=allow_ties)

        if row.score == -1 or row.score == 1:
            # The listed winner won: score the win probability.
            log_loss.append(np.log(probabilities[0]))
        elif allow_ties:
            # Third element is the draw probability of the three-outcome model.
            log_loss.append(np.log(probabilities[2]))
        else:
            raise ValueError(
                'Row {} is a tie (score={}); filter ties out or pass allow_ties=True.'.format(i, row.score))

    return log_loss

In [13]:
def compute_accuracy(df):
    """Per-row correctness (1 or 0) of the rating-based prediction for each comparison in `df`.

    Ratings are read from the module-level `elo` object.

    Args:
        df: Comparisons with `Winner`, `Loser` and `score` columns. A `score`
            of -1 or 1 marks a decisive comparison; any other value is treated
            as a tie.

    Returns:
        list of int: 1 where the listed winner is rated strictly more likely to
        win than the listed loser, else 0. Tie rows always score 0, because the
        two-outcome model never predicts a draw.
    """
    accuracy = []

    for i, row in df.iterrows():
        if row.score == -1 or row.score == 1:
            p_win, p_los = compute_probabilities(elo.items_elo[row.Winner], elo.items_elo[row.Loser])
            accuracy.append(int(p_win > p_los))
        else:
            # A draw can never match a win/loss prediction, so it is a miss.
            accuracy.append(0)

    return accuracy

#### Compute metrics for training

In [14]:
# Metrics are computed on decisive comparisons only (ties have score == 0).
train_decisive = X_train[X_train.score != 0]
log_loss_train = compute_logloss(train_decisive)
accuracy_train = compute_accuracy(train_decisive)

## Testing

#### Compute metrics for testing

In [15]:
# Metrics are computed on decisive comparisons only (ties have score == 0).
test_decisive = X_test[X_test.score != 0]
log_loss_test = compute_logloss(test_decisive)
accuracy_test = compute_accuracy(test_decisive)

## Aggregate results

In [16]:
# Summary of the run: mean negative log-likelihood and mean accuracy per split.
results = {
    'model': 'elo',
    'train_logloss': -1 * np.mean(log_loss_train),
    'test_logloss': -1 * np.mean(log_loss_test),
    'train_accuracy': np.mean(accuracy_train),
    'test_accuracy': np.mean(accuracy_test),
    'seed': _RANDOM_STATE
}
results_path = 'output/{}_modelresults_SEED{}.p'.format(results['model'], _RANDOM_STATE)
# The context manager closes (and therefore flushes) the file even if pickling fails.
with open(results_path, 'wb') as results_file:
    pickle.dump(results, results_file)
print(json.dumps(results, indent=4))

{
    "model": "elo",
    "train_logloss": 0.5802742696389508,
    "test_logloss": 0.6584216675828181,
    "train_accuracy": 0.9046766169154229,
    "test_accuracy": 0.6561443066516347,
    "seed": 1
}


In [17]:
scores_path = 'output/{}_scores_SEED{}.p'.format(results['model'], _RANDOM_STATE)
# The context manager closes (and therefore flushes) the file even if pickling fails.
with open(scores_path, 'wb') as scores_file:
    pickle.dump(scores_df, scores_file)