In [1]:
import itertools
import json
import math
import numpy as np
import os
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
import trueskill as ts

In [2]:
# Seed passed to train_test_split; it is also stored in the results dict and
# embedded in the output filenames so runs with different seeds do not collide.
_RANDOM_STATE = 1

# Data

In [3]:
# NOTE(security): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
# The context manager closes the file handle as soon as the data is loaded.
with open('data/comparisons_berlin.p', 'rb') as comparisons_file:
    comparisons = pickle.load(comparisons_file)
print(comparisons.shape)

(7281, 12)


Split data into training and testing

In [4]:
# Hold out 15% of the comparisons for evaluation; the split is seeded so it
# can be reproduced.
X_train, X_test = train_test_split(
    comparisons,
    test_size=0.15,
    random_state=_RANDOM_STATE,
)
print('Train:', X_train.shape)
print('Test:', X_test.shape)

Train: (6188, 12)
Test: (1093, 12)


In [5]:
# Distinct image ids taken from both the left and right columns of the full
# comparison set (not just X_train), so images that only occur in the test
# split are included as well.
unique_images = pd.unique(comparisons[['image_l', 'image_r']].values.ravel('K'))

# TrueSkill

## Initialize TrueSkill scores

In [6]:
# Give every image a fresh rating created with TrueSkill's defaults.
scores = {image: ts.Rating() for image in unique_images}

## Compute scores based on comparisons

In [7]:
# TrueSkill ranks for [image_l, image_r], keyed by the recorded comparison score.
# TrueSkill treats the lower rank as the better placement and equal ranks as a
# draw, so -1 rates image_l as the winner and 1 rates image_r as the winner.
# NOTE(review): confirm this matches the dataset's meaning of `score`.
RANKS_BY_SCORE = {
    -1: [0, 1],
    0: [0, 0],
    1: [1, 0],
}

for _, row in X_train.iterrows():
    outcome = row['score']
    if outcome not in RANKS_BY_SCORE:
        # Fail loudly instead of rating this pair with stale or undefined ranks.
        raise ValueError('Unexpected comparison score: {!r}'.format(outcome))

    left_image, right_image = row['image_l'], row['image_r']

    # Rate the two images as single-player teams using their current ratings.
    [left_rating], [right_rating] = ts.rate(
        [[scores[left_image]], [scores[right_image]]],
        ranks=RANKS_BY_SCORE[outcome],
    )

    # Store the updated ratings so later comparisons build on them.
    scores[left_image] = left_rating
    scores[right_image] = right_rating

## Organize scores into a DataFrame

In [8]:
# The dict becomes one column per image; transposing gives one row per image.
# The column names assume each rating unpacks as (mu, sigma) - TODO confirm.
scores_df = pd.DataFrame(scores).transpose()
scores_df.columns = ['score', 'sigma']

Get image paths

In [9]:
# Copy the image id (the index) into two regular columns. At this point
# `image_path` still holds the bare id.
scores_df = scores_df.assign(
    image_path=scores_df.index,
    image=scores_df.index,
)

In [10]:
# Build the path from the untouched `image` id column rather than from
# `image_path` itself, so re-running this cell yields the same result instead
# of stacking another 'images/berlin/' prefix and '.jpg' suffix.
scores_df['image_path'] = scores_df['image'].apply(
    lambda image_id: os.path.join('images', 'berlin', image_id + '.jpg')
)

# Metrics

In [11]:
def compute_probabilities(team1, team2):
    """Estimate the probability that ``team1`` beats ``team2``, and the reverse.

    The skill difference between the teams is normalised by the combined
    rating uncertainty plus per-player performance noise (``beta``), then
    passed through the environment's cumulative distribution function.
    Draws are not modelled: the two returned values always sum to 1.

    Parameters
    ----------
    team1, team2 : iterable of ratings
        Each rating must expose ``mu`` and ``sigma`` attributes.

    Returns
    -------
    tuple of float
        ``(p_team1_wins, p_team2_wins)``.
    """
    # Read beta from the same environment that supplies the CDF, so a
    # customised global environment is honoured consistently.
    env = ts.global_env()
    beta = env.beta

    delta_mu = sum(r.mu for r in team1) - sum(r.mu for r in team2)
    sum_sigma = sum(r.sigma ** 2 for r in itertools.chain(team1, team2))
    size = len(team1) + len(team2)
    denom = math.sqrt(size * (beta * beta) + sum_sigma)

    # Evaluate the CDF once and derive the complement from it.
    p_team1 = env.cdf(delta_mu / denom)
    return p_team1, 1 - p_team1

In [12]:
def compute_logloss(df):
    """Return the log-probability assigned to the actual winner, per decisive row.

    For every row of ``df`` the ratings of ``row.Winner`` and ``row.Loser`` are
    looked up in the global ``scores`` dict and passed to
    ``compute_probabilities``. Rows whose ``score`` is neither -1 nor 1 (for
    example ties) contribute nothing. The values are the raw ``log(p_win)``;
    callers negate and average them to obtain the log loss.
    """
    log_probabilities = []
    for _, comparison in df.iterrows():
        winner_rating = scores[comparison.Winner]
        loser_rating = scores[comparison.Loser]
        p_winner, _p_loser = compute_probabilities([winner_rating], [loser_rating])

        # Only decisive outcomes are scored.
        if comparison.score not in (-1, 1):
            continue
        log_probabilities.append(np.log(p_winner))

    return log_probabilities

In [13]:
def compute_accuracy(df):
    """Return a 0/1 hit indicator for every decisive comparison in ``df``.

    A row counts as a hit (1) when the ratings in the global ``scores`` dict
    give ``row.Winner`` a strictly higher win probability than ``row.Loser``.
    Rows whose ``score`` is neither -1 nor 1 are skipped.
    """
    hits = []
    for _, comparison in df.iterrows():
        winner_rating = scores[comparison.Winner]
        loser_rating = scores[comparison.Loser]
        p_winner, p_loser = compute_probabilities([winner_rating], [loser_rating])

        # Only decisive outcomes are scored.
        if comparison.score not in (-1, 1):
            continue
        hits.append(int(p_winner > p_loser))

    return hits

## Compute metrics for the training set

In [14]:
# Drop rows with score == 0 up front; the metric functions only score
# comparisons whose score is -1 or 1.
decisive_train = X_train[X_train.score != 0]
log_loss_train = compute_logloss(decisive_train)
accuracy_train = compute_accuracy(decisive_train)

## Compute metrics for the test set

In [15]:
# Drop rows with score == 0 up front; the metric functions only score
# comparisons whose score is -1 or 1.
decisive_test = X_test[X_test.score != 0]
log_loss_test = compute_logloss(decisive_test)
accuracy_test = compute_accuracy(decisive_test)

## Aggregate results

In [16]:
# Summary of this run. The per-row log-probabilities are negated and averaged
# to obtain the log loss.
results = {
    'model': 'trueskill',
    'train_logloss': -1 * np.mean(log_loss_train),
    'test_logloss': -1 * np.mean(log_loss_test),
    'train_accuracy': np.mean(accuracy_train),
    'test_accuracy': np.mean(accuracy_test),
    'seed': _RANDOM_STATE
}

# Make sure the target directory exists, and use a context manager so the
# pickle is flushed and the file handle closed deterministically.
os.makedirs('output', exist_ok=True)
results_path = 'output/{}_modelresults_SEED{}.p'.format(results['model'], _RANDOM_STATE)
with open(results_path, 'wb') as results_file:
    pickle.dump(results, results_file)
print(json.dumps(results, indent=4))

{
    "model": "trueskill",
    "train_logloss": 0.3040553592309069,
    "test_logloss": 0.6266570490429713,
    "train_accuracy": 0.9383084577114428,
    "test_accuracy": 0.6538895152198422,
    "seed": 1
}


In [17]:
# Persist the per-image scores. The directory is created if missing, and the
# context manager guarantees the file is flushed and closed after the dump.
os.makedirs('output', exist_ok=True)
scores_path = 'output/{}_scores_SEED{}.p'.format(results['model'], _RANDOM_STATE)
with open(scores_path, 'wb') as scores_file:
    pickle.dump(scores_df, scores_file)