# BoltzRank #
## Luca Negrini - Mat. 956516 ##
### From "BoltzRank: Learning to Maximize Expected Ranking Gain" ###
#### Maxims M. Volkovs, Richard S. Zemel ####

------
#### Initialization ####

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%matplotlib notebook

%load_ext cython

# install lightgbm (required only on first run)
# import sys
# !{sys.executable} -m pip install lightgbm

import os
import os.path
import numpy as np
import lightgbm
import matplotlib.pyplot as plt

# see http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_svmlight_file.html
from sklearn.datasets import load_svmlight_file 

# datasets available at: 
# https://www.microsoft.com/en-us/research/project/mslr/
DATASET_FOLDER = "C:/opt/kiis-training/MSLR-WEB10K/Fold1/"
PERM_FOLDER = DATASET_FOLDER + "perms/"
METRIC_NAME = 'Custom-MSE'

-----
#### Data loading ####

In [2]:
def ensureFile(path):
    if not os.path.exists(path) or not os.path.isfile(path):
        raise FileNotFoundError("'" + path + "': no such file")        
    return path

def retrieveFileNames():
    folder = DATASET_FOLDER + '/' if DATASET_FOLDER[-1:] != '/' else DATASET_FOLDER
    train_file = ensureFile(folder + "train.txt")
    valid_file = ensureFile(folder + "vali.txt")
    test_file = ensureFile(folder + "test.txt")
    return train_file, valid_file, test_file

def loadDataset(path):
    return load_svmlight_file(path, query_id=True)

def loadLightGBM(svmlight_dataset):
    query_lens = [sum(1 for _ in group) for key, group in itertools.groupby(svmlight_dataset[2])]
    return lightgbm.Dataset(data=svmlight_dataset[0], label=svmlight_dataset[1], group=query_lens)

In [50]:
class Query:
    def __init__(self, qid):
        self.qid = qid
        self.labels_to_docs = {}
        self.perms = np.empty((1,1))
        self.probs = np.empty((1,1))
    def addlabel(self, label):
        if not label in self.labels_to_docs:
            self.labels_to_docs[label] = list()
    def adddoc(self, label, doc):
        self.labels_to_docs[label].append(doc)
    def finalize(self):
        self.labels = np.zeros(len(self.labels_to_docs.keys()), dtype=int)
        self.docs = np.empty(len(self.labels_to_docs.keys()), dtype=object)
        i = 0
        totaldocs = 0
        sorteddict = sorted(self.labels_to_docs.items(), reverse = True)
        for label, docs in sorteddict:
            self.labels[i] = label
            self.docs[i] = np.zeros(len(docs), dtype=int)
            for j in range(len(docs)):
                self.docs[i][j] = docs[j]
            i += 1
            totaldocs += len(docs)
        self.alldocs = np.concatenate(self.docs)
        self.flatlabels = np.zeros(totaldocs, dtype=np.double)
        i = 0
        for label, docs in sorteddict:
            for j in range(len(docs)):
                self.flatlabels[i] = label
                i += 1
        del self.labels_to_docs
    def setperms(self, perms):
        self.perms = perms
    def setprobs(self, probs):
        self.probs = probs
    def __repr__(self):  
        return str(self)
    def __str__(self):
        res = "Query " + str(self.qid) + "["
        for i in range(len(self.labels)):
            res += "\n" + str(self.labels[i]) + " -> " + str(self.docs[i])
        res += "]"
        for i in range(len(self.perms)):
            res += "\n[" + str(self.perms[i]) + "] -> " + str(self.probs[i])
        return res

In [54]:
# parameters:
#  dataset: svmlight_dataset 
#      the datset to process
# returned values:
#  query_to_labels_to_documents: dict(int -> dict(float -> list(int)))
#      a map containing, for each query in the dataset, the documents (row index in the dataset) provided 
#      in the input dataset grouped by label
#  doc_to_query: dict(document -> query)
#      a mapping between document (row index in the dataset) and the relative query
def mapQueryToDocuments(dataset):
    queries = {}
    doc_to_query = {} 
    alllabels = np.negative(np.ones(len(dataset[2]), dtype=np.double))
    for i in range(0, len(dataset[2])):
        if not dataset[2][i] in queries:
            queries[dataset[2][i]] = Query(dataset[2][i])
        query = queries[dataset[2][i]]
        query.addlabel(dataset[1][i])
        query.adddoc(dataset[1][i], i)
        doc_to_query[i] = query.qid
        alllabels[i] = dataset[1][i]
        
    for q in queries.values():
        q.finalize()
    
    return queries, alllabels, doc_to_query

-----
#### BoltzRank logic in Cython ####

In [87]:
%%cython

from cython.parallel import prange
from cython import boundscheck, wraparound

# The sign-preserving function introduced in formula (4) and defined in formula (5). The implementation follows 
# the one proposed near the end of Section 4.2.
#
# parameters:
#  x: double 
#      the difference in rankings between the two current documents
#  m: int
#      the number of documents available for the current query
# returned values:
#  val: double
#      the result of the function
@boundscheck(False)
@wraparound(False)
cdef double g_q(double x, int m) nogil:
    return (2 * x) / (m - 1)

# The energy function E(R|S) defined in formula (4). This represents the incompatibility between the 
# given ranking and the given score. More energy means less compatibility
#
# parameters:
#  rank: int[:]
#      the array of document IDs (row index in the dataset) representing the ranking
#  scores: double[:]
#      the array of document scores, score of document i will be retrieved from scores[i]
# returned values:
#  val: double
#      the energy of the ranking w.r.t. the target scores. The lower, the better.
@boundscheck(False)
@wraparound(False)
cdef double energy(int[:] rank, double[:] scores) nogil:
    cdef int m = len(rank)
    if m == 1 or m == 0:
        return 0
    cdef double factor = 2 / (m * (m - 1))
    cdef double res = 0
    cdef int j
    cdef int k
    for k in prange(m, schedule='static', num_threads=8):
        for j in range(k + 1, m):
            res += g_q(j - k, m) * (scores[rank[j]] - scores[rank[k]])
    return factor * res

from libc.math cimport exp

# The approximate rank probability P[Rq](R|S) defined in formula (12). This approximates the probability of a ranking
# given a scoring. Since the effective computation is intractable due to the exponentail number of possible rankings, 
# this is computed through a montecarlo estimate over sample ranking sets
#
# parameters:
#  sampleSet: int[:,:]
#      the array containing all sample rankings, expresed as arrays of document IDs (row index in the dataset)
#  rank: int[:]
#      the ranking whose probability is to be computed, expressed as array of document IDs (row index in the dataset)
#  scores: double[:]
#      the scores to be used for the energy computation, score of document i will be retrieved from scores[i]
# returned values:
#  val: double
#      the probability of the given ranking
@boundscheck(False)
@wraparound(False)
cpdef double approx_rank_probability(int[:,:] sampleSet, int[:] rank, double[:] scores) nogil:
    cdef double prob = exp(-energy(rank, scores))
    cdef double norm = 0
    cdef int r
    for r in prange(len(sampleSet), schedule='static', num_threads=8):
        norm += exp(-energy(sampleSet[r], scores))
    return prob / norm

from math import factorial
import numpy as np

#int[:,:] source, int i, int j, int count, int[:,:] perms
#return: number of not computed permutations
@boundscheck(False)
@wraparound(False)
def perform_permutation(query, int i, int j, int count, int[:,:] perms, int start):
    if not i in query.labels or not j in query.labels:
        # no swapping possible
        return count, start
    # find the indexes of the desired labels
    i = [k for k in range(len(query.labels)) if query.labels[k] == i][0]
    j = [k for k in range(len(query.labels)) if query.labels[k] == j][0]
    # find maximum amount of permutations that can be done
    cdef int _min = min(len(query.docs[i]), len(query.docs[j]))
    cdef int amount = min(count, _min)
    # randomly selects the indexes
    cdef int[:] first = np.random.choice(len(query.docs[i]), amount)
    cdef int[:] second = np.random.choice(len(query.docs[j]), amount)
    # perform the single-element permutations
    cdef int k
    cdef perm
    cdef int[:] p
    for k in range(amount):
        perm = query.docs.copy()
        perm[i][first[k]], perm[j][second[k]] = query.docs[j][second[k]], query.docs[i][first[k]]
        p = np.concatenate(perm)
        perms[start + k] = p
    return count - amount, start + amount

import random

@boundscheck(False)
@wraparound(False)
cdef bint contained(int[:,:] container, int[:] array) nogil:
    cdef bint match
    cdef int i
    cdef int j
    for i in prange(len(container), schedule='static', num_threads=8):
        if container[i][0] == -1 or len(container[i]) != len(array):
            continue
        else:
            match = True
            for j in range(len(container[i])):
                if container[i][j] != array[j]:
                    match = False
                    break
            if match:
                return True
    return False

cdef void setrow(int[:,:] container, int pos, int[:] array) nogil:
    cdef int i
    for i in prange(len(container[pos]), schedule='static', num_threads=8):
        container[pos][i] = array[i]
                

#source: label -> docid*, i: int, j: int, count: int, perms_with_prob: tuple<int> -> float
#return: number of not computed permutations
@boundscheck(False)
@wraparound(False)
def perform_permutation2(query, int i, int j, int count, int[:,:] perms, int start):
    if not i in query.labels or not j in query.labels:
        # no swapping possible
        return count, start
    # find the indexes of the desired labels
    i = [k for k in range(len(query.labels)) if query.labels[k] == i][0]
    j = [k for k in range(len(query.labels)) if query.labels[k] == j][0]
    cdef int c = 0
    cdef int _min = min(len(query.docs[i]), len(query.docs[j]))
    cdef int amount = max(1, int(_min * .5))
    limit = factorial(_min) / (factorial(amount) * factorial(_min - amount))
    cdef int k
    cdef int d
    for k in range(count):
        perm = query.docs.copy()
        first = random.sample(range(len(query.docs[i])), k=amount)
        second = random.sample(range(len(query.docs[j])), k=amount)
        for d in range(len(first)):
            perm[i][first[d]], perm[j][second[d]] = query.docs[j][second[d]], query.docs[i][first[d]]
        p = np.concatenate(perm)
        if not contained(perms, p):
            setrow(perms, start + c, p)
            c += 1
            if c == limit:
                return count - c, start + c
        else:
            k -= 1
    return 0, start + c
    
import itertools

@boundscheck(False)
@wraparound(False)
cdef int[:,:] allPerms(int[:] source, long long fact):
    cdef int i = 0
    cdef int k
    perm = itertools.permutations(source)
    cdef int[:,:] result = np.zeros((fact, len(source)), dtype=int)
    for p in perm:
        for k in range(len(p)):
            result[i][k] = p[k]
        i += 1
    return result

RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS = 100
RANK_SAMPLE_SET_DISTRIBUTIONS = [
                                int(.20 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 4->0
                                int(.18 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 4->1
                                int(.14 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 4->2
                                int(.08 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 4->3
                                int(.14 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 3->0
                                int(.12 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 3->1
                                int(.06 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 3->2
                                int(.04 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 2->0
                                int(.02 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS), # 2->1
                                int(.02 * RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS)  # 1->0
                                ]

#cdef int[:,:] process_query(int query, double[:] labels, int[:,:] docs, double[:] probs) nogil:
@boundscheck(False)
@wraparound(False)
def process_query(query, alllables):
    cdef int carry = 0
    fact = factorial(len(query.alldocs))
    s = " (" + str(RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS) + " permutations computed)"
    cdef perms
    cdef int last = 0
    if fact <= RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS:
        # evaluate all possible permutations, each one representing a different ranking
        perms = allPerms(query.alldocs, fact)
        s = " (all " + str(fact) + " permutations computed)"
    else:
        perms = np.negative(np.ones((RANK_SAMPLE_SET_MAX_QUERY_PERMUTATIONS, len(query.alldocs)), dtype=int))
        # switch the labels of the documents, then sort the documents by label to obtain a ranking
        carry, last = perform_permutation2(query, 4, 0, RANK_SAMPLE_SET_DISTRIBUTIONS[0], perms, last)
        carry, last = perform_permutation2(query, 4, 1, RANK_SAMPLE_SET_DISTRIBUTIONS[1] + carry, perms, last)
        carry, last = perform_permutation2(query, 4, 2, RANK_SAMPLE_SET_DISTRIBUTIONS[2] + carry, perms, last)
        carry, last = perform_permutation2(query, 4, 3, RANK_SAMPLE_SET_DISTRIBUTIONS[3] + carry, perms, last)
        carry, last = perform_permutation2(query, 3, 0, RANK_SAMPLE_SET_DISTRIBUTIONS[4] + carry, perms, last)
        carry, last = perform_permutation2(query, 3, 1, RANK_SAMPLE_SET_DISTRIBUTIONS[5] + carry, perms, last)
        carry, last = perform_permutation2(query, 3, 2, RANK_SAMPLE_SET_DISTRIBUTIONS[6] + carry, perms, last)
        carry, last = perform_permutation2(query, 2, 0, RANK_SAMPLE_SET_DISTRIBUTIONS[7] + carry, perms, last)
        carry, last = perform_permutation2(query, 2, 1, RANK_SAMPLE_SET_DISTRIBUTIONS[8] + carry, perms, last)
        carry, last = perform_permutation2(query, 1, 0, RANK_SAMPLE_SET_DISTRIBUTIONS[9] + carry, perms, last)
        if carry != 0:
            if not query.alldocs in perms:
                perms[last] = query.alldocs
                s = " (missing " + str(carry - 1) + " permutations - considering also dataset ranking)"
            else:
                s = " (missing " + str(carry) + " permutations) "
        perms = perms[perms.max(axis=1)>=0]
    cdef double[:] probs = np.zeros(len(perms))
    cdef int i
    for i in range(len(perms)):
        probs[i] = approx_rank_probability(perms, perms[i], alllables)
    query.setperms(perms)
    query.setprobs(probs)
    print("query " + str(query.qid) + " done " + s)
    return query

from libc.math cimport log

@boundscheck(False)
@wraparound(False)
cpdef double cross_entropy(int[:,:] perms, double[:] probs, double[:] scores) nogil:
    cdef double result = 0
    cdef int i
    for i in prange(len(perms), schedule='static', num_threads=8):
        result += probs[i] * log(approx_rank_probability(perms, perms[i], scores))
    return -result



In [6]:
train_file, valid_file, test_file = retrieveFileNames()

print("training file: " + train_file)
print("validation file: " + valid_file)
print("test file: " + test_file)
    
print("loading datasets... ")
import time
start = time.process_time()
train_dataset = loadDataset(train_file)
print("train dataset loading took " + str(time.process_time() - start) + " s")
start = time.process_time()
valid_dataset = loadDataset(valid_file)
print("validation dataset loading took " + str(time.process_time() - start) + " s")
start = time.process_time()
test_dataset = loadDataset(test_file)
print("test dataset loading took " + str(time.process_time() - start) + " s")

import itertools
print("converting datasets to LightGBM format... ")
train_lgb = loadLightGBM(train_dataset)
valid_lgb = loadLightGBM(valid_dataset)
test_lgb = loadLightGBM(test_dataset)

print("done")

training file: C:/opt/kiis-training/MSLR-WEB10K/Fold1/train.txt
validation file: C:/opt/kiis-training/MSLR-WEB10K/Fold1/vali.txt
test file: C:/opt/kiis-training/MSLR-WEB10K/Fold1/test.txt
loading datasets... 
train dataset loading took 67.09375 s
validation dataset loading took 22.375 s
test dataset loading took 21.953125 s
converting datasets to LightGBM format... 
done


In [55]:
print("creating query-documents mappings...")
queries, alllables, doc_to_query = mapQueryToDocuments(train_dataset)
print("done")

import multiprocessing
from joblib import Parallel, delayed, parallel_backend
print("creating sample sets...")
start = time.process_time()

#num_cores = multiprocessing.cpu_count()
#with parallel_backend('threading', n_jobs=num_cores):
#    result_list = Parallel()(delayed(process_query)(query) for query in queries.values())
#result_list = Parallel(n_jobs=num_cores)(delayed(process_query)(query) for query in queries.values())
#for q in result_list:
    #queries[q.qid] = q
for q in queries.values():
    queries[q.qid] = process_query(q, alllables)
print("sample set creation took " + str(time.process_time() - start) + " s")
print("done")

creating query-documents mappings...
done
creating sample sets...
query 1 done  (100 permutations computed)
query 16 done  (100 permutations computed)
query 31 done  (100 permutations computed)
query 46 done  (100 permutations computed)
query 61 done  (100 permutations computed)
query 76 done  (100 permutations computed)
query 91 done  (100 permutations computed)
query 106 done  (missing 99 permutations - considering also dataset ranking)
query 121 done  (100 permutations computed)
query 136 done  (100 permutations computed)
query 151 done  (100 permutations computed)
query 166 done  (100 permutations computed)
query 181 done  (100 permutations computed)
query 196 done  (100 permutations computed)
query 211 done  (missing 20 permutations) 
query 226 done  (100 permutations computed)
query 241 done  (100 permutations computed)
query 256 done  (100 permutations computed)
query 271 done  (100 permutations computed)
query 286 done  (missing 99 permutations - considering also dataset rankin

query 2716 done  (100 permutations computed)
query 2731 done  (100 permutations computed)
query 2746 done  (100 permutations computed)
query 2761 done  (100 permutations computed)
query 2776 done  (100 permutations computed)
query 2791 done  (100 permutations computed)
query 2806 done  (100 permutations computed)
query 2821 done  (100 permutations computed)
query 2836 done  (100 permutations computed)
query 2851 done  (100 permutations computed)
query 2866 done  (missing 93 permutations) 
query 2881 done  (100 permutations computed)
query 2896 done  (100 permutations computed)
query 2911 done  (100 permutations computed)
query 2926 done  (100 permutations computed)
query 2941 done  (100 permutations computed)
query 2956 done  (100 permutations computed)
query 2971 done  (100 permutations computed)
query 2986 done  (100 permutations computed)
query 3001 done  (100 permutations computed)
query 3016 done  (100 permutations computed)
query 3031 done  (100 permutations computed)
query 3046 

query 5446 done  (100 permutations computed)
query 5461 done  (100 permutations computed)
query 5476 done  (missing 75 permutations) 
query 5491 done  (100 permutations computed)
query 5506 done  (100 permutations computed)
query 5521 done  (100 permutations computed)
query 5536 done  (100 permutations computed)
query 5551 done  (100 permutations computed)
query 5566 done  (100 permutations computed)
query 5581 done  (100 permutations computed)
query 5596 done  (missing 76 permutations) 
query 5611 done  (missing 91 permutations) 
query 5626 done  (100 permutations computed)
query 5641 done  (100 permutations computed)
query 5656 done  (missing 10 permutations) 
query 5671 done  (missing 99 permutations) 
query 5686 done  (100 permutations computed)
query 5701 done  (100 permutations computed)
query 5716 done  (100 permutations computed)
query 5731 done  (100 permutations computed)
query 5746 done  (100 permutations computed)
query 5761 done  (100 permutations computed)
query 5776 done

query 8191 done  (100 permutations computed)
query 8206 done  (100 permutations computed)
query 8221 done  (100 permutations computed)
query 8236 done  (100 permutations computed)
query 8251 done  (100 permutations computed)
query 8266 done  (100 permutations computed)
query 8281 done  (100 permutations computed)
query 8296 done  (100 permutations computed)
query 8311 done  (100 permutations computed)
query 8326 done  (missing 63 permutations) 
query 8341 done  (100 permutations computed)
query 8356 done  (100 permutations computed)
query 8371 done  (100 permutations computed)
query 8386 done  (100 permutations computed)
query 8401 done  (100 permutations computed)
query 8416 done  (100 permutations computed)
query 8431 done  (100 permutations computed)
query 8446 done  (100 permutations computed)
query 8461 done  (100 permutations computed)
query 8476 done  (100 permutations computed)
query 8491 done  (100 permutations computed)
query 8506 done  (100 permutations computed)
query 8521 

query 10891 done  (100 permutations computed)
query 10906 done  (100 permutations computed)
query 10921 done  (100 permutations computed)
query 10936 done  (100 permutations computed)
query 10951 done  (all 2 permutations computed)
query 10966 done  (100 permutations computed)
query 10981 done  (100 permutations computed)
query 10996 done  (100 permutations computed)
query 11011 done  (100 permutations computed)
query 11026 done  (100 permutations computed)
query 11041 done  (100 permutations computed)
query 11056 done  (100 permutations computed)
query 11071 done  (100 permutations computed)
query 11086 done  (100 permutations computed)
query 11101 done  (100 permutations computed)
query 11116 done  (100 permutations computed)
query 11131 done  (100 permutations computed)
query 11146 done  (100 permutations computed)
query 11161 done  (100 permutations computed)
query 11176 done  (100 permutations computed)
query 11191 done  (missing 60 permutations) 
query 11206 done  (100 permutatio

query 13561 done  (100 permutations computed)
query 13576 done  (missing 92 permutations) 
query 13591 done  (100 permutations computed)
query 13606 done  (all 24 permutations computed)
query 13621 done  (100 permutations computed)
query 13636 done  (100 permutations computed)
query 13651 done  (100 permutations computed)
query 13666 done  (100 permutations computed)
query 13681 done  (100 permutations computed)
query 13696 done  (100 permutations computed)
query 13711 done  (missing 99 permutations - considering also dataset ranking)
query 13726 done  (all 2 permutations computed)
query 13741 done  (missing 80 permutations) 
query 13756 done  (100 permutations computed)
query 13771 done  (100 permutations computed)
query 13786 done  (100 permutations computed)
query 13801 done  (100 permutations computed)
query 13816 done  (missing 99 permutations - considering also dataset ranking)
query 13831 done  (100 permutations computed)
query 13846 done  (100 permutations computed)
query 13861

query 16186 done  (100 permutations computed)
query 16201 done  (100 permutations computed)
query 16216 done  (100 permutations computed)
query 16231 done  (100 permutations computed)
query 16246 done  (all 6 permutations computed)
query 16261 done  (100 permutations computed)
query 16276 done  (100 permutations computed)
query 16291 done  (100 permutations computed)
query 16306 done  (100 permutations computed)
query 16321 done  (100 permutations computed)
query 16336 done  (100 permutations computed)
query 16351 done  (100 permutations computed)
query 16366 done  (100 permutations computed)
query 16381 done  (100 permutations computed)
query 16396 done  (100 permutations computed)
query 16411 done  (missing 99 permutations) 
query 16426 done  (missing 97 permutations) 
query 16441 done  (100 permutations computed)
query 16456 done  (missing 99 permutations - considering also dataset ranking)
query 16471 done  (100 permutations computed)
query 16486 done  (100 permutations computed)
q

query 18826 done  (100 permutations computed)
query 18841 done  (100 permutations computed)
query 18856 done  (100 permutations computed)
query 18871 done  (100 permutations computed)
query 18886 done  (100 permutations computed)
query 18901 done  (100 permutations computed)
query 18916 done  (100 permutations computed)
query 18931 done  (missing 39 permutations) 
query 18946 done  (100 permutations computed)
query 18961 done  (100 permutations computed)
query 18976 done  (100 permutations computed)
query 18991 done  (100 permutations computed)
query 19006 done  (100 permutations computed)
query 19021 done  (100 permutations computed)
query 19036 done  (missing 98 permutations) 
query 19051 done  (100 permutations computed)
query 19066 done  (100 permutations computed)
query 19081 done  (100 permutations computed)
query 19096 done  (100 permutations computed)
query 19111 done  (100 permutations computed)
query 19126 done  (100 permutations computed)
query 19141 done  (missing 91 permut

query 21466 done  (100 permutations computed)
query 21481 done  (missing 99 permutations - considering also dataset ranking)
query 21496 done  (100 permutations computed)
query 21511 done  (100 permutations computed)
query 21526 done  (100 permutations computed)
query 21541 done  (100 permutations computed)
query 21556 done  (100 permutations computed)
query 21571 done  (100 permutations computed)
query 21586 done  (100 permutations computed)
query 21601 done  (100 permutations computed)
query 21616 done  (100 permutations computed)
query 21631 done  (100 permutations computed)
query 21646 done  (100 permutations computed)
query 21661 done  (100 permutations computed)
query 21676 done  (missing 92 permutations) 
query 21691 done  (all 2 permutations computed)
query 21706 done  (100 permutations computed)
query 21721 done  (100 permutations computed)
query 21736 done  (missing 88 permutations) 
query 21751 done  (100 permutations computed)
query 21766 done  (100 permutations computed)
q

query 24046 done  (100 permutations computed)
query 24061 done  (100 permutations computed)
query 24076 done  (100 permutations computed)
query 24091 done  (100 permutations computed)
query 24106 done  (100 permutations computed)
query 24121 done  (100 permutations computed)
query 24136 done  (100 permutations computed)
query 24151 done  (missing 95 permutations) 
query 24166 done  (missing 99 permutations - considering also dataset ranking)
query 24181 done  (100 permutations computed)
query 24196 done  (100 permutations computed)
query 24211 done  (100 permutations computed)
query 24226 done  (100 permutations computed)
query 24241 done  (100 permutations computed)
query 24256 done  (100 permutations computed)
query 24271 done  (100 permutations computed)
query 24286 done  (100 permutations computed)
query 24301 done  (100 permutations computed)
query 24316 done  (100 permutations computed)
query 24331 done  (100 permutations computed)
query 24346 done  (100 permutations computed)
qu

query 26686 done  (100 permutations computed)
query 26701 done  (100 permutations computed)
query 26716 done  (missing 91 permutations) 
query 26731 done  (missing 94 permutations) 
query 26746 done  (100 permutations computed)
query 26761 done  (100 permutations computed)
query 26776 done  (100 permutations computed)
query 26791 done  (100 permutations computed)
query 26806 done  (100 permutations computed)
query 26821 done  (100 permutations computed)
query 26836 done  (100 permutations computed)
query 26851 done  (100 permutations computed)
query 26866 done  (100 permutations computed)
query 26881 done  (100 permutations computed)
query 26896 done  (100 permutations computed)
query 26911 done  (100 permutations computed)
query 26926 done  (100 permutations computed)
query 26941 done  (100 permutations computed)
query 26956 done  (100 permutations computed)
query 26971 done  (100 permutations computed)
query 26986 done  (100 permutations computed)
query 27001 done  (100 permutations 

query 29326 done  (100 permutations computed)
query 29341 done  (100 permutations computed)
query 29356 done  (100 permutations computed)
query 29371 done  (100 permutations computed)
query 29386 done  (100 permutations computed)
query 29401 done  (100 permutations computed)
query 29416 done  (100 permutations computed)
query 29431 done  (missing 88 permutations) 
query 29446 done  (100 permutations computed)
query 29461 done  (100 permutations computed)
query 29476 done  (100 permutations computed)
query 29491 done  (100 permutations computed)
query 29506 done  (missing 80 permutations) 
query 29521 done  (100 permutations computed)
query 29536 done  (100 permutations computed)
query 29551 done  (100 permutations computed)
query 29566 done  (100 permutations computed)
query 29581 done  (100 permutations computed)
query 29596 done  (missing 74 permutations) 
query 29611 done  (100 permutations computed)
query 29626 done  (100 permutations computed)
query 29641 done  (100 permutations c

query 2044 done  (100 permutations computed)
query 2059 done  (missing 84 permutations) 
query 2074 done  (100 permutations computed)
query 2089 done  (100 permutations computed)
query 2104 done  (100 permutations computed)
query 2119 done  (100 permutations computed)
query 2134 done  (100 permutations computed)
query 2149 done  (100 permutations computed)
query 2164 done  (100 permutations computed)
query 2179 done  (missing 15 permutations) 
query 2194 done  (100 permutations computed)
query 2209 done  (100 permutations computed)
query 2224 done  (100 permutations computed)
query 2239 done  (100 permutations computed)
query 2254 done  (100 permutations computed)
query 2269 done  (100 permutations computed)
query 2284 done  (100 permutations computed)
query 2299 done  (100 permutations computed)
query 2314 done  (100 permutations computed)
query 2329 done  (100 permutations computed)
query 2344 done  (100 permutations computed)
query 2359 done  (100 permutations computed)
query 2374 d

query 4759 done  (100 permutations computed)
query 4774 done  (100 permutations computed)
query 4789 done  (100 permutations computed)
query 4804 done  (100 permutations computed)
query 4819 done  (100 permutations computed)
query 4834 done  (100 permutations computed)
query 4849 done  (100 permutations computed)
query 4864 done  (100 permutations computed)
query 4879 done  (100 permutations computed)
query 4894 done  (100 permutations computed)
query 4909 done  (100 permutations computed)
query 4924 done  (missing 65 permutations) 
query 4939 done  (missing 25 permutations) 
query 4954 done  (100 permutations computed)
query 4969 done  (100 permutations computed)
query 4984 done  (100 permutations computed)
query 4999 done  (100 permutations computed)
query 5014 done  (missing 3 permutations) 
query 5029 done  (missing 90 permutations) 
query 5044 done  (missing 90 permutations) 
query 5059 done  (100 permutations computed)
query 5074 done  (100 permutations computed)
query 5089 done 

query 7489 done  (100 permutations computed)
query 7504 done  (100 permutations computed)
query 7519 done  (100 permutations computed)
query 7534 done  (100 permutations computed)
query 7549 done  (100 permutations computed)
query 7564 done  (100 permutations computed)
query 7579 done  (100 permutations computed)
query 7594 done  (100 permutations computed)
query 7609 done  (100 permutations computed)
query 7624 done  (100 permutations computed)
query 7639 done  (missing 7 permutations) 
query 7654 done  (100 permutations computed)
query 7669 done  (100 permutations computed)
query 7684 done  (100 permutations computed)
query 7699 done  (100 permutations computed)
query 7714 done  (100 permutations computed)
query 7729 done  (100 permutations computed)
query 7744 done  (100 permutations computed)
query 7759 done  (100 permutations computed)
query 7774 done  (100 permutations computed)
query 7789 done  (100 permutations computed)
query 7804 done  (100 permutations computed)
query 7819 d

query 10219 done  (100 permutations computed)
query 10234 done  (missing 99 permutations) 
query 10249 done  (100 permutations computed)
query 10264 done  (100 permutations computed)
query 10279 done  (100 permutations computed)
query 10294 done  (100 permutations computed)
query 10309 done  (100 permutations computed)
query 10324 done  (100 permutations computed)
query 10339 done  (100 permutations computed)
query 10354 done  (100 permutations computed)
query 10369 done  (100 permutations computed)
query 10384 done  (missing 59 permutations) 
query 10399 done  (missing 98 permutations) 
query 10414 done  (100 permutations computed)
query 10429 done  (100 permutations computed)
query 10444 done  (100 permutations computed)
query 10459 done  (missing 99 permutations) 
query 10474 done  (100 permutations computed)
query 10489 done  (100 permutations computed)
query 10504 done  (100 permutations computed)
query 10519 done  (100 permutations computed)
query 10534 done  (100 permutations co

query 12889 done  (100 permutations computed)
query 12904 done  (100 permutations computed)
query 12919 done  (missing 90 permutations) 
query 12934 done  (100 permutations computed)
query 12949 done  (100 permutations computed)
query 12964 done  (100 permutations computed)
query 12979 done  (100 permutations computed)
query 12994 done  (100 permutations computed)
query 13009 done  (100 permutations computed)
query 13024 done  (100 permutations computed)
query 13039 done  (100 permutations computed)
query 13054 done  (100 permutations computed)
query 13069 done  (100 permutations computed)
query 13084 done  (100 permutations computed)
query 13099 done  (100 permutations computed)
query 13114 done  (100 permutations computed)
query 13129 done  (missing 99 permutations - considering also dataset ranking)
query 13144 done  (100 permutations computed)
query 13159 done  (100 permutations computed)
query 13174 done  (100 permutations computed)
query 13189 done  (100 permutations computed)
qu

query 15514 done  (100 permutations computed)
query 15529 done  (100 permutations computed)
query 15544 done  (missing 80 permutations) 
query 15559 done  (100 permutations computed)
query 15574 done  (100 permutations computed)
query 15589 done  (100 permutations computed)
query 15604 done  (100 permutations computed)
query 15619 done  (100 permutations computed)
query 15634 done  (missing 23 permutations) 
query 15649 done  (100 permutations computed)
query 15664 done  (100 permutations computed)
query 15679 done  (100 permutations computed)
query 15694 done  (missing 80 permutations) 
query 15709 done  (100 permutations computed)
query 15724 done  (100 permutations computed)
query 15739 done  (100 permutations computed)
query 15754 done  (100 permutations computed)
query 15769 done  (100 permutations computed)
query 15784 done  (missing 84 permutations) 
query 15799 done  (100 permutations computed)
query 15814 done  (100 permutations computed)
query 15829 done  (100 permutations co

query 18169 done  (100 permutations computed)
query 18184 done  (100 permutations computed)
query 18199 done  (missing 98 permutations) 
query 18214 done  (100 permutations computed)
query 18229 done  (100 permutations computed)
query 18244 done  (100 permutations computed)
query 18259 done  (100 permutations computed)
query 18274 done  (missing 98 permutations) 
query 18289 done  (100 permutations computed)
query 18304 done  (100 permutations computed)
query 18319 done  (100 permutations computed)
query 18334 done  (100 permutations computed)
query 18349 done  (100 permutations computed)
query 18364 done  (100 permutations computed)
query 18379 done  (100 permutations computed)
query 18394 done  (100 permutations computed)
query 18409 done  (100 permutations computed)
query 18424 done  (100 permutations computed)
query 18439 done  (100 permutations computed)
query 18454 done  (100 permutations computed)
query 18469 done  (100 permutations computed)
query 18484 done  (100 permutations 

query 20839 done  (100 permutations computed)
query 20854 done  (100 permutations computed)
query 20869 done  (100 permutations computed)
query 20884 done  (100 permutations computed)
query 20899 done  (missing 94 permutations) 
query 20914 done  (100 permutations computed)
query 20929 done  (missing 94 permutations) 
query 20944 done  (100 permutations computed)
query 20959 done  (100 permutations computed)
query 20974 done  (100 permutations computed)
query 20989 done  (missing 99 permutations - considering also dataset ranking)
query 21004 done  (missing 30 permutations) 
query 21019 done  (100 permutations computed)
query 21034 done  (100 permutations computed)
query 21049 done  (100 permutations computed)
query 21064 done  (missing 76 permutations) 
query 21079 done  (100 permutations computed)
query 21094 done  (100 permutations computed)
query 21109 done  (100 permutations computed)
query 21124 done  (100 permutations computed)
query 21139 done  (100 permutations computed)
query

query 23554 done  (100 permutations computed)
query 23569 done  (100 permutations computed)
query 23584 done  (100 permutations computed)
query 23599 done  (100 permutations computed)
query 23614 done  (100 permutations computed)
query 23629 done  (100 permutations computed)
query 23644 done  (100 permutations computed)
query 23659 done  (100 permutations computed)
query 23674 done  (100 permutations computed)
query 23689 done  (missing 93 permutations) 
query 23704 done  (100 permutations computed)
query 23719 done  (100 permutations computed)
query 23734 done  (100 permutations computed)
query 23749 done  (100 permutations computed)
query 23764 done  (missing 59 permutations) 
query 23779 done  (100 permutations computed)
query 23794 done  (100 permutations computed)
query 23809 done  (100 permutations computed)
query 23824 done  (100 permutations computed)
query 23839 done  (100 permutations computed)
query 23854 done  (100 permutations computed)
query 23869 done  (100 permutations 

query 26209 done  (100 permutations computed)
query 26224 done  (100 permutations computed)
query 26239 done  (100 permutations computed)
query 26254 done  (missing 88 permutations) 
query 26269 done  (100 permutations computed)
query 26284 done  (100 permutations computed)
query 26299 done  (missing 56 permutations) 
query 26314 done  (missing 76 permutations) 
query 26329 done  (100 permutations computed)
query 26344 done  (100 permutations computed)
query 26359 done  (100 permutations computed)
query 26374 done  (100 permutations computed)
query 26389 done  (missing 95 permutations) 
query 26404 done  (100 permutations computed)
query 26419 done  (100 permutations computed)
query 26434 done  (100 permutations computed)
query 26449 done  (100 permutations computed)
query 26464 done  (missing 99 permutations - considering also dataset ranking)
query 26479 done  (100 permutations computed)
query 26494 done  (100 permutations computed)
query 26509 done  (missing 98 permutations) 
query 

query 28909 done  (100 permutations computed)
query 28924 done  (all 1 permutations computed)
query 28939 done  (100 permutations computed)
query 28954 done  (100 permutations computed)
query 28969 done  (100 permutations computed)
query 28984 done  (100 permutations computed)
query 28999 done  (100 permutations computed)
query 29014 done  (100 permutations computed)
query 29029 done  (100 permutations computed)
query 29044 done  (100 permutations computed)
query 29059 done  (100 permutations computed)
query 29074 done  (100 permutations computed)
query 29089 done  (100 permutations computed)
query 29104 done  (100 permutations computed)
query 29119 done  (missing 99 permutations - considering also dataset ranking)
query 29134 done  (100 permutations computed)
query 29149 done  (100 permutations computed)
query 29164 done  (100 permutations computed)
query 29179 done  (100 permutations computed)
query 29194 done  (100 permutations computed)
query 29209 done  (100 permutations computed)

query 1597 done  (100 permutations computed)
query 1612 done  (100 permutations computed)
query 1627 done  (missing 10 permutations) 
query 1642 done  (100 permutations computed)
query 1657 done  (100 permutations computed)
query 1672 done  (100 permutations computed)
query 1687 done  (100 permutations computed)
query 1702 done  (100 permutations computed)
query 1717 done  (100 permutations computed)
query 1732 done  (100 permutations computed)
query 1747 done  (100 permutations computed)
query 1762 done  (100 permutations computed)
query 1777 done  (missing 53 permutations) 
query 1792 done  (100 permutations computed)
query 1807 done  (100 permutations computed)
query 1822 done  (100 permutations computed)
query 1837 done  (100 permutations computed)
query 1852 done  (100 permutations computed)
query 1867 done  (100 permutations computed)
query 1882 done  (100 permutations computed)
query 1897 done  (missing 28 permutations) 
query 1912 done  (100 permutations computed)
query 1927 do

query 4327 done  (100 permutations computed)
query 4342 done  (100 permutations computed)
query 4357 done  (100 permutations computed)
query 4372 done  (100 permutations computed)
query 4387 done  (100 permutations computed)
query 4402 done  (100 permutations computed)
query 4417 done  (100 permutations computed)
query 4432 done  (missing 94 permutations) 
query 4447 done  (100 permutations computed)
query 4462 done  (100 permutations computed)
query 4477 done  (100 permutations computed)
query 4492 done  (100 permutations computed)
query 4507 done  (100 permutations computed)
query 4522 done  (100 permutations computed)
query 4537 done  (100 permutations computed)
query 4552 done  (100 permutations computed)
query 4567 done  (missing 99 permutations) 
query 4582 done  (100 permutations computed)
query 4597 done  (100 permutations computed)
query 4612 done  (100 permutations computed)
query 4627 done  (100 permutations computed)
query 4642 done  (100 permutations computed)
query 4657 d

query 7057 done  (100 permutations computed)
query 7072 done  (100 permutations computed)
query 7087 done  (100 permutations computed)
query 7102 done  (missing 92 permutations) 
query 7117 done  (100 permutations computed)
query 7132 done  (100 permutations computed)
query 7147 done  (100 permutations computed)
query 7162 done  (100 permutations computed)
query 7177 done  (100 permutations computed)
query 7192 done  (100 permutations computed)
query 7207 done  (missing 24 permutations) 
query 7222 done  (100 permutations computed)
query 7237 done  (100 permutations computed)
query 7252 done  (missing 97 permutations) 
query 7267 done  (100 permutations computed)
query 7282 done  (100 permutations computed)
query 7297 done  (100 permutations computed)
query 7312 done  (missing 97 permutations) 
query 7327 done  (100 permutations computed)
query 7342 done  (100 permutations computed)
query 7357 done  (missing 59 permutations) 
query 7372 done  (100 permutations computed)
query 7387 done

query 9772 done  (100 permutations computed)
query 9787 done  (missing 96 permutations) 
query 9802 done  (100 permutations computed)
query 9817 done  (100 permutations computed)
query 9832 done  (100 permutations computed)
query 9847 done  (100 permutations computed)
query 9862 done  (100 permutations computed)
query 9877 done  (100 permutations computed)
query 9892 done  (100 permutations computed)
query 9907 done  (missing 97 permutations) 
query 9922 done  (100 permutations computed)
query 9937 done  (100 permutations computed)
query 9952 done  (100 permutations computed)
query 9967 done  (100 permutations computed)
query 9982 done  (100 permutations computed)
query 9997 done  (missing 99 permutations) 
query 10012 done  (missing 88 permutations) 
query 10027 done  (100 permutations computed)
query 10042 done  (100 permutations computed)
query 10057 done  (100 permutations computed)
query 10072 done  (100 permutations computed)
query 10087 done  (100 permutations computed)
query 10

query 12442 done  (100 permutations computed)
query 12457 done  (100 permutations computed)
query 12472 done  (100 permutations computed)
query 12487 done  (100 permutations computed)
query 12502 done  (missing 92 permutations) 
query 12517 done  (100 permutations computed)
query 12532 done  (100 permutations computed)
query 12547 done  (100 permutations computed)
query 12562 done  (100 permutations computed)
query 12577 done  (100 permutations computed)
query 12592 done  (missing 99 permutations) 
query 12607 done  (100 permutations computed)
query 12622 done  (100 permutations computed)
query 12637 done  (100 permutations computed)
query 12652 done  (100 permutations computed)
query 12667 done  (missing 99 permutations) 
query 12682 done  (missing 80 permutations) 
query 12697 done  (100 permutations computed)
query 12712 done  (100 permutations computed)
query 12727 done  (100 permutations computed)
query 12742 done  (100 permutations computed)
query 12757 done  (100 permutations co

query 15037 done  (100 permutations computed)
query 15052 done  (missing 99 permutations - considering also dataset ranking)
query 15067 done  (100 permutations computed)
query 15082 done  (100 permutations computed)
query 15097 done  (missing 98 permutations) 
query 15112 done  (100 permutations computed)
query 15127 done  (100 permutations computed)
query 15142 done  (100 permutations computed)
query 15157 done  (100 permutations computed)
query 15172 done  (100 permutations computed)
query 15187 done  (missing 28 permutations) 
query 15202 done  (100 permutations computed)
query 15217 done  (missing 90 permutations) 
query 15232 done  (100 permutations computed)
query 15247 done  (missing 99 permutations - considering also dataset ranking)
query 15262 done  (missing 99 permutations) 
query 15277 done  (100 permutations computed)
query 15292 done  (100 permutations computed)
query 15307 done  (100 permutations computed)
query 15322 done  (100 permutations computed)
query 15337 done  

query 17662 done  (100 permutations computed)
query 17677 done  (100 permutations computed)
query 17692 done  (100 permutations computed)
query 17707 done  (100 permutations computed)
query 17722 done  (100 permutations computed)
query 17737 done  (100 permutations computed)
query 17752 done  (100 permutations computed)
query 17767 done  (100 permutations computed)
query 17782 done  (100 permutations computed)
query 17797 done  (100 permutations computed)
query 17812 done  (100 permutations computed)
query 17827 done  (100 permutations computed)
query 17842 done  (100 permutations computed)
query 17857 done  (100 permutations computed)
query 17872 done  (missing 99 permutations - considering also dataset ranking)
query 17887 done  (missing 99 permutations) 
query 17902 done  (100 permutations computed)
query 17917 done  (100 permutations computed)
query 17932 done  (100 permutations computed)
query 17947 done  (100 permutations computed)
query 17962 done  (100 permutations computed)
qu

query 20287 done  (100 permutations computed)
query 20302 done  (100 permutations computed)
query 20317 done  (100 permutations computed)
query 20332 done  (100 permutations computed)
query 20347 done  (100 permutations computed)
query 20362 done  (100 permutations computed)
query 20377 done  (100 permutations computed)
query 20392 done  (100 permutations computed)
query 20407 done  (100 permutations computed)
query 20422 done  (missing 86 permutations) 
query 20437 done  (100 permutations computed)
query 20452 done  (100 permutations computed)
query 20467 done  (100 permutations computed)
query 20482 done  (missing 59 permutations) 
query 20497 done  (100 permutations computed)
query 20512 done  (100 permutations computed)
query 20527 done  (100 permutations computed)
query 20542 done  (100 permutations computed)
query 20557 done  (100 permutations computed)
query 20572 done  (100 permutations computed)
query 20587 done  (100 permutations computed)
query 20602 done  (100 permutations 

query 22912 done  (100 permutations computed)
query 22927 done  (100 permutations computed)
query 22942 done  (100 permutations computed)
query 22957 done  (100 permutations computed)
query 22972 done  (missing 98 permutations) 
query 22987 done  (100 permutations computed)
query 23002 done  (100 permutations computed)
query 23017 done  (missing 88 permutations) 
query 23032 done  (100 permutations computed)
query 23047 done  (100 permutations computed)
query 23062 done  (100 permutations computed)
query 23077 done  (100 permutations computed)
query 23092 done  (100 permutations computed)
query 23107 done  (100 permutations computed)
query 23122 done  (100 permutations computed)
query 23137 done  (100 permutations computed)
query 23152 done  (100 permutations computed)
query 23167 done  (100 permutations computed)
query 23182 done  (100 permutations computed)
query 23197 done  (100 permutations computed)
query 23212 done  (100 permutations computed)
query 23227 done  (missing 92 permut

query 25552 done  (100 permutations computed)
query 25567 done  (100 permutations computed)
query 25582 done  (100 permutations computed)
query 25597 done  (100 permutations computed)
query 25612 done  (100 permutations computed)
query 25627 done  (missing 74 permutations) 
query 25642 done  (100 permutations computed)
query 25657 done  (100 permutations computed)
query 25672 done  (100 permutations computed)
query 25687 done  (100 permutations computed)
query 25702 done  (100 permutations computed)
query 25717 done  (missing 88 permutations) 
query 25732 done  (missing 81 permutations) 
query 25747 done  (100 permutations computed)
query 25762 done  (100 permutations computed)
query 25777 done  (100 permutations computed)
query 25792 done  (100 permutations computed)
query 25807 done  (missing 95 permutations) 
query 25822 done  (100 permutations computed)
query 25837 done  (100 permutations computed)
query 25852 done  (100 permutations computed)
query 25867 done  (missing 76 permutat

query 28237 done  (100 permutations computed)
query 28252 done  (100 permutations computed)
query 28267 done  (missing 99 permutations) 
query 28282 done  (100 permutations computed)
query 28297 done  (100 permutations computed)
query 28312 done  (100 permutations computed)
query 28327 done  (100 permutations computed)
query 28342 done  (100 permutations computed)
query 28357 done  (100 permutations computed)
query 28372 done  (missing 80 permutations) 
query 28387 done  (100 permutations computed)
query 28402 done  (missing 84 permutations) 
query 28417 done  (missing 96 permutations) 
query 28432 done  (100 permutations computed)
query 28447 done  (100 permutations computed)
query 28462 done  (100 permutations computed)
query 28477 done  (100 permutations computed)
query 28492 done  (100 permutations computed)
query 28507 done  (100 permutations computed)
query 28522 done  (100 permutations computed)
query 28537 done  (100 permutations computed)
query 28552 done  (100 permutations co

In [None]:
def check(q, k):
    indexes = set(range(0, len(q.perms)))
    indexes.remove(k)
    result = []
    for i in range(len(q.perms[k])):
        tmp = set(indexes)
        for j in tmp:
            if q.perms[k][i] != q.perms[j][i]:
                indexes.remove(j)
    for w in indexes:
        if w < k:
            result.append((w, k))
        else: 
            result.append((k,w))
    return result

same = dict()
for q in queries.values():
    for i in range(len(q.perms)):
        r = check(q, i)
        if len(r) != 0:
            if not q.qid in same.keys():
                same[q.qid] = set()
            for t in r:
                same[q.qid].add(t)
            
print(str(len(same.keys())) + "/" + str(len(queries.keys())) + " queries have duplicate permutations")
for q, s in same.items():
    print("query " + str(q) + " has repeated permutations: " + str(s))
            

In [88]:
#
# Define evaluation metric and objective function
#

# current predictions, dataset => name, score, true iff higher means better
def mse_eval(preds, train_data):
    labels = train_data.get_label()
    avg_mse = 0.5 * np.mean( (labels-preds)**2 )
    return METRIC_NAME, avg_mse, False

# current predictions, dataset => first order derivative, second order derivative
def mse_grads(preds, train_data): 
    #labels = train_data.get_label()
    #lam = .9
    #lam * something * (1-lam)*cross_entropy(i, preds)
        
    global queries
    global doc_to_query
    
    query_to_entropy = {}
    for q in queries.values():
        query_to_entropy[q.qid] = cross_entropy(q.perms, q.probs, preds)
    
    gain = np.zeros_like(preds)
    for i in range(len(gain)):
        gain[i] = query_to_entropy[doc_to_query[i]]
    #grad = preds - labels 
    hess = np.ones_like(gain) 
    return gain, hess    

In [None]:
#
# Train the model
#

params = {
#    'objective':'lambdarank', # what to optimize during training
#    'max_position': 10,      # threshold used in optimizing lamdarank (NDCG)
    'learning_rate': 0.1,
    'num_leaves': 16,
    'min_data_in_leaf': 5,
    'metric': ['None'], #['ndcg'],       # what to use/print for evaluation
#    'ndcg_eval_at': 10
# try printing ndcg and testing
}    

print("training lightgbm...")
start = time.process_time()
lgbm_info = {}
lgbm_model = lightgbm.train(params, train_lgb, num_boost_round=100,
                            feval = mse_eval,
                            fobj  = mse_grads,
                            valid_sets   = [train_lgb, valid_lgb, test_lgb], 
                            valid_names  = ["train", "valid", "test"],
                            evals_result = lgbm_info,
                            verbose_eval = 1)
print("training took " + str(time.process_time() - start) + " s")
print("done")

# lgbm_info
    

training lightgbm...


In [None]:
#
# Plot the results
#

plt.figure(figsize=(9,6), tight_layout=True)

plt.plot(lgbm_info['train'][METRIC_NAME], label='training')
plt.plot(lgbm_info['valid'][METRIC_NAME], label='validation')
plt.plot(lgbm_info['test'][METRIC_NAME], label='test')

plt.grid()
plt.legend()
plt.xlabel("# Trees")
plt.ylabel(METRIC_NAME)
plt.title("Model Error")