## eval logger

This notebook exercises the `Logger` class, which generates logs from an exploration policy.

In [31]:
import Datasets
import Metrics
import Policy   
import numpy as np

## load data

In [2]:
# Create an empty Datasets container, then populate it from the MQ2008 .npz file.
# The second argument is presumably the dataset name (later logs report "Dataset: MQ2008") — TODO confirm.
mq2008Data = Datasets.Datasets()
mq2008Data.loadNpz('../../Data/MQ2008.npz', 'MQ2008')


Datasets:loadNpz [INFO] Loaded ../../Data/MQ2008.npz  NumQueries, [Min,Max]NumDocs, totalDocs, MaxNumFeatures:  784 5 121 121 47


## train deterministic policy

In [3]:
# Build a lasso-based deterministic policy on the MQ2008 data.
detLogger = Policy.DeterministicPolicy(mq2008Data, 'lasso')
# Train on every feature column. The feature count is read from the last axis of
# the feature array rather than hard-coded as 47, so the cell keeps working if a
# dataset with a different number of features is loaded.
# The trailing False is presumably the exponentiated-gains switch (train logs
# "Exponentiated Gains? False") — TODO confirm against Policy.py.
detLogger.train(mq2008Data, range(mq2008Data.features.shape[-1]), False)

DeterministicPolicy:init [INFO] Dataset: MQ2008
DeterministicPolicy:train [INFO] lasso CVAlpha: 0.008000 Exponentiated Gains? False
DeterministicPolicy:train [INFO] Created lasso predictor using dataset MQ2008. Features: range(0, 47)


## create filtered data set: "new data"

In [4]:
# createFilteredDataset builds a new dataset in which each query keeps at most
# num_allowed_docs (int) documents. Per the method's documentation, policyParams
# is used to rank and filter the original document set.
newData = detLogger.createFilteredDataset(num_allowed_docs=5)

DeterministicPolicy:createFilteredDataset [INFO] MQ2008_lasso_body_5 MaxNumDocs 5


In [8]:
# Show the feature-array shapes of the filtered and the original dataset side by side.
newData.features.shape, mq2008Data.features.shape

((784, 5, 47), (784, 121, 47))

### uniform policy

In [9]:
# All subclasses of StochasticPolicy should supply a setupExploration method to set these members.
# det_policy:   DeterministicPolicy or None.
#               If None, a document distribution is sampled from a Dirichlet (uniform hyper-prior).
#               If det_policy is specified, documents are scored to get an unnormalized
#               probability distribution.
# temperature:  (float) Multiplier for document scores. In the Dirichlet case, this constant
#               is added and the distribution is renormalized.

In [10]:
# Stochastic policy over the filtered dataset, backed by the trained deterministic policy.
# The positional arguments are presumably (dataset, temperature, det_policy); the init log
# reports "Temperature: 0" for the middle argument — TODO confirm against Policy.py.
uniformPolicy = Policy.StochasticPolicy(newData, 0, detLogger)

StochasticPolicy:init [INFO] Dataset: MQ2008_lasso_body_5 
StochasticPolicy:init [INFO] Deterministic policy: MQ2008_lasso_body Temperature: 0


In [11]:
# ranking_size (int): length of the rankings to be predicted (used to define gamma).
uniformPolicy.setupExploration(ranking_size=8)

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Done   1 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   2 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   3 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   5 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   6 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   7 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   8 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done   9 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done  10 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-2)]: Done  12 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-2)]: Done  13 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-2)]: Done  14 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-2)]: Done  15 tasks      | elapsed:   

In [21]:
from Logger import Logger

In [22]:
# Wrap the stochastic policy in a Logger; ranking_size uses the same literal (8)
# that was passed to setupExploration.
logger = Logger(uniformPolicy, ranking_size=8)

Logger:init [INFO] Policy: MQ2008_lasso_body_5_stoc0_(MQ2008_lasso_body) RankingSize: 8


In [24]:
# Revenue metric constructed from the filtered dataset and the ranking length.
rankingSize = 8
revenue = Metrics.Revenue(newData, rankingSize)


Revenue:init [INFO] RankingSize: 8


In [28]:
# Draw one sample from the logger and print it under a header naming its three fields.
print("One sample: Query, Ranking, MetricValue")
print(logger.createOneSample(revenue), flush=True)


One sample: Query, Ranking, MetricValue
(352, array([ 2,  1,  3,  0,  4, -1, -1, -1], dtype=int32), 887.7917207679359)


In [35]:
# Generate a log of 10 samples scored with the revenue metric and unpack the three returned values.
queries, rankings, metricValues = logger.createLog(10, revenue)

.
Logger:createLog [INFO] LogSize: 10 Metric: Revenue


In [36]:
# Count how often each query id occurs in the log; index i holds the count for query id i.
histogram = np.bincount(queries)

In [37]:
# Display the raw per-query counts.
histogram

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [38]:
# Summarise the generated log: per-query counts, then the number of distinct queries seen.
print("Histogram of seen queries", histogram, flush=True)
print("Num unique queries", np.sum(histogram > 0), flush=True)
# Report the metric's name with its mean over the log, accumulated as np.longdouble.
print(revenue.name, metricValues.mean(dtype=np.longdouble), flush=True)

Histogram of seen queries [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0