# Imports

In [1]:
from algorithms.trace_the_best import TraceTheBest
import numpy as np

# Testing Algorithm

In [2]:
ttb = TraceTheBest()

In [3]:
ttb.running_winner

17

In [4]:
ttb.actions

array([16,  4, 19,  2, 21, 12,  7, 28, 18,  9, 26,  6, 11, 27, 13, 17])

In [5]:
ttb.subset

array([ 0,  1,  3,  5,  8, 10, 14, 15, 20, 22, 23, 24, 25, 29])

In [7]:
ttb.subset.size

14

In [6]:
ttb.step()

In [7]:
# Win count per arm currently listed in ttb.actions, as reported by the
# tracker's preference estimate.
wins = {
    arm: ttb.preference_estimate.get_wins(arm_index=arm)
    for arm in ttb.actions
}

In [8]:
wins

{1: 22.0,
 14: 10.0,
 5: 18.0,
 11: 14.0,
 2: 5.0,
 8: 15.0,
 16: 14.0,
 23: 23.0,
 7: 22.0,
 15: 21.0,
 27: 12.0,
 17: 5.0,
 9: 3.0,
 6: 14.0,
 25: 12.0}

In [9]:
ttb.empirical_winner

[23]

# Testing the reading of features and running times

In [1]:
import csv 
import re
import os
from pathlib import Path
import numpy as np
from sklearn import preprocessing
import pandas as pd


In [2]:
# Load the running-time table and turn every running time t into exp(-lambda_ * t).
# The path components are joined one by one: the previous literal embedded a
# backslash separator, which is an invalid escape sequence ("\c") and only
# resolves on Windows.
running_times_file = os.path.join(Path.cwd(), 'Data_saps_swgcp_reduced', 'cpu_times_inst_param.csv')
with open(running_times_file, newline='') as csvfile:
    running_times_data = list(csv.reader(csvfile))

running_times = []
# Row 0 is skipped (treated as a header row).
for row in running_times_data[1:]:
    next_line = row[0]
    # Pull every signed decimal number out of the first CSV field and discard
    # the first two matches.
    # NOTE(review): presumably those two are identifiers rather than running
    # times, and the regex does not understand scientific notation - confirm
    # against the data file.
    next_rt_vector = [float(s) for s in re.findall(r'-?\d+\.?\d*', next_line)][2:]
    running_times.append(next_rt_vector)
running_times = np.asarray(running_times)

# Exponential transform: a running time of 0 maps to 1 and larger running
# times decay towards 0; lambda_ controls how fast.
lambda_ = 100
running_times = np.exp(-lambda_ * running_times)

In [3]:
running_times.shape

(5000, 20)

In [4]:
# Load the parametrizations and normalize them.
# The path components are joined one by one: the previous literal embedded a
# backslash separator, which is an invalid escape sequence ("\R") and only
# resolves on Windows.
parametrizations_file = os.path.join(Path.cwd(), 'Data_saps_swgcp_reduced', 'Random_Parameters_SAPS.txt')
with open(parametrizations_file, "r") as f:
    lineList = f.readlines()
# Only the first line of the file is parsed; every signed decimal number on it
# is collected in order of appearance.
parametrizations = [float(s) for s in re.findall(r'-?\d+\.?\d*', lineList[0])]
# Arrange the flat list as 20 rows of 4 values (raises if the count is not 80).
parametrizations = np.reshape(parametrizations, (20, 4))
# sklearn's normalize rescales each row to unit norm (L2 by default).
parametrizations = preprocessing.normalize(parametrizations)
parametrizations.shape

(20, 4)

In [6]:
from sklearn.feature_selection import VarianceThreshold

# Read the instance features. The path components are joined one by one so the
# path resolves on every OS (the previous literal hard-coded a Windows
# backslash separator).
features_file = os.path.join(Path.cwd(), 'Data_saps_swgcp_reduced', 'Reduced_Features_SWGCP_only_5000.csv')
with open(features_file, newline='') as csvfile:
    features_data = list(csv.reader(csvfile))
# Row 0 is skipped and the first column of every remaining row is ignored.
# Slicing (rather than `del`) leaves the parsed rows in features_data intact.
features = np.asarray([[float(s) for s in row[1:]] for row in features_data[1:]])

# Rescale every feature column with MinMaxScaler (default range [0, 1]).
min_max_scaler = preprocessing.MinMaxScaler()
features = min_max_scaler.fit_transform(features)

# Drop highly correlated features.
df = pd.DataFrame(features)
# Absolute pairwise correlation between feature columns.
corr_matrix = df.corr().abs()
# Keep only the strict upper triangle so each pair is inspected once and the
# diagonal (self-correlation) is ignored.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
# A column is dropped when its absolute correlation with any earlier column
# exceeds 0.98.
to_drop = [column for column in upper.columns if (upper[column] > 0.98).any()]
df = df.drop(columns=to_drop)
features = df.to_numpy()

# Drop features whose variance does not exceed 0.001.
selector = VarianceThreshold(0.001)
features = selector.fit_transform(features)
features.shape

In [10]:
# Build, for every row of `features`, an (n, d) matrix whose i-th row is the
# Kronecker product of parametrization i with that feature row.
n = parametrizations.shape[0]  # number of parametrizations; not needed below, kept as a notebook-level name
d = parametrizations.shape[1] * features.shape[1]  # length of one Kronecker-product row
# NOTE(review): drawn from NumPy's global RNG without a seed, so the values
# differ between runs; theta_init is not used in this cell.
theta_init = np.random.rand(d)

# np.kron of the (n, p) parametrization matrix with a (1, f) context row yields
# an (n, p * f) matrix whose row i equals np.kron(parametrizations[i], context),
# so one call per context replaces the former per-parametrization inner loop.
Xs = np.array([
    np.kron(parametrizations, next_context[np.newaxis, :])
    for next_context in features
])
Xs.shape

(5000, 20, 32)

# Testing whether `_cached_mean_estimate` is the same as the pairwise preference score


In [1]:
from stats.preference_estimate import PreferenceEstimate

# Estimator over three arms; the confidence radius handed to it is
# 1 / (num_samples + 1), i.e. it shrinks as more samples are observed.
preference_estimate = PreferenceEstimate(
    num_arms=3,
    confidence_radius=lambda num_samples: 1 / (num_samples + 1),
)

In [2]:
preference_estimate.enter_sample(1, 2, 1)

In [3]:
preference_estimate._cached_mean_estimate

array([[0.5, 0.5, 0.5],
       [0.5, 0.5, 1. ],
       [0.5, 0. , 0.5]])

In [4]:
preference_estimate.wins

array([[0., 0., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [13]:
# Snapshot the win totals of arms 1, 2 and 0 at the time this cell is run.
# NOTE(review): the values printed elsewhere in this notebook (wins_1 == 4.0
# next to a wins matrix whose row 1 sums to 4) suggest get_wins returns an
# arm's total wins over all opponents - confirm against PreferenceEstimate.
# NOTE(review): these names are presumably plain numbers that do not refresh
# when more samples are entered, and the execution counts show the cells were
# run out of order; re-run this cell before relying on the ratios below.
wins_1 = preference_estimate.get_wins(1)
wins_2 = preference_estimate.get_wins(2)
wins_0 = preference_estimate.get_wins(0)

In [8]:
# Share of arm 1's wins among the combined wins of arms 1 and 2.
# The parentheses are required: without them the expression is evaluated as
# (wins_1 / wins_1) + wins_2, which is not a ratio.
wins_1 / (wins_1 + wins_2)

1.0

In [10]:
# Feed five more duel outcomes into the estimator.
# Judging by the wins matrix printed after this cell, the call shape is
# enter_sample(first_arm, second_arm, first_arm_won): False credits the win to
# the second arm, True credits it to the first.
# NOTE(review): confirm the parameter names against PreferenceEstimate.
preference_estimate.enter_sample(0, 2, False)  # arm 2 beats arm 0
preference_estimate.enter_sample(0, 1, False)  # arm 1 beats arm 0
preference_estimate.enter_sample(2, 1, False)  # arm 1 beats arm 2
preference_estimate.enter_sample(1, 0, True)   # arm 1 beats arm 0
preference_estimate.enter_sample(0, 2, False)  # arm 2 beats arm 0

In [11]:
preference_estimate.wins

array([[0., 0., 0.],
       [2., 0., 2.],
       [2., 0., 0.]])

In [12]:
preference_estimate._cached_mean_estimate

array([[0.5, 0. , 0. ],
       [1. , 0.5, 1. ],
       [1. , 0. , 0.5]])

In [26]:
wins_1 /( wins_1 + wins_0)

1.0

In [23]:
wins_1

4.0

In [29]:
preference_estimate.get_mean_estimate(1,2)

1.0

In [None]:
preference_estimate.enter_sample(0, 2, False)

In [28]:
wins_1 / (wins_1 + wins_2)

0.6666666666666666

In [36]:
# Recompute by hand one running-mean step for the (1, 2) entry:
#     new_mean = prev + (observation - prev) / num_samples
prev = preference_estimate._cached_mean_estimate[1][2]
# Observation being folded in.
# NOTE(review): 0 presumably stands for arm 1 losing to arm 2 - confirm.
win_indicator = 0
# Use win_indicator instead of repeating the literal so the observation is
# defined in exactly one place.
new_mean = prev + (win_indicator - prev) / preference_estimate.get_num_samples(1, 2)

In [40]:
(0 - preference_estimate._cached_mean_estimate[1][2]) / preference_estimate.get_num_samples(1, 2)

-0.5

In [42]:
preference_estimate.get_mean_estimate_matrix()

array([[0.5, 0. , 0. ],
       [1. , 0.5, 1. ],
       [1. , 0. , 0.5]])