# Improved Cold-Start Recommendation via Two-Level Bandit Algorithms

In [1]:
!pip install xlutils

Collecting xlutils
  Downloading xlutils-2.0.0-py2.py3-none-any.whl (55 kB)
[?25l[K     |██████                          | 10 kB 20.3 MB/s eta 0:00:01[K     |████████████                    | 20 kB 27.3 MB/s eta 0:00:01[K     |█████████████████▉              | 30 kB 22.1 MB/s eta 0:00:01[K     |███████████████████████▉        | 40 kB 17.3 MB/s eta 0:00:01[K     |█████████████████████████████▊  | 51 kB 5.7 MB/s eta 0:00:01[K     |████████████████████████████████| 55 kB 2.1 MB/s 
Installing collected packages: xlutils
Successfully installed xlutils-2.0.0


## Imports

In [2]:
import math
from collections import defaultdict
from numpy.random import beta
from operator import itemgetter
import numpy as np
import random
from random import betavariate
from scipy.special import btdtri
import sys

## Utils

### Posterior

In [3]:
class Posterior:
    """Common base type for posterior distributions.

    It currently carries no state and no behaviour of its own.
    """

    def __init__(self):
        return None

### Beta

In [4]:
class Beta(Posterior):
    """Manipulate posteriors of Bernoulli/Beta experiments.

    The state is the two-element list ``N``: ``N[1]`` is used as the first
    shape parameter and ``N[0]`` as the second one when sampling or taking a
    quantile.  An observation ``obs`` increments ``N[int(obs)]``, so a reward
    of 1 raises ``N[1]`` and a reward of 0 raises ``N[0]``.
    """

    def __init__(self, a=1, b=1):
        """Store the prior pseudo-counts and start from them.

        Args:
            a: initial value for ``N[0]``.
            b: initial value for ``N[1]``.
        """
        self.a = a
        self.b = b
        # Initialise the counters right away so that update(), sample() and
        # quantile() work even when the caller never calls reset() explicitly.
        self.reset()

    def reset(self, a=0, b=0):
        """Re-initialise the counters.

        Args:
            a: new value for ``N[0]``; ``0`` means "use the constructor value".
            b: new value for ``N[1]``; ``0`` means "use the constructor value".
        """
        if a == 0:
            a = self.a
        if b == 0:
            b = self.b
        self.N = [a, b]

    def update(self, obs):
        """Record one observation by incrementing ``N[int(obs)]``."""
        self.N[int(obs)] += 1

    def sample(self):
        """Draw one value from Beta(N[1], N[0])."""
        return betavariate(self.N[1], self.N[0])

    def quantile(self, p):
        """Return the ``p``-quantile of Beta(N[1], N[0])."""
        # Bug: do not call btdtri with (0.5,0.5,0.5) in scipy < 0.9
        # NOTE(review): recent SciPy releases deprecate btdtri in favour of
        # scipy.special.betaincinv -- confirm against the installed version.
        return btdtri(self.N[1], self.N[0], p)

## Random Choice

In [5]:
class RandomChoice(object):
    """Baseline bandit policy that picks an arm uniformly at random.

    Per-arm pull counts and mean rewards are tracked for inspection only;
    they never influence which arm is chosen.
    """

    def __init__(self):
        # Becomes True once set_arms() has allocated the per-arm statistics.
        self.flag_set = False

    def _require_arms(self):
        """Print an error and exit when set_arms() has not been called yet."""
        if not self.flag_set:
            print('Error: Random Choice not set. Aborting')
            import sys
            sys.exit(1)

    def set_arms(self, n_arms):
        """Allocate (or reset) the statistics for ``n_arms`` arms."""
        self.n = n_arms
        self.counts = [0] * n_arms    # number of pulls per arm
        self.values = [0.] * n_arms   # running mean reward per arm
        self.flag_set = True

    def choose_arm(self):
        """Choose a random arm index in ``[0, n)``."""
        self._require_arms()
        return np.random.randint(self.n)

    def update(self, choosen_arm, reward):
        """Update the running mean reward of ``choosen_arm`` with ``reward``.

        The previous implementation advanced the count by 0.1 and averaged
        against a random integer instead of the stored mean, which left
        meaningless numbers in ``values``; this keeps a proper running mean.
        """
        self._require_arms()
        self.counts[choosen_arm] += 1
        n = self.counts[choosen_arm]
        value = self.values[choosen_arm]
        # Running mean: weight the old mean by (n-1)/n and the new reward by 1/n.
        self.values[choosen_arm] = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        return

In [6]:
class RandomChoiceLevel2(object):
    """Uniformly random bandit policy (second-level variant).

    Per-arm pull counts and mean rewards are tracked for inspection only;
    they never influence which arm is chosen.
    """

    def __init__(self):
        # Becomes True once set_arms() has allocated the per-arm statistics.
        self.flag_set = False

    def _require_arms(self):
        """Print an error and exit when set_arms() has not been called yet."""
        if not self.flag_set:
            print('Error: Random Choice not set. Aborting')
            import sys
            sys.exit(1)

    def set_arms(self, n_arms):
        """Allocate (or reset) the statistics for ``n_arms`` arms."""
        self.n = n_arms
        self.counts = [0] * n_arms    # number of pulls per arm
        self.values = [0.] * n_arms   # running mean reward per arm
        self.flag_set = True

    def choose_arm(self):
        """Choose a random arm index in ``[0, n)``."""
        self._require_arms()
        return np.random.randint(self.n)

    def update(self, choosen_arm, reward):
        """Update the running mean reward of ``choosen_arm`` with ``reward``.

        The previous implementation advanced the count by 0.1 and averaged
        against a random integer instead of the stored mean, which left
        meaningless numbers in ``values``; this keeps a proper running mean.
        """
        self._require_arms()
        self.counts[choosen_arm] += 1
        n = self.counts[choosen_arm]
        value = self.values[choosen_arm]
        # Running mean: weight the old mean by (n-1)/n and the new reward by 1/n.
        self.values[choosen_arm] = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        return

## Epsilon greedy

In [7]:
class EpsilonGreedy(object):
    """Epsilon-greedy bandit policy.

    With probability ``epsilon`` a uniformly random arm is explored;
    otherwise the arm with the highest mean reward so far is exploited.

    Args:
        epsilon: exploration probability. Defaults to 0.2, the value that
            used to be hard-coded inside ``choose_arm``.
    """

    def __init__(self, epsilon=0.2):
        self.epsilon = epsilon
        # Becomes True once set_arms() has allocated the per-arm statistics.
        self.flag_set = False

    def _require_arms(self):
        """Print an error and exit when set_arms() has not been called yet."""
        if not self.flag_set:
            print('Error: Epsilon-greedy not set. Aborting')
            import sys
            sys.exit(1)

    def set_arms(self, n_arms):
        """Allocate (or reset) the statistics for ``n_arms`` arms."""
        self.n = n_arms
        self.counts = [0] * n_arms   # number of pulls per arm
        self.values = [0.] * n_arms  # running mean reward per arm
        self.flag_set = True

    def choose_arm(self):
        """Choose an arm for testing."""
        self._require_arms()
        if np.random.random() > self.epsilon:
            # Exploit (use best arm)
            return np.argmax(self.values)
        # Explore (test all arms)
        return np.random.randint(self.n)

    def update(self, arm, reward):
        """Update an arm with some reward value (example: like = 1; no like = 0)."""
        self._require_arms()
        self.counts[arm] = self.counts[arm] + 1
        n = self.counts[arm]
        value = self.values[arm]
        # Running mean: weight the old mean by (n-1)/n and the new reward by 1/n.
        self.values[arm] = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        return

In [8]:
class EpsilonGreedyLevel2(object):
    """Epsilon-greedy bandit policy (second-level variant).

    With probability ``epsilon`` a uniformly random arm is explored;
    otherwise the arm with the highest mean reward so far is exploited.

    Args:
        epsilon: exploration probability. Defaults to 1.0, the value that
            used to be hard-coded inside ``choose_arm``; with that default
            the exploit branch is never taken.
    """

    def __init__(self, epsilon=1.0):
        self.epsilon = epsilon
        # Becomes True once set_arms() has allocated the per-arm statistics.
        self.flag_set = False

    def _require_arms(self):
        """Print an error and exit when set_arms() has not been called yet."""
        if not self.flag_set:
            print('Error: Epsilon-greedy not set. Aborting')
            import sys
            sys.exit(1)

    def set_arms(self, n_arms):
        """Allocate (or reset) the statistics for ``n_arms`` arms."""
        self.n = n_arms
        self.counts = [0] * n_arms   # number of pulls per arm
        self.values = [0.] * n_arms  # running mean reward per arm
        self.flag_set = True

    def choose_arm(self):
        """Choose an arm for testing."""
        self._require_arms()
        if np.random.random() > self.epsilon:
            # Exploit (use best arm)
            return np.argmax(self.values)
        # Explore (test all arms)
        return np.random.randint(self.n)

    def update(self, arm, reward):
        """Update an arm with some reward value (example: like = 1; no like = 0)."""
        self._require_arms()
        self.counts[arm] = self.counts[arm] + 1
        n = self.counts[arm]
        value = self.values[arm]
        # Running mean: weight the old mean by (n-1)/n and the new reward by 1/n.
        self.values[arm] = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        return

## UCB

In [9]:
def ind_max(x):
    """Return the index of the first occurrence of the largest item of list ``x``."""
    return x.index(max(x))

class UCB1(object):
    """UCB1 bandit policy: mean reward plus sqrt(2 * ln(total pulls) / arm pulls)."""

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Allocate (or reset) pull counts and mean rewards for ``n_arms`` arms."""
        self.n = n_arms
        self.counts = [0] * n_arms
        self.values = [0.0] * n_arms
        self.flag_set = True
        return

    def choose_arm(self):
        """Return the first never-played arm, else the arm with the largest UCB index."""
        if not self.flag_set:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        # Every arm is played once before the confidence bounds are used.
        for arm, pulls in enumerate(self.counts):
            if pulls == 0:
                return arm

        total_pulls = sum(self.counts)
        upper_bounds = [
            mean + math.sqrt((2 * math.log(total_pulls)) / float(pulls))
            for mean, pulls in zip(self.values, self.counts)
        ]
        # Index of the first maximum.
        return upper_bounds.index(max(upper_bounds))

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        if not self.flag_set:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        pulls = self.counts[chosen_arm] + 1
        self.counts[chosen_arm] = pulls

        previous_mean = self.values[chosen_arm]
        self.values[chosen_arm] = ((pulls - 1) / float(pulls)) * previous_mean + (1 / float(pulls)) * reward
        return

In [10]:
def ind_max(x):
    """Return the index of the first occurrence of the largest item of list ``x``."""
    return x.index(max(x))

class UCB1level2(object):
    """UCB1 bandit policy (second-level variant): mean plus sqrt(2 * ln(total) / pulls)."""

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Allocate (or reset) pull counts and mean rewards for ``n_arms`` arms."""
        self.n = n_arms
        self.counts = [0] * n_arms
        self.values = [0.0] * n_arms
        self.flag_set = True
        return

    def choose_arm(self):
        """Return the first never-played arm, else the arm with the largest UCB index."""
        if not self.flag_set:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        # Every arm is played once before the confidence bounds are used.
        for arm, pulls in enumerate(self.counts):
            if pulls == 0:
                return arm

        total_pulls = sum(self.counts)
        upper_bounds = [
            mean + math.sqrt((2 * math.log(total_pulls)) / float(pulls))
            for mean, pulls in zip(self.values, self.counts)
        ]
        # Index of the first maximum.
        return upper_bounds.index(max(upper_bounds))

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        if not self.flag_set:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        pulls = self.counts[chosen_arm] + 1
        self.counts[chosen_arm] = pulls

        previous_mean = self.values[chosen_arm]
        self.values[chosen_arm] = ((pulls - 1) / float(pulls)) * previous_mean + (1 / float(pulls)) * reward
        return

In [11]:
def ind_max(x):
    """Return the index of the first occurrence of the largest item of list ``x``."""
    return x.index(max(x))

class UCB2(object):
    """UCB2 bandit policy: arms are played in epochs whose length grows with ``tau(r)``.

    ``r[arm]`` counts the epochs started for an arm and
    ``tau(r) = ceil((1 + alpha) ** r)`` drives both the epoch length and the
    exploration bonus.
    """

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Allocate (or reset) all statistics for ``n_arms`` arms."""
        self.n = n_arms
        self.alpha = 0.5
        self.counts = [0 for col in range(n_arms)]    # pulls per arm
        self.values = [0.0 for col in range(n_arms)]  # running mean reward per arm
        self.r = [0 for col in range(n_arms)]         # epochs started per arm
        self.__current_arm = 0
        self.__next_update = 0   # total pull count at which a new arm is selected
        self.flag_set = True
        return

    def __bonus(self, n, r):
        """Exploration bonus for an arm with ``r`` epochs after ``n`` total pulls."""
        tau = self.__tau(r)
        bonus = math.sqrt((1. + self.alpha) * math.log(math.e * float(n) / tau) / (2 * tau))
        return bonus

    def __tau(self, r):
        """Return ``ceil((1 + alpha) ** r)`` as an int."""
        return int(math.ceil((1 + self.alpha) ** r))

    def __set_arm(self, arm):
        """
        When choosing a new arm, make sure we play that arm for
        tau(r+1) - tau(r) episodes.
        """
        self.__current_arm = arm
        self.__next_update += max(1, self.__tau(self.r[arm] + 1) - self.__tau(self.r[arm]))
        self.r[arm] += 1

    def choose_arm(self):
        """Return the arm to play next.

        Never-played arms come first; then the arm of the running epoch is
        repeated until the epoch ends; only then is the arm with the highest
        mean-plus-bonus selected and a new epoch started for it.
        """
        if self.flag_set == False:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        n_arms = len(self.counts)
        # play each arm once
        for arm in range(n_arms):
            if self.counts[arm] == 0:
                self.__set_arm(arm)
                return arm

        total_counts = sum(self.counts)
        # make sure we aren't still playing the previous arm.
        if self.__next_update > total_counts:
            return self.__current_arm

        # `range`, not the Python 2 only `xrange`, which raised NameError here.
        ucb_values = [
            self.values[arm] + self.__bonus(total_counts, self.r[arm])
            for arm in range(n_arms)
        ]

        # Index of the first maximum.
        chosen_arm = ucb_values.index(max(ucb_values))
        self.__set_arm(chosen_arm)
        return chosen_arm

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        if self.flag_set == False:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]

        value = self.values[chosen_arm]
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
        return

In [12]:
def ind_max(x):
    """Return the index of the first occurrence of the largest item of list ``x``."""
    return x.index(max(x))

class UCB2Level2(object):
    """UCB2 bandit policy (second-level variant) with epochs that grow with ``tau(r)``.

    ``r[arm]`` counts the epochs started for an arm and
    ``tau(r) = ceil((1 + alpha) ** r)`` drives both the epoch length and the
    exploration bonus.
    """

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Allocate (or reset) all statistics for ``n_arms`` arms."""
        self.n = n_arms
        self.alpha = 0.5
        self.counts = [0 for col in range(n_arms)]    # pulls per arm
        self.values = [0.0 for col in range(n_arms)]  # running mean reward per arm
        self.r = [0 for col in range(n_arms)]         # epochs started per arm
        self.__current_arm = 0
        self.__next_update = 0   # total pull count at which a new arm is selected
        self.flag_set = True
        return

    def __bonus(self, n, r):
        """Exploration bonus for an arm with ``r`` epochs after ``n`` total pulls."""
        tau = self.__tau(r)
        bonus = math.sqrt((1. + self.alpha) * math.log(math.e * float(n) / tau) / (2 * tau))
        return bonus

    def __tau(self, r):
        """Return ``ceil((1 + alpha) ** r)`` as an int."""
        return int(math.ceil((1 + self.alpha) ** r))

    def __set_arm(self, arm):
        """
        When choosing a new arm, make sure we play that arm for
        tau(r+1) - tau(r) episodes.
        """
        self.__current_arm = arm
        self.__next_update += max(1, self.__tau(self.r[arm] + 1) - self.__tau(self.r[arm]))
        self.r[arm] += 1

    def choose_arm(self):
        """Return the arm to play next.

        Never-played arms come first; then the arm of the running epoch is
        repeated until the epoch ends; only then is the arm with the highest
        mean-plus-bonus selected and a new epoch started for it.
        """
        if self.flag_set == False:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        n_arms = len(self.counts)
        # play each arm once
        for arm in range(n_arms):
            if self.counts[arm] == 0:
                self.__set_arm(arm)
                return arm

        total_counts = sum(self.counts)
        # make sure we aren't still playing the previous arm.
        if self.__next_update > total_counts:
            return self.__current_arm

        # `range`, not the Python 2 only `xrange`, which raised NameError here.
        ucb_values = [
            self.values[arm] + self.__bonus(total_counts, self.r[arm])
            for arm in range(n_arms)
        ]

        # Index of the first maximum.
        chosen_arm = ucb_values.index(max(ucb_values))
        self.__set_arm(chosen_arm)
        return chosen_arm

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        if self.flag_set == False:
            print('Error: UCB not set. Aborting')
            import sys
            sys.exit(1)

        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]

        value = self.values[chosen_arm]
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
        return

## Bayes UCB

In [13]:
class BayesUCB():
    """Bayes-UCB bandit policy: each arm is scored by a quantile of its posterior.

    At step ``t`` the index of an arm is the ``1 - 1/t`` quantile of that
    arm's posterior; the arm with the largest index is played.
    """

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Create and reset one ``Beta`` posterior per arm and restart the step counter."""
        self.posterior_dist = Beta
        self.t         = 1.0
        self.arms      = n_arms
        self.posterior = defaultdict(lambda: None)
        for arm_id in range(self.arms):
            self.posterior[arm_id] = self.posterior_dist()
            self.posterior[arm_id].reset()
        self.flag_set = True
        return

    def compute_index(self, arm_id):
        """Return the ``1 - 1/t`` posterior quantile of ``arm_id``."""
        if self.flag_set == False:
            print('Error: BayesUCB not set. Aborting')
            import sys
            sys.exit(1)

        return self.posterior[arm_id].quantile(1 - (1. / self.t))

    def choose_arm(self):
        """Return the id of the arm with the largest index (lowest id on ties)."""
        if self.flag_set == False:
            print('Error: BayesUCB not set. Aborting')
            import sys
            sys.exit(1)

        # Build a real list: np.argmax on a dict `values()` view treats the
        # view as a single scalar object and always answers 0.
        indices = [self.compute_index(arm_id) for arm_id in range(self.arms)]
        return int(np.argmax(indices))

    def update(self, chosen_arm, reward):
        """Feed ``reward`` to the posterior of ``chosen_arm`` and advance ``t``."""
        # Imported unconditionally: a conditional function-level import makes
        # `sys` a local name and the later sys.exit() raised UnboundLocalError.
        import sys

        if self.flag_set == False:
            print('Error: BayesUCB not set. Aborting')
            sys.exit(1)

        if chosen_arm not in range(self.arms):
            print('Error in BayesUCB. Invalid chosen arm. Aborting.')
            sys.exit(1)

        self.posterior[chosen_arm].update(reward)
        self.t += 1
        return

In [14]:
class BayesUCBlevel2():
    """Bayes-UCB bandit policy (second-level variant).

    At step ``t`` the index of an arm is the ``1 - 1/t`` quantile of that
    arm's posterior; the arm with the largest index is played.
    """

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Create and reset one ``Beta`` posterior per arm and restart the step counter."""
        self.posterior_dist = Beta
        self.t         = 1.0
        self.arms      = n_arms
        self.posterior = defaultdict(lambda: None)
        for arm_id in range(self.arms):
            self.posterior[arm_id] = self.posterior_dist()
            self.posterior[arm_id].reset()
        self.flag_set = True
        return

    def compute_index(self, arm_id):
        """Return the ``1 - 1/t`` posterior quantile of ``arm_id``."""
        if self.flag_set == False:
            print('Error: BayesUCB not set. Aborting')
            import sys
            sys.exit(1)

        return self.posterior[arm_id].quantile(1 - (1. / self.t))

    def choose_arm(self):
        """Return the id of the arm with the largest index (lowest id on ties)."""
        if self.flag_set == False:
            print('Error: BayesUCB not set. Aborting')
            import sys
            sys.exit(1)

        # Build a real list: np.argmax on a dict `values()` view treats the
        # view as a single scalar object and always answers 0.
        indices = [self.compute_index(arm_id) for arm_id in range(self.arms)]
        return int(np.argmax(indices))

    def update(self, chosen_arm, reward):
        """Feed ``reward`` to the posterior of ``chosen_arm`` and advance ``t``."""
        # Imported unconditionally: a conditional function-level import makes
        # `sys` a local name and the later sys.exit() raised UnboundLocalError.
        import sys

        if self.flag_set == False:
            print('Error: BayesUCB not set. Aborting')
            sys.exit(1)

        if chosen_arm not in range(self.arms):
            print('Error in BayesUCB. Invalid chosen arm. Aborting.')
            sys.exit(1)

        self.posterior[chosen_arm].update(reward)
        self.t += 1
        return

## Thompson Sampling

In [15]:
class ThompsonSampling(object):
    """Thompson Sampling for 0/1 rewards with one Beta distribution per arm.

    Each arm is scored with a draw from
    ``Beta(successes + param_alpha, fails + param_beta)`` and the arm with
    the largest draw is played.
    """

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Allocate (or reset) prior parameters and outcome counters for ``n_arms`` arms."""
        self.param_alpha   = defaultdict(lambda: None)
        self.param_beta    = defaultdict(lambda: None)
        self.num_successes = defaultdict(lambda: None)
        self.num_fails     = defaultdict(lambda: None)
        self.counts        = defaultdict(lambda: None)
        self.n_arms = n_arms
        for arm in range(n_arms):
            self.param_alpha[arm]   = 1.0
            self.param_beta[arm]    = 1.0
            self.num_successes[arm] = 0.0
            self.num_fails[arm]     = 0.0
            self.counts[arm]        = 0
        self.flag_set = True
        return

    def choose_arm(self):
        """Draw one sample per arm (in arm order) and return the arm with the largest draw."""
        if self.flag_set == False:
            print('Error: Thompson Sampling not set. Aborting')
            import sys
            sys.exit(1)

        scores = [(arm_id, beta(self.num_successes[arm_id] + self.param_alpha[arm_id],
                        self.num_fails[arm_id] + self.param_beta[arm_id]))
                        for arm_id in range(self.n_arms)]

        # `scores` is already ordered by arm id, and the sort is stable, so
        # ties go to the lowest arm id.
        ranking = sorted(scores, key=itemgetter(1), reverse=True)
        return ranking[0][0]

    def update(self, chosen_arm, reward):
        """Count ``reward`` (1 = success, 0 = failure) for ``chosen_arm``.

        Any other reward value, or an arm outside ``range(n_arms)``, prints
        an error and exits.
        """
        # Imported unconditionally: a conditional function-level import makes
        # `sys` a local name and the later sys.exit() calls raised
        # UnboundLocalError instead of exiting.
        import sys

        if self.flag_set == False:
            print('Error: Thompson Sampling not set. Aborting')
            sys.exit(1)

        if chosen_arm not in range(self.n_arms):
            print('--- self.n_arms')
            print(self.n_arms)
            print('--- chosen arm')
            print(chosen_arm)
            print('Error in thompson sampling. Invalid chosen arm. Aborting')
            sys.exit(1)

        if reward == 1.0:
            self.num_successes[chosen_arm] += 1.0
        elif reward == 0:
            self.num_fails[chosen_arm] += 1.0
        else:
            print('Error in thompson sampling. Invalid reward. Aborting')
            sys.exit(1)
        return

In [16]:
class ThompsonSamplingLevel2(object):
    """Thompson Sampling for 0/1 rewards (second-level variant).

    Each arm is scored with a draw from
    ``Beta(successes + param_alpha, fails + param_beta)`` and the arm with
    the largest draw is played.
    """

    def __init__(self):
        # Flipped to True by set_arms().
        self.flag_set = False

    def set_arms(self, n_arms):
        """Allocate (or reset) prior parameters and outcome counters for ``n_arms`` arms."""
        self.param_alpha   = defaultdict(lambda: None)
        self.param_beta    = defaultdict(lambda: None)
        self.num_successes = defaultdict(lambda: None)
        self.num_fails     = defaultdict(lambda: None)
        self.counts        = defaultdict(lambda: None)
        self.n_arms = n_arms
        for arm in range(n_arms):
            self.param_alpha[arm]   = 1.0
            self.param_beta[arm]    = 1.0
            self.num_successes[arm] = 0.0
            self.num_fails[arm]     = 0.0
            self.counts[arm]        = 0
        self.flag_set = True
        return

    def choose_arm(self):
        """Draw one sample per arm (in arm order) and return the arm with the largest draw."""
        if self.flag_set == False:
            print('Error: Thompson Sampling not set. Aborting')
            import sys
            sys.exit(1)

        scores = [(arm_id, beta(self.num_successes[arm_id] + self.param_alpha[arm_id],
                        self.num_fails[arm_id] + self.param_beta[arm_id]))
                        for arm_id in range(self.n_arms)]

        # `scores` is already ordered by arm id, and the sort is stable, so
        # ties go to the lowest arm id.
        ranking = sorted(scores, key=itemgetter(1), reverse=True)
        return ranking[0][0]

    def update(self, chosen_arm, reward):
        """Count ``reward`` (1 = success, 0 = failure) for ``chosen_arm``.

        Any other reward value, or an arm outside ``range(n_arms)``, prints
        an error and exits.
        """
        # Imported unconditionally: a conditional function-level import makes
        # `sys` a local name and the later sys.exit() calls raised
        # UnboundLocalError instead of exiting.
        import sys

        if self.flag_set == False:
            print('Error: Thompson Sampling not set. Aborting')
            sys.exit(1)

        if chosen_arm not in range(self.n_arms):
            print('--- self.n_arms')
            print(self.n_arms)
            print('--- chosen arm')
            print(chosen_arm)
            print('Error in thompson sampling. Invalid chosen arm. Aborting')
            sys.exit(1)

        if reward == 1.0:
            self.num_successes[chosen_arm] += 1.0
        elif reward == 0:
            self.num_fails[chosen_arm] += 1.0
        else:
            print('Error in thompson sampling. Invalid reward. Aborting')
            sys.exit(1)
        return

## Policy Evaluator

In [17]:
import copy
from collections import Counter
from collections import defaultdict
import sys
import numpy as np
import os

In [18]:
class select_arm(object):
    """Second-level bandit: picks one video inside a given cluster.

    The cluster's videos are read from
    ``Movielens LDA/clusters/<ncluster>/movie_cluster_<cluster>.txt``, a text
    file whose lines have the form ``<video id>,<value>``.  Every distinct
    video id becomes one arm of a ``BayesUCBlevel2`` policy.

    Args:
        cluster: id of the cluster (used in the file name).
        ncluster: total number of clusters of the model (used in the directory name).
    """

    def __init__(self, cluster, ncluster):
        self.cluster   = cluster
        self.ncluster  = ncluster
        # A freshly built policy needs no copying.  Constructing it directly
        # also avoids the name `copy`, which this notebook binds both to the
        # stdlib module and to xlutils.copy.copy depending on which cell ran last.
        self.mab_alg   = BayesUCBlevel2()
        self.log_file  = 'Movielens LDA/clusters/%s/movie_cluster_%s.txt' %(self.ncluster, self.cluster)
        self.idvideo2idarm = defaultdict(lambda: None)   # video id (str) -> arm index
        self.idarm2idvideo = defaultdict(lambda: None)   # arm index -> video id (str)
        self.before    = defaultdict(lambda: None)
        self.count_itens = defaultdict(lambda: 0)        # occurrences of each video id

        # An empty cluster file still gets one (unnamed) arm so the policy can be set up.
        self.contidarm = 1 if os.stat(self.log_file).st_size == 0 else 0

        with open(self.log_file, 'r') as cluster_file:
            for line in cluster_file:
                arm_id, inf_value = line.strip().split(',')
                self.count_itens[arm_id] += 1
                # `in` does not trigger the defaultdict factory.
                if arm_id not in self.idvideo2idarm:
                    self.idvideo2idarm[arm_id] = self.contidarm
                    self.idarm2idvideo[self.contidarm] = arm_id
                    self.contidarm += 1

        self.mab_alg.set_arms(self.contidarm)
        # Arm picked at construction time; get_video() compares its pick against it.
        self.before = self.mab_alg.choose_arm()
        self.count_n = 0

    def get_video(self):
        """Pick an arm and return the matching video id (``None`` for an unnamed arm).

        The policy is rewarded with 1.0 when it repeats the previously
        chosen arm and 0.0 otherwise.
        """
        # NOTE(review): set_arms() resets the policy on every call, so nothing
        # learned by earlier calls survives -- confirm this is intended.
        self.mab_alg.set_arms(self.contidarm)
        recommendation = self.mab_alg.choose_arm()
        tmp_id_arm     = self.idarm2idvideo[recommendation]
        if self.before == recommendation:
            reward = 1.0
            self.count_n += 1
        else:
            reward = 0.0
        self.mab_alg.update(recommendation, reward)
        self.before = recommendation
        return tmp_id_arm

In [None]:
'''
#TEST
x=2
num_model=100
#mab_alg = copy.copy(RandomChoiceLevel2())
pegararm = select_arm(x,num_model)
video = pegararm.get_video()
print(video)'''

In [20]:
from collections import Counter
from collections import defaultdict
from xlrd import open_workbook
from xlutils.copy import copy
import matplotlib.pyplot as plt
import sys
import numpy as np

In [25]:
!git clone https://github.com/OtavioAugusto/RecSys.git
%cd RecSys/src

Cloning into 'RecSys'...
remote: Enumerating objects: 833, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 833 (delta 0), reused 0 (delta 0), pack-reused 832[K
Receiving objects: 100% (833/833), 204.96 MiB | 22.07 MiB/s, done.
Resolving deltas: 100% (239/239), done.
Checking out files: 100% (861/861), done.
/content/RecSys/src


In [28]:
!wget -q --show-progress -O vids_very_ratings.txt https://raw.githubusercontent.com/OtavioAugusto/RecSys/master/src/Very%20reward.txt
!wget -q --show-progress -O vids_less_ratings.txt https://raw.githubusercontent.com/OtavioAugusto/RecSys/master/src/Low%20reward.txt



In [30]:
class PolicyEvaluator(object):
    """Replay a ``user,item`` log against the two-level bandit and record the rewards.

    Args:
        context: dict with the keys
            ``'mab'`` (first-level policy exposing ``set_arms``, ``choose_arm``
            and ``update``), ``'log_file'`` (path of the log to replay),
            ``'column'`` (column written in ``Results.xls``) and
            ``'cluster'`` (number of clusters, i.e. first-level arms).
    """

    def __init__(self, context):
        self.context           = context
        self.mab_alg           = context['mab']
        self.log_file          = context['log_file']
        self.column            = context['column']
        self.ncluster          = context['cluster']

        self.cumulative_reward = []
        self.idvideo2idarm     = defaultdict(lambda: None)
        self.idarm2idvideo     = defaultdict(lambda: None)
        # One first-level arm per cluster.
        self.contidarm         = self.ncluster
        print('Selected clusters %s' % self.contidarm)
        self.mab_alg.set_arms(self.contidarm)

    @staticmethod
    def _exclusive_prefix_sums(values):
        """Return ``[sum(values[:i]) for i in range(len(values))]`` in linear time."""
        sums = []
        running = 0
        for value in values:
            sums.append(running)
            running += value
        return sums

    @staticmethod
    def _count_stripped_lines(path):
        """Return a Counter of the whitespace-stripped lines of the text file ``path``."""
        with open(path, 'r') as handle:
            return Counter(line.strip() for line in handle)

    def run(self):
        """Replay the log once and write the reward summaries to disk.

        For every log line the first-level policy picks a cluster, a
        ``select_arm`` built for that cluster picks a video, and the reward is
        1.0 when that video equals the logged item.  Results go to a text file
        under ``Movielens LDA/clusters/Arrays/BayesUCB/``, to
        ``Very reward.txt`` / ``Low reward.txt`` and to two existing Excel
        workbooks.
        """
        # Imported under a private alias: the bare name `copy` may be bound to
        # the stdlib module by another cell, which made `copy(rb)` fail.
        from xlutils.copy import copy as copy_workbook

        self.mab_alg.set_arms(self.contidarm)
        self.cumulative_reward = []

        # How often each video id is listed among the most / least rated ones.
        most_rated = self._count_stripped_lines('vids_very_ratings.txt')
        least_rated = self._count_stripped_lines('vids_less_ratings.txt')
        me = []   # one 1.0 per hit on a most-rated entry
        le = []   # one 1.0 per hit on a least-rated entry

        with open(self.log_file, 'r') as intro_file:
            logs = intro_file.readlines()

        for entry in logs:
            user_id, item_id = entry.strip().split(',')
            cluster = self.mab_alg.choose_arm()
            # NOTE(review): a new select_arm (which re-reads the cluster file)
            # is built for every log line -- confirm this is intended.
            get_arm = select_arm(cluster, self.ncluster)
            recommendation = get_arm.get_video()
            if item_id == recommendation:
                reward = 1.0
                # A Counter answers 0 for ids that are not listed.
                me.extend([1.0] * most_rated[recommendation])
                le.extend([1.0] * least_rated[recommendation])
            else:
                reward = 0.0
            self.mab_alg.update(cluster, reward)
            self.cumulative_reward.append(reward)

        rme_result = [self._exclusive_prefix_sums(me)]
        rle_result = [self._exclusive_prefix_sums(le)]
        tmp_result = [self._exclusive_prefix_sums(self.cumulative_reward)]
        mean_tmp_result = np.mean(tmp_result, axis=0)
        std_tmp_result = np.std(tmp_result, axis=0)

        with open('Movielens LDA/clusters/Arrays/BayesUCB/reward_%s_clusters_test'%(self.ncluster),'w') as final:
            final.write(str(tmp_result))
        with open('Very reward.txt','w') as veryrew:
            veryrew.write(str(rme_result))
        with open('Low reward.txt','w') as lowrew:
            lowrew.write(str(rle_result))

        # Open an Excel file and add the summary to its first worksheet.
        rb = open_workbook("Results.xls")
        wb = copy_workbook(rb)
        worksheet = wb.get_sheet(0) #Pay attention for this
        worksheet.write(21, self.column, str(round(np.mean(mean_tmp_result))) + ' - ' + str(np.std(std_tmp_result)))
        wb.save('Results.xls')

        rc = open_workbook("Ratings_frequency.xls")
        wc = copy_workbook(rc)
        wks = wc.get_sheet(0) #Pay attention for this
        wks.write(2, 1, 'Muitas avaliacoes: ' + str(round(np.mean(rme_result))) +
                'Poucas avaliacoes: ' + str(round(np.mean(rle_result))))
        wc.save('Ratings_frequency.xls')

In [23]:
import copy

In [31]:
#num_runs = 1
# Column of Results.xls that receives the first configuration; advances by one per configuration.
cell = 1
# Cluster counts (first-level arms) to evaluate.
num_cluster = [5]#[5,10,17,25,50,100]

for x in num_cluster:
    context = {
        # A freshly built policy needs no copying; building it directly also
        # avoids the name `copy`, which this notebook binds both to the stdlib
        # module and to xlutils.copy.copy depending on which cell ran last.
        'mab'          : BayesUCB(),
        'log_file'     : 'log.txt',
        'column'       : cell,
        'cluster'      : x
    }
    if __name__ == '__main__':
        evaluator = PolicyEvaluator(context)#, num_runs)
        evaluator.run()
    cell += 1

Selected clusters 5


FileNotFoundError: ignored