# Problem suggestion with MAB algorithms

We are using simple multi-armed bandit (MAB) algorithms to recommend problems to competitive programmers. In this work we first introduce the base case.

First we start with the Epsilon-Greedy algorithm.

## The data

The algorithms will be trained using data from the URI Online Judge. We have around 200k solutions.
The solutions are a list of tuples of the form `(problem_id, user_id, date)`.

At first we will read the data and sort by date.

In [None]:
import random, math
from itertools import groupby
from collections import namedtuple
import pandas as pd
import matplotlib.pyplot as plt

In [41]:
# Load the raw solutions table from disk and preview its first five rows.
solutions_df = pd.read_csv('solutions.csv')
solutions_df.head(5)

Unnamed: 0,user,problem,category,date
0,40980,1001,1,15/02/25 06:15
1,40980,1002,1,15/02/25 07:10
2,40980,1003,1,15/02/25 06:17
3,40980,1004,1,15/02/25 06:19
4,40980,1005,1,15/02/25 07:31


The first thing to do after reading the dataset is to filter the data. This is important because, by doing it first, we work with less data, which speeds up everything that follows.

In [42]:
# Numeric id of the category to keep; 0 ('All') disables the filter.
filter_category = 7

# Category names. The position in this list is the numeric id that is compared
# against the `category` column below.
categories = [
    'All',        # 0
    'Beginner',   # 1
    'Ad-Hoc',     # 2
    'String',     # 3
    'Libraries',  # 4
    'Math',       # 5
    'Paradigms',  # 6
    'Graph',      # 7
    'Geometry',   # 8
]

# Name of the selected category (used as the plot title at the end).
category = categories[filter_category]

if filter_category:
    # Keep only the rows of the selected category. The explicit copy makes the
    # result an independent frame, so later modifications never act on a
    # view of the unfiltered data.
    solutions_df = solutions_df.loc[
        solutions_df['category'] == filter_category
    ].copy()

# Preview at top level: an expression nested inside the `if` is not the
# cell's last statement, so the notebook would not display it.
solutions_df[:5]
    

Unnamed: 0,user,problem,category,date
48,40980,1124,8,15/07/08 06:23
79,40980,1857,8,16/08/02 03:10
116,40980,1455,8,15/12/15 04:37
131,40980,1126,8,15/09/26 02:55
144,40980,1560,8,15/10/07 10:36


Now, we will get the set of problems and users.

In [43]:
# Distinct problem ids that occur in the data, in ascending order.
problems = sorted(set(solutions_df['problem']))
problems[:5]

[1039, 1102, 1108, 1124, 1126]

In [44]:
# Distinct user ids that occur in the data, in ascending order.
users = sorted(set(solutions_df['user']))
users[:5]

[2, 83, 174, 276, 384]

Now we will fix the user and problem ids. We will make them contiguous, starting from 0.

In [45]:
# Replace the original user/problem ids with their position in the sorted id
# lists, so both become contiguous integers starting at 0.
#
# Each column is remapped with a single dictionary lookup per row instead of
# one full-frame `replace` call per id, and `assign` returns a new frame
# rather than modifying the (possibly filtered) frame in place.
user_index = {user_id: position for position, user_id in enumerate(users)}
problem_index = {
    problem_id: position for position, problem_id in enumerate(problems)
}

solutions_df = solutions_df.assign(
    user=solutions_df['user'].map(user_index),
    problem=solutions_df['problem'].map(problem_index),
)

solutions_df[:5]

Unnamed: 0,user,problem,category,date
48,353,3,8,15/07/08 06:23
79,353,45,8,16/08/02 03:10
116,353,22,8,15/12/15 04:37
131,353,4,8,15/09/26 02:55
144,353,28,8,15/10/07 10:36


Update the users and problems lists so that they match the new ids.

In [46]:
# Recompute the problem list from the remapped column. Sorting (as in the
# earlier cell) makes the order deterministic; a bare set has no guaranteed
# iteration order.
problems = sorted(set(solutions_df['problem']))
problems[:5]

[0, 1, 2, 3, 4]

In [47]:
# Recompute the user list from the remapped column. Sorting (as in the
# earlier cell) makes the order deterministic; a bare set has no guaranteed
# iteration order.
users = sorted(set(solutions_df['user']))
users[:5]

[0, 1, 2, 3, 4]

After that, we need to sort the data by the date.

In [None]:
# Order the rows by the `date` column so they can be replayed in sequence,
# then preview the first five rows.
solutions_df = solutions_df.sort_values(by='date')
solutions_df.head(5)

Now we can split the data.
Note: the problems are indexed by their id. Because the ids were made sequential (starting from 0) in the remapping step above, they can be used directly as list indices, which makes the code simpler.

In [49]:
# Lightweight record holding the two fields the bandit simulation needs
# from each row: the problem id and the user id.
Solution = namedtuple('Solution', 'problem, user')

# One record per row, in the data frame's current row order.
solutions = [
    Solution(row.problem, row.user) for row in solutions_df.itertuples()
]

solutions[:5]

[Solution(problem=0, user=0),
 Solution(problem=0, user=10),
 Solution(problem=3, user=0),
 Solution(problem=0, user=11),
 Solution(problem=3, user=3)]

At this point we have everything that is necessary to test our algorithms. To start, we will use the Epsilon-Greedy algorithm

In [50]:
class EpsilonGreedy(object):
    """Epsilon-greedy bandit in which every problem is one arm.

    Attributes:
        epsilon: probability of exploring (picking a random arm) instead of
            exploiting the arm with the best running mean reward.
        n_arms: number of arms (problems).
        counts: per-arm number of ``update`` calls.
        values: per-arm running mean of the rewards passed to ``update``.
        chosen: ``chosen[user][arm]`` is truthy when that arm must no longer
            be offered to that user. The class only reads this table; the
            caller is responsible for setting the flags.
    """

    def __init__(self, n_users, n_problems, epsilon=0.001):
        """Create a bandit for ``n_users`` users and ``n_problems`` arms.

        ``epsilon`` defaults to the value that used to be hard-coded.
        """
        self.epsilon = epsilon
        self.n_arms = n_problems
        self.counts = [0 for _ in range(self.n_arms)]
        self.values = [0.0 for _ in range(self.n_arms)]
        self.chosen = [[0 for _ in range(n_problems)] for _ in range(n_users)]

    def max_arm(self, user):
        """Return the unflagged arm with the highest mean reward for ``user``.

        Ties go to the lowest arm index. If every arm is flagged for the
        user, arm 0 is returned.
        """
        best_arm = 0
        best_value = -1

        for arm in range(self.n_arms):
            if self.chosen[user][arm]:
                continue

            if self.values[arm] > best_value:
                best_arm = arm
                best_value = self.values[arm]

        return best_arm

    def rand_arm(self, user):
        """Return an arm drawn uniformly from those not flagged for ``user``.

        Sampling directly from the unflagged arms keeps the draw uniform
        (probing forward from a random start favours arms that follow a run
        of flagged ones) and cannot loop forever. If every arm is flagged,
        an arbitrary arm is returned instead of hanging.
        """
        flagged = self.chosen[user]
        candidates = [arm for arm in range(self.n_arms) if not flagged[arm]]

        if not candidates:
            return random.randrange(self.n_arms)

        return random.choice(candidates)

    def select_arm(self, user):
        """Exploit with probability ``1 - epsilon``, otherwise explore."""
        if random.random() > self.epsilon:
            return self.max_arm(user)
        else:
            return self.rand_arm(user)

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1

        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        # Incremental mean: weight the old mean by (n - 1) / n and the new
        # observation by 1 / n.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value

In [51]:
class AnnealingEpsilonGreedy(object):
    """Epsilon-greedy bandit whose exploration rate shrinks over time.

    Every problem is one arm. The exploration probability is recomputed on
    each selection from the total number of updates seen so far.

    Attributes:
        epsilon: initial value kept for compatibility; ``select_arm`` does
            not read it and computes its own rate on every call.
        n_arms: number of arms (problems).
        counts: per-arm number of ``update`` calls.
        values: per-arm running mean of the rewards passed to ``update``.
        chosen: ``chosen[user][arm]`` is truthy when that arm must no longer
            be offered to that user. The class only reads this table; the
            caller is responsible for setting the flags.
    """

    def __init__(self, n_users, n_problems):
        """Create a bandit for ``n_users`` users and ``n_problems`` arms."""
        self.epsilon = 0.1
        self.n_arms = n_problems
        self.counts = [0 for _ in range(self.n_arms)]
        self.values = [0.0 for _ in range(self.n_arms)]
        self.chosen = [[0 for _ in range(n_problems)] for _ in range(n_users)]

    def max_arm(self, user):
        """Return the unflagged arm with the highest mean reward for ``user``.

        Ties go to the lowest arm index. If every arm is flagged for the
        user, arm 0 is returned.
        """
        best_arm = 0
        best_value = -1

        for arm in range(self.n_arms):
            if self.chosen[user][arm]:
                continue

            if self.values[arm] > best_value:
                best_arm = arm
                best_value = self.values[arm]

        return best_arm

    def rand_arm(self, user):
        """Return an arm drawn uniformly from those not flagged for ``user``.

        Sampling directly from the unflagged arms keeps the draw uniform
        (probing forward from a random start favours arms that follow a run
        of flagged ones) and cannot loop forever. If every arm is flagged,
        an arbitrary arm is returned instead of hanging.
        """
        flagged = self.chosen[user]
        candidates = [arm for arm in range(self.n_arms) if not flagged[arm]]

        if not candidates:
            return random.randrange(self.n_arms)

        return random.choice(candidates)

    def select_arm(self, user):
        """Pick an arm for ``user`` using the annealed exploration rate.

        With ``t`` = total number of updates + 1, the rate is
        ``1 / ln(t + 1e-7)``. The small offset keeps the logarithm non-zero
        at ``t == 1``. While the rate is >= 1 (the first selections), the
        method always explores.
        """
        t = sum(self.counts) + 1
        epsilon = 1 / math.log(t + 0.0000001)

        if random.random() > epsilon:
            return self.max_arm(user)
        else:
            return self.rand_arm(user)

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1

        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        # Incremental mean: weight the old mean by (n - 1) / n and the new
        # observation by 1 / n.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value

In [52]:
def ind_max(x):
    """Return the index of the first occurrence of the largest item of ``x``."""
    # `max` keeps the first of several equal candidates, so ties resolve to
    # the lowest index.
    return max(range(len(x)), key=x.__getitem__)

class UCB1(object):
    """UCB1 bandit in which every problem is one arm.

    Attributes:
        n_arms: number of arms (problems).
        counts: per-arm number of ``update`` calls.
        values: per-arm running mean of the rewards passed to ``update``.
        chosen: ``chosen[user][arm]`` is truthy when that arm must no longer
            be offered to that user. The class only reads this table; the
            caller is responsible for setting the flags.
    """

    def __init__(self, n_users, n_problems):
        """Create a bandit for ``n_users`` users and ``n_problems`` arms."""
        self.n_arms = n_problems
        self.counts = [0 for _ in range(self.n_arms)]
        self.values = [0.0 for _ in range(self.n_arms)]
        self.chosen = [[0 for _ in range(n_problems)] for _ in range(n_users)]

    def select_arm(self, user):
        """Return the arm to offer to ``user``.

        Only arms not flagged for the user are considered, both while
        trying each arm once and when comparing upper confidence bounds, so
        a flagged arm is never returned while an unflagged one exists.
        Ties go to the lowest arm index. If every arm is flagged, arm 0 is
        returned.
        """
        flagged = self.chosen[user]
        candidates = [arm for arm in range(self.n_arms) if not flagged[arm]]
        if not candidates:
            return 0

        # Try every eligible arm once before trusting any estimate.
        for arm in candidates:
            if self.counts[arm] == 0:
                return arm

        # Every candidate has been played at least once, so the total is
        # >= 1 and both the logarithm and the divisions below are defined.
        log_total_counts = 2 * math.log(sum(self.counts))

        def upper_bound(arm):
            bonus = math.sqrt(log_total_counts / float(self.counts[arm]))
            return self.values[arm] + bonus

        return max(candidates, key=upper_bound)

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]

        value = self.values[chosen_arm]
        # Incremental mean: weight the old mean by (n - 1) / n and the new
        # observation by 1 / n.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
        return


And we have a test structure

In [53]:
def test_algorithm(algo, solutions):
    """Replay ``solutions`` in order against ``algo`` and score its picks.

    For each solution the algorithm is asked for an arm for that solution's
    user. The reward is 1 when the arm equals the problem recorded in the
    solution and 0 otherwise. The algorithm is then updated with that
    reward, and the recorded problem is flagged in ``algo.chosen`` for the
    user.

    Args:
        algo: object providing ``select_arm(user)``, ``update(arm, reward)``
            and a ``chosen[user][problem]`` table.
        solutions: iterable of records with ``user`` and ``problem``
            attributes, already in replay order.

    Returns:
        A list with one entry per solution holding the cumulative reward up
        to and including that solution (empty when ``solutions`` is empty).
    """
    cumulative_rewards = []
    total_reward = 0

    for solution in solutions:
        chosen_arm = algo.select_arm(solution.user)
        reward = 1 if chosen_arm == solution.problem else 0

        # Explicit running total, so the first entry does not depend on
        # reading a placeholder through a negative index.
        total_reward += reward
        cumulative_rewards.append(total_reward)

        algo.update(chosen_arm, reward)
        # Flag the problem the user actually solved, not the suggested one.
        algo.chosen[solution.user][solution.problem] = 1

    return cumulative_rewards


# This is an evaluation helper, not a unit test: stop pytest/nose-style
# collectors from picking it up because of its ``test_`` prefix.
test_algorithm.__test__ = False

So, we run each algorithm over the solutions in the dataset and then plot the cumulative reward obtained by each one.

In [54]:
# Size the bandits from the remapped id lists.
n_users = len(users)
n_problems = len(problems)

# One fresh instance per algorithm, so no state is shared between runs.
eg = EpsilonGreedy(n_users, n_problems)
aeg = AnnealingEpsilonGreedy(n_users, n_problems)
ucb1 = UCB1(n_users, n_problems)

# Replay the same solution sequence through each algorithm. The prints are
# progress markers; the call form produces the same output on Python 2 and
# is also valid on Python 3.
cr_eg = test_algorithm(eg, solutions)
print('eg')
cr_aeg = test_algorithm(aeg, solutions)
print('aeg')
cr_ucb1 = test_algorithm(ucb1, solutions)
print('ucb')

eg
aeg
ucb


In [None]:
# Shared horizontal axis: one tick per replayed solution.
x = range(len(solutions))

# Draw one cumulative-reward curve per algorithm, in legend order.
curves = [
    ('EpsilonGreedy', cr_eg),
    ('AnnealingEpsilonGreedy', cr_aeg),
    ('UCB1', cr_ucb1),
]
for curve_label, curve_values in curves:
    plt.plot(x, curve_values, alpha=0.5, label=curve_label)

# Title the figure with the selected category name.
plt.title(category)
plt.xlabel('time')
plt.ylabel('cumulative rewards')
plt.legend()

plt.show()