In [2]:
import random
import bisect
import collections

ALPHA = 0.1
BETTA = 0.1

def cdf(weights):
    total = sum(weights)
    result = []
    cumsum = 0
    for w in weights:
        cumsum += w
        result.append(cumsum / total)
    return result

def choice(population, weights):
    assert len(population) == len(weights)
    cdf_vals = cdf(weights)
    x = random.random()
    idx = bisect.bisect(cdf_vals, x)
    return population[idx]

def play_game():
    average_reward = 0
    actions = [1, 2, 3]
    weights = [0.3, 0.3, 0.3]
    while 1:
        cur_action = choice(actions, weights)
        user_action = input("1: rock, 2: scissors, 3: paper | User action: ")
        assert user_action in ['1', '2', '3']
        user_action = int(user_action)
        
        
        if cur_action - user_action == -1 or cur_action - user_action == 2:
            reward = 1
        elif cur_action == user_action:
            reward = 0
        else:
            reward = -1
            
        print("Bot action: {0}, User action: {1}, Reward: {2}".format(cur_action, user_action, reward))
        
        average_reward += ALPHA * (reward - average_reward)
        weights[cur_action-1] += BETTA * (reward - average_reward)
        print(weights)
        

In [3]:
play_game()

1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 2, User action: 1, Reward: -1
[0.3, 0.20999999999999996, 0.3]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 2, User action: 1, Reward: -1
[0.3, 0.12899999999999995, 0.3]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 3, User action: 1, Reward: 1
[0.3, 0.12899999999999995, 0.4071]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 1, User action: 1, Reward: 0
[0.30639, 0.12899999999999995, 0.4071]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 2, User action: 1, Reward: -1
[0.30639, 0.044750999999999944, 0.4071]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 3, User action: 1, Reward: 1
[0.30639, 0.044750999999999944, 0.5112759]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 3, User action: 1, Reward: 1
[0.30639, 0.044750999999999944, 0.60503421]
1: rock, 2: scissors, 3: paper | User action: 1
Bot action: 1, User action: 1, Reward: 0
[0.300772479, 0.0447

KeyboardInterrupt: 

In [17]:
class Bot:
    def __init__(self, bot_name, alpha, betta):
        self.alpha = alpha
        self.betta = betta
        self.bot_name = bot_name
        self.average_reward = 0
        self.total_plays = 0
        self.actions = [1, 2, 3]
        self.weights = [0.3, 0.3, 0.3]
        self.last_action = 0
        
    def play_game(self):
        self.cur_action = choice(self.actions, self.weights)
        print("Bot {0} plays: {1}".format(self.bot_name, self.cur_action))
        return self.cur_action
    
    def train(self, user_action):
        if self.cur_action - user_action == -1 or self.cur_action - user_action == 2:
            reward = 1
        elif self.cur_action == user_action:
            reward = 0
        else:
            reward = -1
            
        self.total_plays += 1
        self.average_reward += (reward - self.average_reward) / self.total_plays
        self.weights[self.cur_action-1] += self.betta * (reward - self.average_reward)

In [15]:
bot_1 = Bot('bot_1', 1.09, 0.01)
bot_2 = Bot('bot_2', 0.01, 0.01)

total_score = {1: 0, 2: 0}

for _ in range(10000):
    res1 = bot_1.play_game()
    res2 = bot_2.play_game()
    bot_1.train(res2)
    bot_2.train(res1)
    if res1 - res2 == -1 or res1 - res2 == 2:
        print('bot_1 wins')
        total_score[1] += 1
    elif res1 == res2:
        print('Nichia')
    else:
        print('bot_2 wins')
        total_score[2] += 1
        


Bot bot_1 plays: 1
Bot bot_2 plays: 1
Nichia
Bot bot_1 plays: 2
Bot bot_2 plays: 2
Nichia
Bot bot_1 plays: 1
Bot bot_2 plays: 2
bot_1 wins
Bot bot_1 plays: 3
Bot bot_2 plays: 3
Nichia
Bot bot_1 plays: 1
Bot bot_2 plays: 2
bot_1 wins
Bot bot_1 plays: 1
Bot bot_2 plays: 1
Nichia
Bot bot_1 plays: 1
Bot bot_2 plays: 3
bot_2 wins
Bot bot_1 plays: 3
Bot bot_2 plays: 2
bot_2 wins
Bot bot_1 plays: 1
Bot bot_2 plays: 2
bot_1 wins
Bot bot_1 plays: 3
Bot bot_2 plays: 1
bot_1 wins
Bot bot_1 plays: 3
Bot bot_2 plays: 3
Nichia
Bot bot_1 plays: 1
Bot bot_2 plays: 2
bot_1 wins
Bot bot_1 plays: 2
Bot bot_2 plays: 2
Nichia
Bot bot_1 plays: 3
Bot bot_2 plays: 2
bot_2 wins
Bot bot_1 plays: 2
Bot bot_2 plays: 2
Nichia
Bot bot_1 plays: 1
Bot bot_2 plays: 1
Nichia
Bot bot_1 plays: 2
Bot bot_2 plays: 3
bot_1 wins
Bot bot_1 plays: 3
Bot bot_2 plays: 1
bot_1 wins
Bot bot_1 plays: 2
Bot bot_2 plays: 1
bot_2 wins
Bot bot_1 plays: 2
Bot bot_2 plays: 3
bot_1 wins
Bot bot_1 plays: 2
Bot bot_2 plays: 1
bot_2 wins
Bot

In [16]:
print("Total score: ", total_score)

Total score:  {1: 600, 2: 7692}
