In [1]:
import random
import itertools

In [2]:
class MarkovChain:
    """Rock-paper-scissors agent that models the player with a Markov chain.

    A *state* is a two-letter string: the player's previous move followed by
    the agent's previous move (for example ``'RP'``). For every state the
    agent keeps exponentially decayed counts of the move the player made
    next, and answers with the move that beats the most likely one.

    Moves are stored internally as ``'R'``, ``'P'`` and ``'S'``; the public
    methods speak ``'rock'``, ``'paper'`` and ``'scissors'``.
    """

    _MOVES = ('R', 'P', 'S')
    # Maps a move to the move that defeats it.
    _BEATS = {'R': 'P', 'P': 'S', 'S': 'R'}
    _TO_NAME = {'R': 'rock', 'P': 'paper', 'S': 'scissors'}
    _FROM_NAME = {'rock': 'R', 'paper': 'P', 'scissors': 'S'}

    def __init__(self, decay=1.0):
        """Create an untrained agent.

        Args:
            decay: Factor applied to a state's existing observation counts
                every time that state receives a new observation. ``1.0``
                keeps the full history; smaller values favour recent moves.
        """
        self.decay = decay
        self.reset()
        self.last_state = 'RR'
        self.last_pred = ''

    def _create_matrix(self, order=1):
        """Build a fresh transition table with uniform probabilities.

        Args:
            order: Number of past rounds encoded in a state key. Each round
                contributes two letters, so keys have ``2 * order`` letters.

        Returns:
            A dict mapping every state key to
            ``{move: {'prob': 1 / 3, 'n_obs': 0}}`` for each move.
        """
        # max(..., 1) keeps the historical result for order < 1, which
        # produced the three single-letter keys.
        key_length = max(2 * order, 1)
        keys = (
            ''.join(combo)
            for combo in itertools.product(self._MOVES, repeat=key_length)
        )
        return {
            key: {move: {'prob': 1 / 3, 'n_obs': 0} for move in self._MOVES}
            for key in keys
        }

    def _update_matrix(self, pair, input):
        """Record that the player chose ``input`` while in state ``pair``.

        Existing counts for ``pair`` are first multiplied by ``self.decay``,
        the count for ``input`` is incremented, and the probabilities of the
        row are re-normalised.

        Raises:
            KeyError: If ``pair`` is not a known state or ``input`` is not
                one of ``'R'``, ``'P'``, ``'S'``.
        """
        row = self.matrix[pair]
        for stats in row.values():
            stats['n_obs'] *= self.decay
        row[input]['n_obs'] += 1

        n_total = sum(stats['n_obs'] for stats in row.values())
        for stats in row.values():
            stats['prob'] = stats['n_obs'] / n_total

    def reset(self):
        """Discard all learned statistics.

        Only the transition table is rebuilt; ``last_state`` and
        ``last_pred`` are left as they are.
        """
        self.matrix = self._create_matrix()

    def predict(self) -> str:
        """Choose the agent's next move.

        The player's most probable next move for the current state is looked
        up and the move that beats it is returned. Ties between equally
        probable moves (including the untrained, all-equal case) are broken
        uniformly at random.

        Returns:
            ``'rock'``, ``'paper'`` or ``'scissors'``.
        """
        probs = self.matrix[self.last_state]
        best = max(probs[move]['prob'] for move in self._MOVES)
        # Random tie-breaking avoids depending on set/hash iteration order.
        candidates = [m for m in self._MOVES if probs[m]['prob'] == best]
        expected = random.choice(candidates)

        pred = self._BEATS[expected]
        self.last_pred = pred
        return self._TO_NAME[pred]

    def update(self, player_rps: str) -> None:
        """Learn from the move the player actually made this round.

        The observation is credited to the state that was current when the
        move was made (the state ``predict`` consulted); only afterwards is
        the state advanced to "player's move + agent's move".

        Args:
            player_rps: ``'rock'``, ``'paper'`` or ``'scissors'``.

        Raises:
            KeyError: If ``player_rps`` is not one of the three move names.
        """
        move = self._FROM_NAME[player_rps]
        self._update_matrix(self.last_state, move)

        # If predict() has not been called yet there is no agent move to
        # record; keep the agent half of the current state so the new state
        # is still a valid two-letter key.
        agent_move = self.last_pred or self.last_state[-1]
        self.last_state = move + agent_move

In [3]:
# Create an agent with decay=0.9: a state's stored observation counts are
# multiplied by 0.9 each time that state receives a new observation.
agent = MarkovChain(0.9)

In [8]:
# Ask the agent for its next move; the result is 'rock', 'paper' or 'scissors'.
agent.predict()

'paper'

In [10]:
# Tell the agent which move the player made so it can update its statistics.
agent.update('rock')