# Paper, Rock, Scissors!

- The computer is given a tactic in the form of a transition matrix:
    ```python
    transition_matrix_computer = {
        "Paper": {
            "Paper": 2 / 3,
            "Rock": 1 / 3,
            "Scissors": 0 / 3
        },
        "Rock": {
            "Paper": 0 / 3,
            "Rock": 2 / 3,
            "Scissors": 1 / 3
        },
        "Scissors": {
            "Paper": 2 / 3,
            "Rock": 0 / 3,
            "Scissors": 1 / 3
        }
    }
    ```
i.e. the computer picks its current move (paper, rock or scissors) based on its previous move, using the probabilities in the row for that previous move.
- The player's tactics are:
    - Version 1: moves are drawn using the stationary vector of `transition_matrix_computer`
    - Version 2: the player's own transition matrix is updated during the game
- The payoff value for the player is:
    - 1 in case of a win
    - -1 in case of a loss
    - 0 in case of a draw

## Requirements
- `numpy`
- `matplotlib`
- `ipywidgets`

In [1]:
from numpy import zeros, random, dot, clip, sign
import matplotlib.pyplot as plt

Description: The programme simulates a game of 'Rock, Paper, Scissors' between a player and a computer,
       updating the transition matrix based on performance and learning in real time.

In [2]:
# Moves in the order used to index the computer's transition matrix.
states_computer = ["Paper", "Rock", "Scissors"]
# Player moves aligned index-by-index with states_computer: entry i is the
# move that beats states_computer[i] (Scissors > Paper, Paper > Rock,
# Rock > Scissors). This lets a probability vector expressed over the
# computer's moves be used directly as weights over the winning replies.
states_player = ["Scissors", "Paper", "Rock"]

# Computer strategy: row = previous move, column = probability of next move.
transition_matrix_computer = {
    "Paper": {"Paper": 2 / 3, "Rock": 1 / 3, "Scissors": 0 / 3},
    "Rock": {"Paper": 0 / 3, "Rock": 2 / 3, "Scissors": 1 / 3},
    "Scissors": {"Paper": 2 / 3, "Rock": 0 / 3, "Scissors": 1 / 3},
}

# Player strategy for Version 2: starts uniform; each row is its own dict so
# rows can be adjusted independently during the game.
transition_matrix_player = {
    previous: {following: 1 / 3 for following in states_computer}
    for previous in states_computer
}

In [3]:
def choose_move(prev_move, transition_matrix, states):
    """Draw the next move given the previous one.

    Args:
        prev_move: Key of the row to read from ``transition_matrix``.
        transition_matrix: Nested mapping ``{previous: {next: probability}}``.
        states: Candidate moves; also fixes the order in which the row's
            probabilities are passed to the sampler.

    Returns:
        One element of ``states``, sampled with the probabilities stored in
        ``transition_matrix[prev_move]``.
    """
    row = transition_matrix[prev_move]
    weights = []
    for state in states:
        weights.append(row[state])
    return random.choice(states, p=weights)


def game(computer_move, player_move):
    """Score a single round from the player's point of view.

    Args:
        computer_move: The computer's move ("Paper", "Rock" or "Scissors").
        player_move: The player's move ("Paper", "Rock" or "Scissors").

    Returns:
        1 if the player wins, -1 if the computer wins, and 0 otherwise
        (a draw, or any pair of values not listed below).
    """
    pairing = (computer_move, player_move)
    # (computer, player) pairs in which the player's move beats the computer's.
    player_wins = (("Paper", "Scissors"), ("Rock", "Paper"), ("Scissors", "Rock"))
    # (computer, player) pairs in which the computer's move beats the player's.
    computer_wins = (("Paper", "Rock"), ("Rock", "Scissors"), ("Scissors", "Paper"))

    if pairing in player_wins:
        return 1
    if pairing in computer_wins:
        return -1
    return 0

#### Version 1

In [4]:
def matrix_to_stationary_vector(transition_matrix, states, tol=1e-12,
                                max_iter=10_000):
    """Approximate the stationary distribution of a Markov chain.

    Uses power iteration: starting from the uniform distribution, the vector
    is repeatedly multiplied by the transition matrix until two consecutive
    iterates differ by at most ``tol`` in every component.

    Args:
        transition_matrix: Nested mapping ``{previous: {next: probability}}``.
        states: State names; they fix the row/column order of the matrix and
            the order of the entries in the returned vector.
        tol: Largest allowed per-component change between consecutive
            iterates for the iteration to be considered converged.
        max_iter: Upper bound on the number of multiplications, so that a
            chain whose iterates never settle cannot loop forever; in that
            case the last iterate is returned.

    Returns:
        A 1-D numpy array of length ``len(states)``.

    Raises:
        ValueError: If ``states`` is empty.
    """
    size = len(states)
    if size == 0:
        raise ValueError("states must not be empty")

    matrix = zeros((size, size))
    for i, source in enumerate(states):
        for j, target in enumerate(states):
            matrix[i, j] = transition_matrix[source][target]

    # Uniform start vector sized to the number of states.
    vector = zeros(size) + 1 / size
    for _ in range(max_iter):
        updated = dot(vector, matrix)
        converged = abs(updated - vector).max() <= tol
        vector = updated
        if converged:
            break

    return vector


def version_first(stationary_vector):
    """Play Version 1: the player samples moves with fixed probabilities.

    In each of the 10001 rounds the player's move is drawn from
    ``states_player`` with weights ``stationary_vector``. The computer's
    first move is drawn uniformly from ``states_computer``; every later move
    follows ``transition_matrix_computer`` given its previous move.

    Args:
        stationary_vector: Probabilities over ``states_player``, in that
            list's order.

    Returns:
        A tuple ``(cash, cash_history)``: the final balance and the list of
        balances, starting with the initial 0 and followed by one entry per
        round.
    """
    balance = 0
    history = [balance]
    last_computer_move = None

    for _ in range(10001):
        # The player is sampled before the computer in every round.
        player_move = random.choice(states_player, p=stationary_vector)
        computer_move = (
            random.choice(states_computer)
            if last_computer_move is None
            else choose_move(last_computer_move, transition_matrix_computer,
                             states_computer)
        )
        balance += game(computer_move, player_move)
        history.append(balance)
        last_computer_move = computer_move

    return (balance, history)

#### Version 2

In [5]:
def update_transition_matrix(change, move, prev_move, transition_matrix,
                             learning_rate=0.02):
    """Reinforce or weaken one transition in place, then renormalize.

    The probability of playing ``move`` after ``prev_move`` is multiplied by
    ``1 + sign(change) * learning_rate`` (so it grows after a win, shrinks
    after a loss and is untouched after a draw), clipped to ``[0, 1]``, and
    then every row with a positive sum is rescaled to sum to 1.

    Args:
        change: Payoff of the round; only its sign is used.
        move: The move that was just played (column key).
        prev_move: The move played in the previous round (row key).
        transition_matrix: Nested mapping ``{previous: {next: probability}}``;
            modified in place.
        learning_rate: Relative step applied to the selected probability.
            Defaults to 0.02. A value of 1 or more drives the entry to 0 on
            a loss, and a multiplicative update cannot raise it again.

    Returns:
        None.
    """
    factor = 1 + sign(change) * learning_rate
    row = transition_matrix[prev_move]
    row[move] = clip(row[move] * factor, 0, 1)

    # Rows whose sum is not positive are left as they are to avoid dividing
    # by zero.
    for probabilities in transition_matrix.values():
        total = sum(probabilities.values())
        if total > 0:
            for key in probabilities:
                probabilities[key] /= total


def version_second():
    """Play Version 2: the player adapts its transition matrix as it plays.

    Runs 10001 rounds. Each side's first move is drawn uniformly from its own
    state list; afterwards the computer follows
    ``transition_matrix_computer`` and the player follows
    ``transition_matrix_player``, each conditioned on its own previous move.
    From the second round on, the player's matrix is updated after every
    round with the payoff of that round.

    Note:
        ``transition_matrix_player`` is a module-level object and is updated
        in place, so whatever was learned is still there on the next call.

    Returns:
        A tuple ``(cash, cash_history)``: the final balance and the list of
        balances, starting with the initial 0 and followed by one entry per
        round.
    """
    cash = 0
    cash_history = [cash]

    prev_computer_move = None
    prev_player_move = None
    for _ in range(10001):
        if prev_computer_move is not None:
            computer_move = choose_move(prev_computer_move,
                                        transition_matrix_computer,
                                        states_computer)
        else:
            computer_move = random.choice(states_computer)
        if prev_player_move is not None:
            player_move = choose_move(prev_player_move,
                                      transition_matrix_player, states_player)
        else:
            # Draw the player's opening move from the player's own state
            # list, consistent with the choose_move call above.
            player_move = random.choice(states_player)
        change = game(computer_move, player_move)
        cash += change
        cash_history.append(cash)
        # There is no (previous, current) transition to score in round one.
        if prev_player_move is not None:
            update_transition_matrix(change, player_move, prev_player_move,
                                     transition_matrix_player)
        prev_player_move = player_move
        prev_computer_move = computer_move
    return (cash, cash_history)

In [6]:
def draw_cash_history(cash_history):
    """Plot the balance after each round together with a red zero line.

    Args:
        cash_history: Sequence of balances; the position in the sequence is
            used as the x coordinate.
    """
    rounds = range(len(cash_history))
    zero_line = [0] * len(cash_history)

    plt.plot(rounds, cash_history)
    # Break-even reference line.
    plt.plot(rounds, zero_line, color='red')
    plt.xlabel('Numer Gry')
    plt.ylabel('Stan Gotówki')
    plt.title('Zmiana Stanu Gotówki w Grze "Kamień, Papier, Nożyce"')
    plt.show()

In [7]:
from ipywidgets import interact


@interact(choose=["Stationary vector", "Transition matrix"])
def choose_version(choose):
    match choose:
        case "Stationary vector":
            stationary_vector = matrix_to_stationary_vector(
                transition_matrix_computer, states_computer)
            cash, cash_history = version_first(stationary_vector)
        case "Transition matrix":
            cash, cash_history = version_second()
    print(f"Końcowy stan konta: {cash}")
    draw_cash_history(cash_history)

interactive(children=(Dropdown(description='choose', options=('Stationary vector', 'Transition matrix'), value…