# Building Q-Table for Choosing Cards

In [1]:
from card_choice_gym import CardChoiceEnv
import numpy as np
import random
import pandas as pd
import csv

In [16]:
def LearnJoker(q_in=np.zeros((4, 9, 3, 4, 18, 4)), alpha_in=0.01, epsilon_in=0.5, gamma_in=0.95, episodes_in=100):
  acts = ['STRG-BEAT', 'STRG-LOSS', 'WEAK-BEAT','WEAK-LOSS']
  env = CardChoiceEnv()

  alpha, gamma, epsilon = alpha_in, gamma_in, epsilon_in
  q = q_in
  
  wins = []
  good_calls = np.ndarray((6,)).T

  for i in range(episodes_in): 
    done = False
    s = env.reset()
    s0, s1, s2, s3, s4 = s
    while True: 
      if np.random.random() < epsilon:
      # choose random action
        act_num = random.randint(0, 3)
      else:
        # greedy
        act_num = np.argmax(q[s0, s1, s2, s3, s4])
      
      action = acts[act_num]

      s_, r, done, _ = env.step(action)

      s_0, s_1, s_2, s_3, s_4 = s_
      td_target = r + gamma * np.argmax(q[s_0, s_1, s_2, s_3, s_4])
      td_error = td_target - q[s0, s1, s2, s3, s4, act_num]
      s = s_

      q[s0, s1, s2, s3, s4, act_num] += alpha * td_error
      if done:
        if r > 0:
          wins.append(i)
          good_calls = np.vstack((good_calls, [env.call_state]))
        break
  return wins, good_calls, q

In [19]:
eps = 100000
wins, calls, q = LearnJoker(epsilon_in=0.95, episodes_in=eps)
len(calls)

In [18]:
calls

array([[0., 0., 2., 2., 1., 0.],
       [1., 0., 0., 0., 2., 2.],
       [0., 0., 1., 2., 1., 0.],
       ...,
       [1., 1., 0., 2., 2., 2.],
       [2., 2., 1., 0., 0., 3.],
       [3., 0., 1., 2., 2., 0.]])

In [4]:
eps = 1000
wins, calls, q = LearnJoker(epsilon_in=0.01, episodes_in=eps, q_in = q)
len(calls)

451

In [30]:
with open('/Users/konstantinekahadze/Desktop/Projects/JustNines/data/calls.csv', 'w') as out:
    csv_out = csv.writer(out)
    csv_out.writerow(['order', 'already', 'jokers', "aces", "kings", "queens"])
    for row in calls:
        csv_out.writerow(row)

In [31]:
df = pd.read_csv('../data/calls.csv')
df

Unnamed: 0,order,already,jokers,aces,kings,queens
0,3,3.0,0,0,2,1
1,1,0.0,0,1,0,1
2,2,0.0,1,1,0,2
3,3,3.0,0,1,0,1
4,3,1.0,0,2,0,0
...,...,...,...,...,...,...
492,3,2.0,1,2,0,1
493,0,0.0,0,0,0,2
494,1,1.0,1,1,1,1
495,3,3.0,1,0,3,0


In [32]:
df.already.describe()

count    497.000000
mean       0.991952
std        1.086916
min        0.000000
25%        0.000000
50%        1.000000
75%        2.000000
max        4.000000
Name: already, dtype: float64

In [10]:
def save_q_table(table):
    file = '../models/q-table.npy'
    with open(file, "wb"):
        np.save(file, table, allow_pickle=True)

In [11]:
save_q_table(q)