<a href="https://colab.research.google.com/github/takagiyuusuke/QT/blob/main/Q_miniTETRIS_cp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ★ 最初に実行すること

## 0. インポート・乱数シードの設定

In [None]:
import random

import torch
import numpy as np

# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

# Seed the Python, NumPy and PyTorch random number generators with the same fixed value.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

Mounted at /content/drive


<torch._C.Generator at 0x7e97d4bb4b50>

## 1. miniTETRISのロジック
横6マスのテトリスを定義する

In [None]:
import copy
import time

class Tetris:
  # Piece definitions. Each piece is four cells [row, col, piece id] followed by
  # the rotation pivot [row, col]; the trailing comment names the piece.
  block1 = [[0, 2, 1], [-1, 2, 1], [-1, 3, 1], [0, 3, 1], [-0.5, 2.5]] #O
  block2 = [[0, 3, 2], [0, 2, 2], [0, 4, 2], [-1, 3, 2], [0, 3]] #T
  block3 = [[-1, 2, 3], [-1, 1, 3], [-1, 4, 3], [-1, 3, 3], [-0.5, 2.5]] #I
  block4 = [[0, 3, 4], [0, 2, 4], [0, 4, 4], [-1, 2, 4], [0, 3]] #J
  block5 = [[0, 3, 5], [0, 2, 5], [0, 4, 5], [-1, 4, 5], [0, 3]] #L
  block6 = [[0, 3, 6], [-1, 2, 6], [0, 4, 6],  [-1, 3, 6],[0, 3]] #Z
  block7 = [[0, 3, 7], [0, 2, 7], [-1, 4, 7], [-1, 3, 7], [0, 3]] #S
  blocks = [block1,block2,block3,block4,block5,block6,block7]
  # blocks = [block5,block5,block5,block5,block5,block5,block5]
  # blocks = [block4,block4,block4,block4,block4,block4,block4]

  # Rotation tables. roundblock[piece id - 2][angle] lists the (row, col) offsets
  # from the pivot for that orientation; piece id 1 (O) has no entry.
  roundblock= [[[[0,-1],[-1,0],[0,1]],[[-1,0],[0,1],[1,0]],[[0,1],[1,0],[0,-1]],[[1,0],[0,-1],[-1,0]]],
  [[[-0.5,-1.5],[-0.5,-0.5],[-0.5,0.5],[-0.5,1.5]],[[-1.5,0.5],[-0.5,0.5],[0.5,0.5],[1.5,0.5]],[[0.5,-1.5],[0.5,-0.5],[0.5,0.5],[0.5,1.5]],[[-1.5,-0.5],[-0.5,-0.5],[0.5,-0.5],[1.5,-0.5]]],
  [[[-1,-1],[0,-1],[0,1]],[[-1,1],[-1,0],[1,0]],[[0,-1],[0,1],[1,1]],[[-1,0],[1,0],[1,-1]]],
  [[[-1,1],[0,1],[0,-1]],[[1,0],[-1,0],[1,1]],[[0,-1],[0,1],[1,-1]],[[1,0],[-1,0],[-1,-1]]],
  [[[-1,-1],[-1,0],[0,1]],[[-1,1],[0,1],[1,0]],[[0,-1],[1,0],[1,1]],[[1,-1],[0,-1],[-1,0]]],
  [[[0,-1],[-1,0],[-1,1]],[[-1,0],[0,1],[1,1]],[[1,-1],[1,0],[0,1]],[[-1,-1],[0,-1],[1,0]]]]


  '''
  盤面の情報などの情報の初期化を行う
  display:画面を表示するか否か
  '''
  def __init__(self, display: bool, interval = 0):
    self.display = display
    self.interval = interval
    self.reset_game()

  '''
  ゲームをリセットする
  '''
  def reset_game(self):
    self.location = []    #現在の盤面
    self.moveblock = []   #現在落下中のブロック
    self.hold = []        #ホールド中のブロック
    self.angle = 0        #落下中のブロックの角度(0,1,2,3)
    self.num = 1          #落下中のブロックが何番目か
    self.lines = 0        #消したラインの数
    self.score = 0        #スコア
    self.level = 0        #レベル
    self.canhold = True   #ホールドできるか
    self.maxheight = 22   #最大高さ
    self.shape = 0        #盤面上部の形状
    self.shape2 = 0       #盤面の凹凸に課す罰則用
    self.vacant_count = 0 #盤面における空白カウント
    self.weights = [2,2,2,2,2,2,2]
    self.nex =[self.random_choice() for i in range(20)] #後におちるブロック
    self.setblock(copy.deepcopy(self.nex[0])) #最初のブロックをセットする
    self.turn_left = self.turn("left")  #左回転した場合のブロック配置
    self.turn_right = self.turn("right") #右回転した場合のブロック配置

  '''
  次のブロックを返す(短期間的に偏りを減らすためにweightsを使用)
  '''
  def random_choice(self):
    index = random.choices(range(7), weights = self.weights, k=1)[0]
    block = Tetris.blocks[index]
    self.weights[index] /= 2
    if self.weights[index] <= 1:
      for i in range(7):
        self.weights[i] *= 2
    return block

  '''
  ゲームの盤面を表示する
  '''
  def printboard(self, display_force = False):
      if display_force:
        pass
      elif not self.display:
        return
      board = "\n"
      board += ("⏹"*8+"     NEXT "+str(self.num+1) +"\n")
      for i in range(22):
          board += ("⏹")
          for j in range(6):
              ad = [l for l in self.location + self.moveblock if l[0] ==i and l[1]==j]
              if ad:
                  board += (chr(128996+ad[0][2]))
              else:
                  board += ("⬜")
          board += ("⏹  ")
          if i == 0 or i == 1:
              for j in range(1,5):
                  af = [l for l in self.nex[self.num%20] if l[0] ==i-1 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 3:
              board += (" 👆👆👆 "+str(self.num+2))
          elif i == 4 or i == 5:
              for j in range(1,5):
                  af = [l for l in self.nex[(self.num+1)%20] if l[0] ==i-5 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 7:
              board += (" 👆👆👆 "+str(self.num+3))
          elif i == 8 or i == 9:
              for j in range(1,5):
                  af = [l for l in self.nex[(self.num+2)%20] if l[0] ==i-9 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 12:
              board += (" LEVEL =>"+str(self.level))
          elif i ==14:
              board += (" LINES =>"+str(self.lines))
          elif i == 16:
              board += (" SCORE =>"+str(self.score))
          elif i == 18:
              board += (" -HOLD-")
          elif i == 19 or i == 20:
              for j in range(1,5):
                  af = [l for l in self.hold if l[0] ==i-20 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 21:
              board += (" ------")
          board += ("\n")
      board += ("⏹"*8)
      print(board)
      time.sleep(self.interval)

  '''
  引数に指定したブロックを盤面に追加する
  '''
  def setblock(self, block):
      self.moveblock = block
      self.angle = 0
      if self.num%20==13:
          self.nex =[(self.random_choice() if i < 10 else self.nex[i]) for i in range(20)]
      elif self.num%20==3:
          self.nex =[(self.random_choice() if i >= 10 else self.nex[i]) for i in range(20)]

  '''
  ブロックが落下しなくなったか判定する
  '''
  def stopfall(self):
      return any((any(l[:2] == [i[0]+1,i[1]] for l in self.location) or i[0] >=21) for i in self.moveblock)

  '''
  一列そろっている箇所があれば消去する
  '''
  def set_removeline(self):
      # candidate = [self.moveblock[i][0] for i in range(4) if self.moveblock[i][0]]
      self.location.extend(self.moveblock[:4])
      L = []
      for i in range(22):
          if all([any(row[:2] == [i,j] for row in self.location) for j in range(6)]):
              L.append(i)
              self.lines += 1
              if self.lines % 10 == 0:
                  self.level += 1

      if L:
              self.location = [l for l in self.location if l[0] not in L]
              for j in L:
                  for m in self.location:
                      if m[0] < j:
                          m[0] += 1
              self.score += (len(L))**2*1000

  '''
  ブロックを右に移動させる
  '''
  def right(self):
      for j in self.moveblock:
          j[1] +=1
      self.printboard()

  '''
  ブロックを左に移動させる
  '''
  def left(self):
      for j in self.moveblock:
          j[1] -=1
      self.printboard()

  '''
  ブロックを下に動かせるか確かめたのちに下に移動させる
  '''
  def down(self):
      if self.stopfall():
          self.moveblock = [x for x in self.moveblock if len(x) != 2]

          self.set_removeline()
          if self.location:
            self.maxheight = min(self.location[i][0] for i in range(len(self.location)))
          else:
            self.maxheight = 22
          self.setblock(copy.deepcopy(self.nex[self.num%20]))
          self.canhold = True
          self.num += 1
          self.printboard()
      else:
          for j in self.moveblock:
              j[0] +=1
          self.printboard()

  '''
  ブロックを回転させられるか確認する。
  - direction:右か左か
  '''
  def turn(self, direction):
      col = self.moveblock[0][2]
      angle_before = self.angle
      if direction == "left":
          angle = self.angle-1 if self.angle >0 else 3
      elif direction == "right":
          angle = self.angle+1 if self.angle <3 else 0
      if col > 1:
          e = Tetris.roundblock[col-2][angle]
          if col != 3:
                  m = [[self.moveblock[4][0]+e[0][0],self.moveblock[4][1]+e[0][1]],
                       [self.moveblock[4][0]+e[1][0],self.moveblock[4][1]+e[1][1]],
                       [self.moveblock[4][0]+e[2][0],self.moveblock[4][1]+e[2][1]],
                       [self.moveblock[4][0],self.moveblock[4][1]]]
                  check = any(e in [row[:2] for row in self.location] for e in m)
                  if not check and max(m[0][1],m[1][1],m[2][1],m[3][1]) <= 5 and min(m[0][1],m[1][1],m[2][1],m[3][1])>=0 and max(m[0][0],m[1][0],m[2][0],m[3][0])<=21:
                      return [[m[0][0],m[0][1],col],[m[1][0],m[1][1],col],[m[2][0],m[2][1],col],[m[3][0],m[3][1],col],self.moveblock[4]]
          elif col ==3:
                  m = [[round(self.moveblock[4][0]+e[0][0]),round(self.moveblock[4][1]+e[0][1])],
                       [round(self.moveblock[4][0]+e[1][0]),round(self.moveblock[4][1]+e[1][1])],
                       [round(self.moveblock[4][0]+e[2][0]),round(self.moveblock[4][1]+e[2][1])],
                       [round(self.moveblock[4][0]+e[3][0]),round(self.moveblock[4][1]+e[3][1])]]
                  check = any(e in [row[:2] for row in self.location] for e in m)
                  if not check and max(m[0][1],m[1][1],m[2][1],m[3][1])<=5 and min(m[0][1],m[1][1],m[2][1],m[3][1])>=0 and max(m[0][0],m[1][0],m[2][0],m[3][0])<=21:
                      return [[m[0][0],m[0][1],col],[m[1][0],m[1][1],col],[m[2][0],m[2][1],col],[m[3][0],m[3][1],col],self.moveblock[4]]
      return []

  def turnright(self):
      self.moveblock = self.turn_right
      self.angle = self.angle+1 if self.angle <3 else 0
      self.printboard()

  def turnleft(self):
      self.moveblock = self.turn_left
      self.angle = self.angle-1 if self.angle >0 else 3
      self.printboard()

  '''
  ブロックをホールドする
  '''
  def holding(self):
      if not self.hold:
          self.hold =Tetris.blocks[self.moveblock[0][2]-1][:]
          self.setblock(copy.deepcopy(self.nex[self.num%20]))
          self.canhold = True
          self.num += 1
      else:
          if self.canhold:
              self.hold , self.moveblock = Tetris.blocks[self.moveblock[0][2]-1] , copy.deepcopy(self.hold)
              self.canhold = False
              self.angle = 0
          else:
              return
      self.printboard()

  '''
  ゲームオーバーかどうかの判定を行う
  '''
  def gameover(self):
    """Return True when self.maxheight (the smallest locked row index) is below 2."""
    return self.maxheight < 2

  '''
  盤面の形状を返す
  '''
  def board_check(self):
      result = []
      vacant_count = 0
      for j in range(6):
        _array = [l[0] for l in self.location if l[1] == j]
        try:
          mine0_ = min(_array)
        except:
          mine0_ = 22
        result.append(mine0_ - self.maxheight)
        vacant_count += 22 - mine0_ - len(_array)
      self.shape = min(result[0],4)*100000 + min(result[1],4)*10000 + min(result[2],4)*1000 + min(result[3],4)*100 + min(result[4],4)*10 + min(result[5],4)
      self.shape2 = abs(result[0] - result[1]) + abs(result[1] - result[2]) + abs(result[2] - result[3]) + abs(result[3] - result[4]) + abs(result[4] - result[5])
      self.vacant_count = vacant_count

  '''
  盤面と落下中のブロックの形状を返す
  '''
  def get_state(self):
    height = self.maxheight - int(self.moveblock[4][0])
    moveblock = self.moveblock[0][2] * 1000 +  self.angle * 100 + max(min(height, 4),-2) * 10 + int(self.moveblock[4][1])
    return self.shape, moveblock, self.vacant_count, self.maxheight, self.shape2

  '''
  すべての可能な行動を返す
  '''
  def get_possible_actions(self):
    possible_actions = [0]
    height = self.maxheight - int(self.moveblock[4][0])
    if not height >= 4:
      left = self.moveblock[4][1] <= 3 and not any((any(l[:2] == [i[0],i[1]-1] for l in self.location) or i[1] <= 0) for i in self.moveblock)
      if (left):
        possible_actions.append(1)

      right = self.moveblock[4][1] >= 2.5 and not any((any(l[:2] == [i[0],i[1]+1] for l in self.location) or i[1] >= 5) for i in self.moveblock)
      if (right):
        possible_actions.append(2)

      if self.angle in [0,3] and self.turn_left:
        possible_actions.append(3)

      if self.angle in [0,1] and self.turn_right:
        possible_actions.append(4)
    else:
      if self.canhold:
        # possible_actions.append(5)
        possible_actions.append(4+ (self.hold[0][2] if self.hold else self.nex[self.num%20][0][2]))
    random.shuffle(possible_actions) #シャッフルして渡すことで学習初期の学習効率を向上させる
    return possible_actions

  '''
  スコアを取得する
  '''
  def get_score(self):
    """Return the current score (self.score)."""
    return self.score

  '''
  盤面の変更を行う
  '''
  def modify_board(self, action):
    if action == 0:
        self.down()
    elif action == 1:
        self.left()
    elif action == 2:
        self.right()
    elif action == 3:
        self.turnleft()
    elif action == 4:
        self.turnright()
    elif action >= 5:
        self.holding()

    self.turn_left = self.turn("left")
    self.turn_right = self.turn("right")
    self.board_check()


In [None]:
class Testris:
  # Piece definitions. Each piece is four cells [row, col, piece id] followed by
  # the rotation pivot [row, col]; the trailing comment names the piece.
  block1 = [[0, 2, 1], [-1, 2, 1], [-1, 3, 1], [0, 3, 1], [-0.5, 2.5]] #O
  block2 = [[0, 3, 2], [0, 2, 2], [0, 4, 2], [-1, 3, 2], [0, 3]] #T
  block3 = [[-1, 2, 3], [-1, 1, 3], [-1, 4, 3], [-1, 3, 3], [-0.5, 2.5]] #I
  block4 = [[0, 3, 4], [0, 2, 4], [0, 4, 4], [-1, 2, 4], [0, 3]] #J
  block5 = [[0, 3, 5], [0, 2, 5], [0, 4, 5], [-1, 4, 5], [0, 3]] #L
  block6 = [[0, 3, 6], [-1, 2, 6], [0, 4, 6],  [-1, 3, 6],[0, 3]] #Z
  block7 = [[0, 3, 7], [0, 2, 7], [-1, 4, 7], [-1, 3, 7], [0, 3]] #S
  blocks = [block1,block2,block3,block4,block5,block6,block7]
  # blocks = [block5,block5,block5,block5,block5,block5,block5]
  # blocks = [block4,block4,block4,block4,block4,block4,block4]

  # Rotation tables: one entry per piece id 2-7, each holding four orientations
  # of (row, col) offsets from the pivot.
  # NOTE(review): the methods of this class read Tetris.blocks / Tetris.roundblock,
  # not these copies - confirm whether these attributes are still needed.
  roundblock= [[[[0,-1],[-1,0],[0,1]],[[-1,0],[0,1],[1,0]],[[0,1],[1,0],[0,-1]],[[1,0],[0,-1],[-1,0]]],
  [[[-0.5,-1.5],[-0.5,-0.5],[-0.5,0.5],[-0.5,1.5]],[[-1.5,0.5],[-0.5,0.5],[0.5,0.5],[1.5,0.5]],[[0.5,-1.5],[0.5,-0.5],[0.5,0.5],[0.5,1.5]],[[-1.5,-0.5],[-0.5,-0.5],[0.5,-0.5],[1.5,-0.5]]],
  [[[-1,-1],[0,-1],[0,1]],[[-1,1],[-1,0],[1,0]],[[0,-1],[0,1],[1,1]],[[-1,0],[1,0],[1,-1]]],
  [[[-1,1],[0,1],[0,-1]],[[1,0],[-1,0],[1,1]],[[0,-1],[0,1],[1,-1]],[[1,0],[-1,0],[-1,-1]]],
  [[[-1,-1],[-1,0],[0,1]],[[-1,1],[0,1],[1,0]],[[0,-1],[1,0],[1,1]],[[1,-1],[0,-1],[-1,0]]],
  [[[0,-1],[-1,0],[-1,1]],[[-1,0],[0,1],[1,1]],[[1,-1],[1,0],[0,1]],[[-1,-1],[0,-1],[1,0]]]]


  '''
  盤面の情報などの情報の初期化を行う
  display:画面を表示するか否か
  '''
  def __init__(self):
    """Create a game and start a fresh round; takes no display options."""
    self.reset_game()

  '''
  ゲームをリセットする
  '''
  def reset_game(self):
      """Reset all game state, refill the piece queue and deal the first piece."""
      # Board and falling-piece state.
      self.location = set()   # locked cells as (row, col) tuples
      self.moveblock = []     # the piece currently falling
      self.hold = []          # the piece in the hold slot
      self.angle = 0          # orientation of the falling piece (0, 1, 2, 3)
      self.canhold = True     # whether a hold swap is currently allowed

      # Progress counters.
      self.num = 1            # ordinal of the falling piece
      self.lines = self.score = self.level = 0

      # Board features exposed through get_state().
      self.maxheight = 22     # smallest occupied row index; 22 when the board is empty
      self.shape = self.shape2 = self.vacant_count = 0

      # Piece queue, drawn uniformly from the Tetris piece table.
      self.nex = [random.choice(Tetris.blocks) for _ in range(20)]
      self.setblock(copy.deepcopy(self.nex[0]))

      # Cache both rotations of the first piece.
      self.turn_left = self.turn("left")
      self.turn_right = self.turn("right")

  '''
  ゲームの盤面を表示する
  '''
  def printboard(self, display_force = False):
      if display_force:
        pass
      board = "\n"
      board += ("⏹"*8+"     NEXT "+str(self.num+1) +"\n")
      for i in range(22):
          board += ("⏹")
          for j in range(6):
              if (i,j) in self.location:
                  board += ("⬛")
              else:
                  board += ("⬜")
          board += ("⏹  ")
          if i == 0 or i == 1:
              for j in range(1,5):
                  af = [l for l in self.nex[self.num%20] if l[0] ==i-1 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 3:
              board += (" 👆👆👆 "+str(self.num+2))
          elif i == 4 or i == 5:
              for j in range(1,5):
                  af = [l for l in self.nex[(self.num+1)%20] if l[0] ==i-5 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 7:
              board += (" 👆👆👆 "+str(self.num+3))
          elif i == 8 or i == 9:
              for j in range(1,5):
                  af = [l for l in self.nex[(self.num+2)%20] if l[0] ==i-9 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 12:
              board += (" LEVEL =>"+str(self.level))
          elif i ==14:
              board += (" LINES =>"+str(self.lines))
          elif i == 16:
              board += (" SCORE =>"+str(self.score))
          elif i == 18:
              board += (" -HOLD-")
          elif i == 19 or i == 20:
              for j in range(1,5):
                  af = [l for l in self.hold if l[0] ==i-20 and l[1]==j]
                  if af:
                      board += (chr(128996+af[0][2]))
                  else:
                      board += ("⬜")
          elif i == 21:
              board += (" ------")
          board += ("\n")
      board += ("⏹"*8)
      print(board)

  '''
  引数に指定したブロックを盤面に追加する
  '''
  def setblock(self, block):
      self.moveblock = block
      self.angle = 0
      if self.num%20==13:
          self.nex =[(random.choice(Tetris.blocks) if i < 10 else self.nex[i]) for i in range(20)]
      elif self.num%20==3:
          self.nex =[(random.choice(Tetris.blocks) if i >= 10 else self.nex[i]) for i in range(20)]

  '''
  ブロックが落下しなくなったか判定する
  '''
  def stopfall(self):
      return any(((i[0]+1,i[1]) in self.location or i[0] >=21) for i in self.moveblock[:4])

  '''
  一列そろっている箇所があれば消去する
  '''
  def set_removeline(self):
      candidate = [self.moveblock[i][0] for i in range(4)]
      for j in range(4):
          self.location.add(tuple(self.moveblock[j][:2]))
      L = []
      for i in set(candidate):
          if all((i,j) in self.location for j in range(6)):
              L.append(i)
              self.lines += 1
              if self.lines % 10 == 0:
                  self.level += 1

      if L:
          L.sort()
          for i in L:
            self.location -= {(i,j) for j in range(6)}
          if len(L) == 1:
            li = [l for l in self.location if l[0] < L[0]]
            for m in li:
              self.location.discard(m)
            for m in li:
              self.location.add((m[0]+1,m[1]))
          elif len(L) == 2:
            li_1 = [l for l in self.location if l[0] < L[0]]
            li_2 = [l for l in self.location if l[0] < L[1] and l[0] > L[0]]
            for m in li_2:
              self.location.discard(m)
            for m in li_2:
              self.location.add((m[0]+1,m[1]))
            for m in li_1:
              self.location.discard(m)
            for m in li_1:
              self.location.add((m[0]+2,m[1]))
          elif len(L) == 3:
            li_1 = [l for l in self.location if l[0] < L[0]]
            li_2 = [l for l in self.location if l[0] < L[1] and l[0] > L[0]]
            li_3 = [l for l in self.location if l[0] < L[2] and l[0] > L[1]]
            for m in li_3:
              self.location.discard(m)
            for m in li_3:
              self.location.add((m[0]+1,m[1]))
            for m in li_2:
              self.location.discard(m)
            for m in li_2:
              self.location.add((m[0]+2,m[1]))
            for m in li_1:
              self.location.discard(m)
            for m in li_1:
              self.location.add((m[0]+3,m[1]))
          elif len(L) == 4:
            li = [l for l in self.location if l[0] < L[0]]
            for m in li:
              self.location.discard(m)
            for m in li:
              self.location.add((m[0]+4,m[1]))
          self.score += (len(L))**2*1000

  '''
  ブロックを右に移動させる
  '''
  def right(self):
      for j in self.moveblock:
          j[1] +=1

  '''
  ブロックを左に移動させる
  '''
  def left(self):
      for j in self.moveblock:
          j[1] -=1

  '''
  ブロックを下に動かせるか確かめたのちに下に移動させる
  '''
  def down(self):
      if self.stopfall():
          self.moveblock = [x for x in self.moveblock if len(x) != 2]

          self.set_removeline()
          if self.location:
            self.maxheight = min(i[0] for i in self.location)
          else:
            self.maxheight = 22

          self.board_check()

          self.setblock(copy.deepcopy(self.nex[self.num%20]))
          self.canhold = True
          self.num += 1
      else:
          for j in self.moveblock:
              j[0] +=1

  '''
  ブロックを回転させられるか確認する。
  - direction:右か左か
  '''
  def turn(self, direction):
      """Return the falling piece rotated one step, or [] if that is impossible.

      direction: "left" or "right".

      The result has the same layout as moveblock (four [row, col, id] cells
      plus the shared pivot entry). The O piece (id 1) never rotates. A
      rotation is rejected when it overlaps a locked cell, leaves columns 0-5
      or goes below row 21.
      """
      col = self.moveblock[0][2]
      if direction == "left":
          angle = (self.angle - 1) % 4
      elif direction == "right":
          angle = (self.angle + 1) % 4

      if col <= 1:
          return []

      offsets = Tetris.roundblock[col - 2][angle]
      pivot = self.moveblock[4]
      if col == 3:
          # The I piece pivots on a half-cell, so the sums are rounded to ints.
          cells = [[round(pivot[0] + dr), round(pivot[1] + dc)] for dr, dc in offsets]
      else:
          # Three offset cells plus the pivot cell itself.
          cells = [[pivot[0] + dr, pivot[1] + dc] for dr, dc in offsets[:3]]
          cells.append([pivot[0], pivot[1]])

      rows = [cell[0] for cell in cells]
      columns = [cell[1] for cell in cells]
      blocked = any(tuple(cell) in self.location for cell in cells)
      if blocked or max(columns) > 5 or min(columns) < 0 or max(rows) > 21:
          return []
      return [[r, c, col] for r, c in cells] + [pivot]

  def turnright(self):
      self.moveblock = self.turn_right
      self.angle = self.angle+1 if self.angle <3 else 0

  def turnleft(self):
      self.moveblock = self.turn_left
      self.angle = self.angle-1 if self.angle >0 else 3

  '''
  ブロックをホールドする
  '''
  def holding(self):
      """Store the falling piece in the hold slot, swapping with any held piece.

      With an empty slot the next queued piece becomes the falling piece.
      With a filled slot the two pieces are swapped, which is allowed only
      while self.canhold is True; otherwise nothing happens.
      """
      spawn_form = Tetris.blocks[self.moveblock[0][2] - 1]

      if self.hold:
          if not self.canhold:
              return
          released = copy.deepcopy(self.hold)
          self.hold = spawn_form
          self.moveblock = released
          self.canhold = False
          self.angle = 0
      else:
          self.hold = spawn_form[:]
          self.setblock(copy.deepcopy(self.nex[self.num % 20]))
          # NOTE(review): canhold stays True after the very first hold, so the
          # newly dealt piece can be swapped again right away - confirm intent.
          self.canhold = True
          self.num += 1

  '''
  ゲームオーバーかどうかの判定を行う
  '''
  def gameover(self):
    """Return True when self.maxheight (the smallest locked row index) is below 2."""
    return self.maxheight < 2

  '''
  盤面の形状を返す
  '''
  def board_check(self):
      result = []
      vacant_count = 0
      for j in range(6):
        _array = [l[0] for l in self.location if l[1] == j]
        try:
          mine0_ = min(_array)
        except:
          mine0_ = 22
        result.append(mine0_ - self.maxheight)
        vacant_count += 22 - mine0_ - len(_array)
      self.shape = min(result[0],4)*100000 + min(result[1],4)*10000 + min(result[2],4)*1000 + min(result[3],4)*100 + min(result[4],4)*10 + min(result[5],4)
      self.shape2 = abs(result[0] - result[1]) + abs(result[1] - result[2]) + abs(result[2] - result[3]) + abs(result[3] - result[4]) + abs(result[4] - result[5])
      self.vacant_count = vacant_count

  '''
  盤面と落下中のブロックの形状を返す
  '''
  def get_state(self):
    height = self.maxheight - int(self.moveblock[4][0])
    moveblock = self.moveblock[0][2] * 1000 +  self.angle * 100 + max(min(height, 4),-2) * 10 + int(self.moveblock[4][1])
    return self.shape, moveblock, self.vacant_count, self.maxheight, self.shape2

  '''
  すべての可能な行動を返す
  '''
  def get_possible_actions(self):
    possible_actions = [0]
    height = self.maxheight - int(self.moveblock[4][0])
    if not height >= 4:
      left = self.moveblock[4][1] <= 3 and not any(((i[0],i[1]-1) in self.location or i[1] <= 0) for i in self.moveblock[:4])
      if (left):
        possible_actions.append(1)

      right = self.moveblock[4][1] >= 2.5 and not any(((i[0],i[1]+1) in self.location or i[1] >= 5) for i in self.moveblock[:4])
      if (right):
        possible_actions.append(2)

      if self.angle in [0,3]:
        self.turn_left = self.turn("left")
        if self.turn_left:
          possible_actions.append(3)

      if self.angle in [0,1]:
        self.turn_right = self.turn("right")
        if self.turn_right:
          possible_actions.append(4)
    else:
      if self.canhold:
        # possible_actions.append(5)
        possible_actions.append(4+ (self.hold[0][2] if self.hold else self.nex[self.num%20][0][2]))
    # random.shuffle(possible_actions) #シャッフルして渡すことで学習初期の学習効率を向上させる
    return possible_actions

  '''
  スコアを取得する
  '''
  def get_score(self):
    """Return the current score (self.score)."""
    return self.score

  '''
  盤面の変更を行う
  '''
  def modify_board(self, action):
    if action == 0:
        self.down()
    elif action == 1:
        self.left()
    elif action == 2:
        self.right()
    elif action == 3:
        self.turnleft()
    elif action == 4:
        self.turnright()
    elif action >= 5:
        self.holding()

    # self.turn_left = self.turn("left")
    # self.turn_right = self.turn("right")
    # self.board_check()


## 2. Agentの定義

In [None]:
class Agent:
    """Base class for players; subclasses override action() and update()."""

    def __init__(self):
        # frozen == True means evaluation mode.
        self.frozen = False

    def train(self):
        """Switch to training mode."""
        self.frozen = False

    def eval(self):
        """Switch to evaluation mode."""
        self.frozen = True

    def _observe(self, tetris: Tetris ):
        """Return the game's state tuple as produced by tetris.get_state()."""
        state = tetris.get_state()
        return state

    def action(self, tetris: Tetris):
        """Choose an action for the current game; the base class returns None."""
        return None

    def update(self, tetris: Tetris, state, action, reward1, new_state):
        """Learn from one transition; the base class does nothing."""
        return None

### 2-1. Qエージェントの定義

In [None]:
class QAgent(Agent):
    """Tabular Q-learning agent with epsilon-greedy exploration.

    The Q-table is a dict keyed by (board shape, piece code, action); missing
    entries count as 0.
    """

    def __init__(self, lr: float, eps = 0.3):
        super().__init__()
        self.q_table = {}
        self.discount_factor = 0.8
        self.learning_rate = lr
        self.epsilon = eps

    def action(self, tetris: Tetris) :
        """Pick an action: random with probability epsilon while training, otherwise greedy."""
        board, block, _vacant, _height, _rough = self._observe(tetris)
        candidates = tetris.get_possible_actions()
        exploring = not self.frozen and random.random() < self.epsilon
        if exploring:
            return random.choice(candidates)
        chosen, _ = self._get_the_best(board, block, candidates)
        return chosen

    def _get_the_best(self, board, block, possible_moves):
        """Return (move, q_value) of the first move with the highest Q-value.

        Returns (None, -inf) when possible_moves is empty.
        """
        best_move, best_q_value = None, -float('inf')
        for move in possible_moves:
            candidate = self.q_table.get((board, block, move), 0)
            if candidate > best_q_value:
                best_move, best_q_value = move, candidate
        return best_move, best_q_value

    # state and next_state are tuples whose first two items are board and block.
    def update(self, tetris, state, action, reward, next_state):
        """Apply one Q-learning update for (state, action); a no-op while frozen."""
        if self.frozen:
            return None

        key = (state[0], state[1], action)
        old_value = self.q_table.get(key, 0)
        next_moves = tetris.get_possible_actions()

        assert next_state is not None
        next_max = max(
            self.q_table.get((next_state[0], next_state[1], move), 0)
            for move in next_moves
        )

        td_error = reward + self.discount_factor * next_max - old_value
        new_value = old_value + self.learning_rate * td_error
        # A value of exactly 0 equals the table default, so it is not stored.
        if new_value != 0:
            self.q_table[key] = new_value

### 2-2. DQNエージェントの定義

In [None]:
import math
from torch import nn, optim
from torch.nn import functional as F


class QNetwork(nn.Module):
    """Small CNN that maps a board to one Q-value per action.

    Args:
        state_size: number of board cells (height * width).
        action_size: number of Q-values to output.
        board_width: number of board columns. The height is derived as
            state_size // board_width.

    Raises:
        ValueError: if state_size is not a multiple of board_width, or the
            board is smaller than 6x6 (the two convolutions need that much).
    """

    def __init__(self, state_size, action_size, board_width=6):
        super().__init__()
        if board_width <= 0 or state_size % board_width != 0:
            raise ValueError("state_size must be a positive multiple of board_width")
        self.state_size = state_size
        self.intermidiate_size = state_size * 8 // 3
        self.action_size = action_size
        self.board_width = board_width
        self.board_height = state_size // board_width
        # conv1 (kernel 4) followed by conv2 (kernel 3) shrinks each side by 5.
        if min(self.board_height, self.board_width) < 6:
            raise ValueError("board must be at least 6x6 for the convolution stack")

        # Gate on the residual branch; starts at 0 so that branch is initially off.
        self.a = nn.Parameter(torch.tensor(0.0), requires_grad=True)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=4)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.gap = nn.AdaptiveAvgPool2d([1, 1])
        self.in_proj = nn.Linear(64, action_size)
        self.o_proj = nn.Sequential(
            nn.Linear(action_size, self.intermidiate_size),
            nn.GELU(),
            nn.Linear(self.intermidiate_size, action_size),
        )

    @property
    def device(self):
        """Device of the first parameter."""
        return next(self.parameters()).device

    @property
    def dtype(self):
        """Dtype of the first parameter."""
        return next(self.parameters()).dtype

    def forward(self, state):
        """ f: state -> Q(state, action)

        state may be flat or already shaped, single or batched, as long as its
        element count is a multiple of state_size. Returns a tensor of shape
        (batch, action_size).
        """
        # Conv2d needs (batch, channels, height, width).
        x = state.reshape(-1, 1, self.board_height, self.board_width)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # (batch, 64, 1, 1) -> (batch, 64) so the linear layer sees 64 features.
        x = self.gap(x).flatten(1)
        x = self.in_proj(x)
        x = x + self.a * self.o_proj(x)
        return x


class DQNAgent(QAgent):
    """Q-learning agent whose action values come from a convolutional Q-network.

    The network input is a (20, 6) tensor built from the game object: board
    rows 2-21, with locked cells set to 1.0 and the falling piece's cells set
    to 0.5. Action codes 0-4 map to network outputs 0-4, and every hold code
    (5 or more) maps to output 5.

    NOTE(review): this encoding and the update schedule are a reconstruction -
    the previous body referenced an undefined board_size and could not run.
    Confirm they match the intended design.
    """

    BOARD_ROWS = 20
    BOARD_COLS = 6
    HIDDEN_TOP_ROWS = 2       # rows 0-1 lie above the encoded area
    ACCUMULATION_STEPS = 32   # optimizer step every N update() calls

    def __init__(self, lr: float):
        super().__init__(lr)
        self.state_size = 6 * 20
        self.action_size = 6
        self.q_network = QNetwork(self.state_size, self.action_size)
        # Use the GPU when there is one instead of failing on CPU-only machines.
        self.q_network.to("cuda" if torch.cuda.is_available() else "cpu")
        self.optimizer = optim.SGD(self.q_network.parameters(), lr=self.learning_rate, momentum=0.9)
        self.n_update = 0
        # Board tensor seen by the most recent _observe() call; update() uses it
        # as the "previous state" because Env passes the compact state tuple.
        self._last_board = None

    def train(self):
        super().train()
        self.q_network.train()

    def eval(self):
        super().eval()
        self.q_network.eval()

    def _encode_board(self, tetris) -> torch.Tensor:
        """Return the (BOARD_ROWS, BOARD_COLS) tensor described in the class docstring."""
        board = torch.zeros(self.BOARD_ROWS, self.BOARD_COLS)

        def mark(cells, value):
            for cell in cells:
                row = int(cell[0]) - self.HIDDEN_TOP_ROWS
                col = int(cell[1])
                if 0 <= row < self.BOARD_ROWS and 0 <= col < self.BOARD_COLS:
                    board[row, col] = value

        mark(tetris.location, 1.0)
        mark(tetris.moveblock[:4], 0.5)
        return board

    def _observe(self, tetris: Tetris):
        """Return the state tuple with the board tensor in place of the shape code."""
        _, block, vacant, height, rough = tetris.get_state()
        board = self._encode_board(tetris)
        self._last_board = board
        return board, block, vacant, height, rough

    def _action_index(self, action) -> int:
        """Map an action code to its network output index."""
        return min(action, self.action_size - 1)

    def _get_qvalues(self, state: torch.Tensor) -> torch.Tensor:
        """Return the Q-values of one board as a flat tensor of length action_size."""
        state = state.to(self.q_network.dtype).to(self.q_network.device)
        return self.q_network(state.reshape(-1)).reshape(-1)

    def _get_the_best(self, board, block, possible_moves, count=0):
        """Return (move, q_value) of the best allowed move for the board tensor.

        block is unused (the falling piece is already part of the board
        tensor) and kept so that the inherited action() can call this method.
        count adds a bonus of (count / 35) ** 2 to action 0; it defaults to 0.
        """
        with torch.no_grad():
            qvalues = self._get_qvalues(board).tolist()
        best_move = None
        best_q_value = -float('inf')
        for move in possible_moves:
            q_value = qvalues[self._action_index(move)]
            if move == 0:
                q_value += (count / 35) ** 2
            if q_value > best_q_value:
                best_q_value = q_value
                best_move = move
        return best_move, best_q_value

    def update(self, tetris, state, action, reward, next_state):
        """Accumulate one TD gradient; step the optimizer every ACCUMULATION_STEPS calls.

        state may be a board tensor; otherwise the board cached by the last
        _observe() call is used as the previous state. next_state is unused:
        the successor board is encoded from tetris, which must already be in
        the successor state.
        """
        if self.frozen:
            return None

        board = state if isinstance(state, torch.Tensor) else self._last_board
        if board is None:
            # No previous observation is available to learn from.
            return None

        with torch.no_grad():
            next_board = self._encode_board(tetris)
            _, best_value = self._get_the_best(next_board, None, tetris.get_possible_actions())
            next_max = max(0, best_value)
            target_q = torch.tensor(reward + self.discount_factor * next_max)
            target_q = target_q.to(self.q_network.device).to(self.q_network.dtype)

        old_qvalue = self._get_qvalues(board)[self._action_index(action)]
        # Scaled so that the accumulated gradient is an average over the window.
        loss = nn.functional.huber_loss(old_qvalue, target_q) / self.ACCUMULATION_STEPS
        loss.backward()

        self.n_update += 1
        if self.n_update % self.ACCUMULATION_STEPS == 0:
            self.optimizer.step()
            self.optimizer.zero_grad(set_to_none=True)

### 2-3. 人間エージェントの定義

In [None]:
class HumanAgent(Agent):
    """Agent whose moves are typed in by a human on standard input."""

    def action(self, tetris: Tetris):
        """Prompt until the user enters one of the currently possible moves.

        ``input()`` returns a string, so the text is converted to ``int``
        before it is compared with the moves reported by
        ``tetris.get_possible_actions()``. The previous version compared the
        raw string, reported valid input as invalid, and returned after a
        single attempt without re-prompting.

        Args:
            tetris: Game whose ``get_possible_actions()`` lists legal moves.

        Returns:
            The chosen move: the raw text if the game lists moves as strings,
            otherwise the integer parsed from the input.
        """
        valid_moves = tetris.get_possible_actions()
        while True:
            user_input = input("Enter your move: {}".format(valid_moves))
            # Kept for games that list their moves as strings.
            if user_input in valid_moves:
                return user_input
            try:
                move = int(user_input)
            except ValueError:
                move = None  # not a number: fall through to the error message
            if move is not None and move in valid_moves:
                return move
            print("Invalid move. Valid moves are: {}".format(valid_moves))

## 3. Env.

In [None]:
import tqdm
import plotly.graph_objects as go
import statistics

class Env:
    """Couples an agent with a Tetris game and runs play/training episodes."""

    def __init__(self, agent: Agent, tetris: Tetris) -> None:
        """Store the agent and the game that every episode will use."""
        self.agent = agent
        self.tetris = tetris

    def _get_reward(self) -> int:
        """Return the game's current score from ``tetris.get_score()``."""
        return self.tetris.get_score()

    def train(self, episodes: int, visualize=False):
        """Run ``episodes`` training episodes and collect the final scores.

        After every tenth of the run, the board is printed together with the
        mean, max and low-median score of the episodes in that slice.

        Args:
            episodes: Number of episodes to run.
            visualize: Forwarded to ``execute``.

        Returns:
            List with the final score of each episode, in order.
        """
        # Floor at 1: with fewer than 10 episodes the interval used to be 0,
        # which made the modulo below raise ZeroDivisionError.
        report_every = max(1, episodes // 10)
        record = []
        for i in tqdm.tqdm(range(episodes)):
            self.execute(train=True, visualize=visualize)
            record.append(self._get_reward())
            if (i + 1) % report_every == 0:
                recent = record[-report_every:]
                self.tetris.printboard(display_force = True)
                print("Episode: from {} to {}".format(i + 1 - report_every, i))
                print("Average Score: {:>7}".format(int(sum(recent) / report_every)))
                print("Max Score: {:>11}".format(max(recent)))
                print("Median Score:  {:>7}".format(statistics.median_low(recent)))
        print("Report:")
        return record

    def execute(self, train=False, visualize=True):
        """Play one episode from a fresh game until game over.

        Args:
            train: When true, a shaped reward is computed after every move
                and passed to ``agent.update``.
            visualize: Accepted for interface compatibility; not read here.

        Returns:
            The game's score when the episode ends.
        """
        self.tetris.reset_game()
        while not self.tetris.gameover():
            state = self.tetris.get_state()
            action = self.agent.action(self.tetris)
            before_point = self.tetris.get_score()
            self.tetris.modify_board(action)
            after_point = self.tetris.get_score()
            # Base reward: square root of the score gained by this move,
            # which dampens large line-clear payouts.
            reward = math.sqrt((after_point - before_point)*10)*10
            # (A -100000 penalty on game over was tried here and disabled.)
            if train:
                next_state = self.tetris.get_state()
                # Shaping terms driven by changes in state components 2, 3
                # and 4. NOTE(review): what each component measures is not
                # visible here -- confirm against Tetris.get_state().
                # Component 2: increases are penalised, capped at 2400.
                reward -= max(min((next_state[2] - state[2]) * 400, 2400), 0)
                # Component 3: only decreases are penalised.
                reward += min((next_state[3] - state[3]) * 350, 0)
                # Component 4: increases penalised, decreases rewarded.
                reward -= (next_state[4] - state[4]) * 100

                if action is not None:
                    self.agent.update(self.tetris, state, action, reward, next_state)

        # Game over: report the final score.
        return self.tetris.get_score()

## 4. google driveからQテーブルを読み込む

In [None]:
# Mount Google Drive.
from google.colab import drive
drive.mount('/content/drive')

# Make the directory holding the saved Q-table modules importable.
import sys
dir_path = '/content/drive/My Drive/Q_TETRIS_8/' # @param {type:"string"}
if dir_path not in sys.path:  # avoid piling up duplicates when the cell is re-run
  sys.path.append(dir_path)

import importlib
file_name = "q_table_module_b" # @param {type:"string"}

q_table = {}

# Shard files may have been (re)written during this session; make sure the
# import system sees the current directory contents.
importlib.invalidate_caches()

# The table is stored as 8 modules named <file_name>_1 .. <file_name>_8,
# each defining a module-level dict `q_table`.
for i in tqdm.tqdm(range(8)):
  file_name_ = f'{file_name}_{i+1}'
  module = importlib.import_module(file_name_)
  q_table.update(module.q_table)
  # `del module` alone frees nothing because sys.modules still references the
  # module (and its dict). Dropping the cache entry releases the shard and
  # lets a re-run of this cell read the file again instead of a stale copy.
  del module
  sys.modules.pop(file_name_, None)

q_agent = QAgent(lr=0.1, eps=0.1)
q_agent.q_table = q_table

print("正常に読み込みが完了しました!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


100%|██████████| 8/8 [00:27<00:00,  3.43s/it]

正常に読み込みが完了しました!





## 5. 学習が完了したQエージェントにプレイさせてみる
モデルが実際にどのようなプレイをするのか確かめる。

In [None]:
# Let the trained agent play one displayed game in evaluation mode.
q_agent.eval()
env = Env(q_agent, Tetris(True, interval = 1/2))
try:
    env.execute()
finally:
    # Switch back to training mode even when the game is interrupted
    # (e.g. by stopping the cell), so later cells don't inherit eval mode.
    q_agent.train()


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 2
⏹⬜🟧🟧🟧🟧⬜⏹  ⬜🟪🟪⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟪🟪
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 3
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟫🟫⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 4
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LEVEL =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LINES =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   SCORE =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   -HOLD-
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹

⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 2
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬜🟧🟧🟧🟧⬜⏹  ⬜⬜🟪🟪
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 3
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟫🟫⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 4
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LEVEL =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LINES =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   SCORE =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   -HOLD-
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹

⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 2
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟪🟪
⏹⬜🟧🟧🟧🟧⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 3
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟫🟫⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 4
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LEVEL =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LINES =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   SCORE =>0
⏹⬜⬜⬜⬜⬜

KeyboardInterrupt: 

## 6. 追加で学習させる
さらに学習させたい場合はここを実行  
epsilon, lr を適宜変更するとよい

In [None]:
epsilon = 0.05 # @param {type:"number"}
lr = 0.1 # @param {type:"number"}
train_num = 20000 # @param {type:"number"}
repetition_num = 100 # @param {type:"number"}

from google.colab import drive
import os

drive.mount('/content/drive')
save_path = '/content/drive/My Drive/Q_TETRIS_2' # @param {type:"string"}
os.makedirs(save_path, exist_ok=True)
os.chdir(save_path)

q_agent.train()
# Was `Testris()`, a typo that raised NameError; train on a display-less game.
tetris = Tetris(False)
q_agent.epsilon = epsilon
q_agent.learning_rate = lr
for round_idx in range(repetition_num):
  print(f"{round_idx+1}/{repetition_num}回目の学習を開始します")
  env = Env(q_agent, tetris)
  record = env.train(train_num)
  fig = go.Figure(data=go.Scatter(y=record))
  fig.show()

  # Checkpoint after every round: split the Q-table into 8 importable
  # modules, each defining `q_table`.
  keys = list(q_agent.q_table.keys())
  split_index = len(keys) // 8 + 1
  file_name = "q_table_module_b" # @param {type:"string"}
  for i in range(8):
    file_path = f'{save_path}/{file_name}_{i+1}.py'
    # Slices clamp at the end of the list on their own. The previous explicit
    # bound of len(keys)-1 dropped the last key on every save.
    shard_keys = keys[split_index*i:split_index*(i+1)]
    dict_ = {key: q_agent.q_table[key] for key in shard_keys}
    with open(file_path, 'w') as f:
        # All spaces are stripped from the literal before writing
        # (presumably to keep the files small).
        f.write(f'q_table = {dict_}'.replace(" ", ""))
    print(f"q_tableを'{file_path}'に保存しました。")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
1/100回目の学習を開始します


 10%|█         | 2001/20000 [03:29<22:33, 13.30it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 84
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬛⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬜⬜⬜⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 85
⏹⬛⬛⬛⬛⬜⬜⏹  🟧🟧🟧🟧
⏹⬛⬛⬛⬜⬜⬛⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬜⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   👆👆👆 86
⏹⬛⬜⬜⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬜⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>3
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LINES =>39
⏹⬛⬛⬜⬛⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>47000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬜⬛⬜⬜⬛⬛⏹  🟧🟧🟧🟧
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜⬜⬜
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 0 to 1999
Average Score:  156501
Max Score:      673000
Median Score:   129000


 20%|██        | 4002/20000 [06:53<20:29, 13.01it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 150
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬛⬛⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 151
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   👆👆👆 152
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜🟦⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>8
⏹⬛⬛⬛⬜⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬜⏹   LINES =>82
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   SCORE =>108000
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 2000 to 3999
Average Score:  155787
Max Score:      890000
Median Score:   128000


 30%|███       | 6002/20000 [10:18<35:38,  6.54it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 355
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬜⬜⏹  
⏹⬛⬛⬜⬛⬜⬜⏹   👆👆👆 356
⏹⬜⬛⬛⬛⬛⬜⏹  ⬜⬜⬜🟩
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 357
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>21
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬜⬛⬜⏹   LINES =>219
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>297000
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬜⬛⬜⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 4000 to 5999
Average Score:  149760
Max Score:      754000
Median Score:   127000


 40%|████      | 8003/20000 [13:38<16:56, 11.81it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 319
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟫🟫⬜
⏹⬜⬛⬜⬜⬜⬛⏹  
⏹⬛⬛⬛⬜⬜⬛⏹   👆👆👆 320
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟨🟨🟨
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 321
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬜⬛⬜⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>19
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>195
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   SCORE =>235000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬜⬛⬛⬛⏹  🟧🟧🟧🟧
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 6000 to 7999
Average Score:  150250
Max Score:      911000
Median Score:   126000


 50%|████▉     | 9999/20000 [16:56<14:19, 11.63it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 80
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬜⬜⬜⬜⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 81
⏹⬛⬛⬜⬜⬛⬜⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬜⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬜⬛⬜⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 82
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟩🟩🟩
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬜⏹   LEVEL =>3
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬜⏹   LINES =>37
⏹⬛⬛⬜⬜⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>45000
⏹⬛⬛⬜⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 8000 to 9999
Average Score:  149884
Max Score:      762000
Median Score:   126000


 60%|██████    | 12000/20000 [20:22<12:00, 11.10it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 192
⏹⬜⬜⬜⬜⬛⬛⏹  ⬜🟨⬜⬜
⏹⬜⬜⬜⬜⬛⬜⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬜⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 193
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬜⬜⬜⏹   👆👆👆 194
⏹⬛⬛⬛⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬛⬛⬜⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬜⬛⬛⬜⏹   LEVEL =>11
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>112
⏹⬜⬜⬜⬛⬛⬛⏹  
⏹⬜⬜⬜⬜⬛⬜⏹   SCORE =>144000
⏹⬜⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 10000 to 11999
Average Score:  154050
Max Score:     1281000
Median Score:   127000


 70%|███████   | 14002/20000 [23:40<08:32, 11.70it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 59
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   👆👆👆 60
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 61
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟨⬜⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨🟨🟨
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>2
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>21
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬜⬛⬛⬛⬜⏹   SCORE =>23000
⏹⬛⬜⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟩🟩🟩
⏹⬛⬜⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 12000 to 13999
Average Score:  150211
Max Score:     1015000
Median Score:   123000


 80%|████████  | 16003/20000 [26:56<05:53, 11.31it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 439
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬛⬛⬜⏹  ⬜🟫🟫⬜
⏹⬜⬜⬜⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   👆👆👆 440
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 441
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬜⬜⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   LEVEL =>27
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   LINES =>276
⏹⬛⬛⬜⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>408000
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬛⬜⬛⬛⬛⬜⏹   -HOLD-
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜🟫🟫
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 14000 to 15999
Average Score:  152256
Max Score:      699000
Median Score:   125000


 90%|█████████ | 18001/20000 [30:17<02:50, 11.71it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 242
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬛⬛⏹  ⬜🟫🟫⬜
⏹⬜⬜⬜⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 243
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬜⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   👆👆👆 244
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜⬜🟩
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   LEVEL =>14
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬜⏹   LINES =>144
⏹⬜⬛⬜⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>168000
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬜⬛⬜⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨🟨🟨
⏹⬛⬛⬛⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 16000 to 17999
Average Score:  154912
Max Score:     1104000
Median Score:   129000


100%|██████████| 20000/20000 [33:34<00:00,  9.93it/s]



⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 104
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 105
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜⬜🟩
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬜⬜⬛⬜⏹   👆👆👆 106
⏹⬜⬛⬛⬛⬛⬛⏹  🟧🟧🟧🟧
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>5
⏹⬜⬛⬛⬜⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>52
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   SCORE =>64000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬜⬛⬜⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 18000 to 19999
Average Score:  152801
Max Score:      980000
Median Score:   125000
Report:


q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_1.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_2.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_3.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_4.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_5.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_6.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_7.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_8.py'に保存しました。
2/100回目の学習を開始します


 10%|█         | 2001/20000 [03:25<29:59, 10.00it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 153
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬛⬜⬜⬜⏹  ⬜🟫🟫⬜
⏹⬜⬜⬛⬜⬜⬜⏹  
⏹⬜⬜⬛⬛⬛⬛⏹   👆👆👆 154
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟥🟥⬜
⏹⬛⬜⬛⬛⬜⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 155
⏹⬛⬛⬛⬛⬜⬛⏹  🟧🟧🟧🟧
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜⬜⬜⬜
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   LEVEL =>8
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>85
⏹⬛⬛⬜⬜⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>111000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 0 to 1999
Average Score:  155810
Max Score:      766000
Median Score:   128000


 20%|██        | 4001/20000 [06:41<21:46, 12.24it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 144
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟦🟦🟦
⏹⬜⬜⬜⬜⬜⬛⏹  
⏹⬜⬜⬛⬜⬜⬛⏹   👆👆👆 145
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬜⬛⬜⏹   👆👆👆 146
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LEVEL =>7
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>79
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   SCORE =>91000
⏹⬛⬛⬜⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 2000 to 3999
Average Score:  148263
Max Score:      931000
Median Score:   121000


 30%|███       | 6001/20000 [10:08<21:35, 10.80it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 71
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬜⬜⬜⬜⬛⬜⏹  ⬜⬜🟪🟪
⏹⬜⬜⬜⬛⬛⬜⏹  
⏹⬜⬜⬜⬛⬛⬛⏹   👆👆👆 72
⏹⬜⬜⬜⬜⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟨🟨🟨
⏹⬜⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   👆👆👆 73
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LEVEL =>3
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>30
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>38000
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨⬜⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 4000 to 5999
Average Score:  154043
Max Score:      955000
Median Score:   123000


 40%|████      | 8001/20000 [13:33<18:19, 10.91it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 367
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟨🟨🟨
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 368
⏹⬜⬛⬛⬜⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 369
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨🟨🟨
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>22
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LINES =>227
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬜⬛⬜⏹   SCORE =>293000
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜🟦⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬜⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 6000 to 7999
Average Score:  151515
Max Score:      670000
Median Score:   126000


 50%|█████     | 10002/20000 [16:56<15:18, 10.89it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 171
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬛⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬛⬜⬜⬜⏹  
⏹⬜⬜⬛⬛⬜⬜⏹   👆👆👆 172
⏹⬜⬜⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 173
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬜⬛⬛⬛⬛⏹   LEVEL =>9
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>97
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>119000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜🟫🟫
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬜⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 8000 to 9999
Average Score:  152566
Max Score:      878000
Median Score:   128000


 60%|██████    | 12001/20000 [20:22<11:19, 11.77it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 86
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬛⏹  
⏹⬜⬛⬜⬛⬛⬛⏹   👆👆👆 87
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 88
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>4
⏹⬛⬛⬜⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>40
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬜⏹   SCORE =>50000
⏹⬛⬜⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   -HOLD-
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 10000 to 11999
Average Score:  152558
Max Score:     1108000
Median Score:   127000


 70%|███████   | 14001/20000 [23:48<10:46,  9.28it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 116
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟨⬜⬜
⏹⬜⬜⬜⬜⬛⬜⏹  ⬜🟨🟨🟨
⏹⬜⬜⬜⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬜⬛⏹   👆👆👆 117
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜⬜🟦⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬜⬛⬛⬜⬛⏹   👆👆👆 118
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬜⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>6
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LINES =>61
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬛⬜⬛⬛⏹   SCORE =>73000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬜⬛⬜⬛⬛⬜⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 12000 to 13999
Average Score:  152684
Max Score:      778000
Median Score:   129000


 80%|████████  | 16000/20000 [27:12<05:09, 12.91it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 164
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬜⬛⬜⬜⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬜⬜⬜⏹  
⏹⬛⬛⬜⬜⬛⬜⏹   👆👆👆 165
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬛⬜⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 166
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>9
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬜⬜⬜⬛⬛⏹   LINES =>92
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   SCORE =>128000
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 14000 to 15999
Average Score:  152244
Max Score:      938000
Median Score:   127000


 90%|█████████ | 18002/20000 [30:35<02:19, 14.34it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 177
⏹⬜⬛⬜⬜⬜⬜⏹  🟧🟧🟧🟧
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬛⬜⬛⬛⬜⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 178
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 179
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬜⬜⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>10
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>100
⏹⬛⬛⬛⬛⬜⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>118000
⏹⬛⬜⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   -HOLD-
⏹⬜⬜⬛⬛⬛⬜⏹  ⬜🟪🟪⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 16000 to 17999
Average Score:  148929
Max Score:      838000
Median Score:   124000


100%|██████████| 20000/20000 [33:55<00:00,  9.82it/s]



⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 445
⏹⬜⬜⬛⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬛⬜⬜⬜⏹  ⬜🟫🟫⬜
⏹⬜⬜⬛⬜⬜⬜⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   👆👆👆 446
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬜⬜⬛⏹   👆👆👆 447
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>27
⏹⬛⬜⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>279
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>361000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 18000 to 19999
Average Score:  149439
Max Score:      780000
Median Score:   124000
Report:


q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_1.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_2.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_3.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_4.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_5.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_6.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_7.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_8.py'に保存しました。
3/100回目の学習を開始します


 10%|█         | 2000/20000 [03:30<26:45, 11.21it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 217
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟦🟦🟦
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 218
⏹⬛⬛⬜⬜⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟦🟦🟦
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 219
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬜⬜⏹  
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>12
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>127
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>169000
⏹⬛⬜⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬜⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 0 to 1999
Average Score:  156054
Max Score:      747000
Median Score:   129000


 20%|█▉        | 3998/20000 [06:56<26:43,  9.98it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 112
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬜⬜⬛⬜⬜⬜⏹  ⬜🟦🟦🟦
⏹⬜⬜⬛⬜⬜⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 113
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟨⬜⬜
⏹⬛⬜⬛⬛⬜⬜⏹  ⬜🟨🟨🟨
⏹⬛⬛⬛⬜⬛⬜⏹  
⏹⬛⬜⬛⬜⬛⬜⏹   👆👆👆 114
⏹⬛⬛⬛⬜⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬜⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>5
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LINES =>58
⏹⬛⬛⬛⬛⬜⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   SCORE =>68000
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬛⬜⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 2000 to 3999
Average Score:  152118
Max Score:      838000
Median Score:   124000


 30%|███       | 6001/20000 [10:21<35:53,  6.50it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 282
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   👆👆👆 283
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 284
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LEVEL =>16
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>169
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>223000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 4000 to 5999
Average Score:  152419
Max Score:      767000
Median Score:   128000


 40%|████      | 8002/20000 [13:43<15:33, 12.86it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 162
⏹⬜⬜⬜⬜⬜⬜⏹  🟧🟧🟧🟧
⏹⬛⬛⬛⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 163
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 164
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LEVEL =>8
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>89
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   SCORE =>127000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 6000 to 7999
Average Score:  151235
Max Score:      892000
Median Score:   127000


 50%|█████     | 10003/20000 [17:10<12:01, 13.86it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 150
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬛⬜⏹  
⏹⬛⬛⬜⬜⬛⬜⏹   👆👆👆 151
⏹⬛⬛⬜⬜⬛⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 152
⏹⬜⬛⬛⬛⬛⬛⏹  🟧🟧🟧🟧
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>8
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>82
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   SCORE =>100000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 8000 to 9999
Average Score:  154562
Max Score:      857000
Median Score:   128000


 60%|██████    | 12002/20000 [20:34<11:05, 12.02it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 157
⏹⬜⬜⬜⬜⬛⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬛⬛⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   👆👆👆 158
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬜⬛⬜⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 159
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬜⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>8
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   LINES =>87
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>117000
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   -HOLD-
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬜⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 10000 to 11999
Average Score:  153334
Max Score:      686000
Median Score:   129000


 70%|███████   | 14002/20000 [24:02<08:36, 11.61it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 139
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟩🟩🟩
⏹⬛⬛⬜⬜⬜⬜⏹  
⏹⬛⬛⬜⬛⬛⬜⏹   👆👆👆 140
⏹⬜⬛⬛⬛⬛⬛⏹  🟧🟧🟧🟧
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜⬜⬜
⏹⬛⬜⬜⬛⬜⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   👆👆👆 141
⏹⬜⬛⬛⬛⬛⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨🟨🟨
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>7
⏹⬛⬜⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>75
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   SCORE =>83000
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 12000 to 13999
Average Score:  154758
Max Score:      789000
Median Score:   129000


 80%|████████  | 16002/20000 [27:26<05:39, 11.79it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 148
⏹⬜⬜⬜⬜⬜⬜⏹  🟧🟧🟧🟧
⏹⬜⬜⬜⬛⬛⬜⏹  ⬜⬜⬜⬜
⏹⬜⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 149
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 150
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬜⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>8
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬜⬛⬛⬛⬛⏹   LINES =>81
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>101000
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬜⬛⬜⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬜⬛⬛⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬜⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 14000 to 15999
Average Score:  151010
Max Score:      747000
Median Score:   125000


 90%|█████████ | 18000/20000 [30:54<04:17,  7.78it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 175
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬛⬜⏹  ⬜🟩🟩🟩
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬜⬛⬛⏹   👆👆👆 176
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬜⬜⏹  
⏹⬛⬛⬛⬛⬜⬜⏹   👆👆👆 177
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>9
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LINES =>99
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>127000
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 16000 to 17999
Average Score:  155560
Max Score:      701000
Median Score:   128000


100%|██████████| 20000/20000 [34:20<00:00,  9.71it/s]



⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 324
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬛⬛⬜⬜⏹  ⬜🟫🟫⬜
⏹⬛⬛⬜⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 325
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟪🟪⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 326
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LEVEL =>19
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LINES =>198
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬜⬛⏹   SCORE =>262000
⏹⬛⬛⬛⬜⬜⬛⏹  
⏹⬜⬛⬛⬜⬜⬛⏹   -HOLD-
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 18000 to 19999
Average Score:  154142
Max Score:     1015000
Median Score:   127000
Report:


q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_1.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_2.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_3.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_4.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_5.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_6.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_7.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_8.py'に保存しました。
4/100回目の学習を開始します


 10%|█         | 2000/20000 [03:26<31:26,  9.54it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 657
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬜⬜⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 658
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜⬜🟩
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬜⬜⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬜⬛⏹   👆👆👆 659
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>41
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>419
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   SCORE =>533000
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟨⬜⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨🟨🟨
⏹⬛⬛⬜⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 0 to 1999
Average Score:  151800
Max Score:     1134000
Median Score:   126000


 20%|██        | 4000/20000 [06:52<21:03, 12.66it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 221
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟨🟨🟨
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   👆👆👆 222
⏹⬜⬛⬜⬛⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 223
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>12
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬜⬛⬛⏹   LINES =>129
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   SCORE =>157000
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟨🟨🟨
⏹⬛⬜⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 2000 to 3999
Average Score:  152206
Max Score:      900000
Median Score:   126000


 30%|███       | 6002/20000 [10:13<16:22, 14.24it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 252
⏹⬜⬜⬜⬜⬜⬜⏹  🟧🟧🟧🟧
⏹⬜⬛⬛⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   👆👆👆 253
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬜⬜⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬜⬛⏹   👆👆👆 254
⏹⬛⬛⬛⬜⬛⬛⏹  🟧🟧🟧🟧
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜⬜⬜
⏹⬛⬛⬛⬛⬜⬜⏹  
⏹⬜⬛⬛⬛⬜⬜⏹  
⏹⬛⬛⬛⬛⬜⬜⏹   LEVEL =>15
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>150
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   SCORE =>196000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 4000 to 5999
Average Score:  149450
Max Score:      778000
Median Score:   123000


 40%|████      | 8002/20000 [13:27<20:13,  9.89it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 457
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬛⬛⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 458
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬛⬛⬛⬜⬜⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬜⬛⬜⬛⬛⏹   👆👆👆 459
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LEVEL =>28
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LINES =>287
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   SCORE =>369000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬜⏹   -HOLD-
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬜⬛⬛⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 6000 to 7999
Average Score:  147191
Max Score:      894000
Median Score:   122000


 50%|█████     | 10001/20000 [16:46<18:51,  8.84it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 231
⏹⬜⬜⬜⬜⬜⬜⏹  🟧🟧🟧🟧
⏹⬜⬜⬜⬛⬛⬜⏹  ⬜⬜⬜⬜
⏹⬜⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   👆👆👆 232
⏹⬜⬛⬛⬜⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬜⬜⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 233
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>13
⏹⬜⬛⬜⬛⬛⬜⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LINES =>137
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   SCORE =>179000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬜⬜⏹   -HOLD-
⏹⬛⬛⬜⬛⬜⬜⏹  ⬜⬜🟦⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 8000 to 9999
Average Score:  152155
Max Score:     1224000
Median Score:   128000


 60%|██████    | 12000/20000 [20:05<21:32,  6.19it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 415
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟨🟨🟨
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 416
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨🟨🟨
⏹⬜⬜⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 417
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨🟨🟨
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   LEVEL =>25
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LINES =>258
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   SCORE =>350000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 10000 to 11999
Average Score:  150358
Max Score:      719000
Median Score:   127000


 70%|███████   | 14000/20000 [23:22<07:32, 13.26it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 174
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬜⬜⬛⬜⬜⬜⏹  ⬜⬜🟪🟪
⏹⬜⬜⬛⬜⬜⬜⏹  
⏹⬜⬛⬛⬜⬜⬜⏹   👆👆👆 175
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬜⬜⬛⬛⬜⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬛⬜⬛⬜⏹   👆👆👆 176
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜⬜🟩
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LEVEL =>10
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬜⬛⬛⬛⏹   LINES =>100
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬜⬛⬛⬛⬛⏹   SCORE =>126000
⏹⬜⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬜⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟨⬜⬜
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟨🟨🟨
⏹⬛⬛⬜⬜⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 12000 to 13999
Average Score:  151565
Max Score:      865000
Median Score:   128000


 80%|████████  | 16003/20000 [26:38<04:36, 14.45it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 160
⏹⬛⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬛⬜⬜⬜⬜⬜⏹  
⏹⬛⬛⬜⬜⬜⬜⏹   👆👆👆 161
⏹⬛⬛⬜⬜⬛⬜⏹  ⬜⬜⬜🟩
⏹⬛⬛⬜⬛⬛⬜⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬛⬜⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   👆👆👆 162
⏹⬜⬛⬛⬜⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬜⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LEVEL =>9
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   LINES =>90
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬜⬛⏹   SCORE =>112000
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   -HOLD-
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬜⬛⬜⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 14000 to 15999
Average Score:  148324
Max Score:      737000
Median Score:   125000


 90%|█████████ | 18001/20000 [29:53<02:32, 13.14it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 124
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬛⏹  
⏹⬜⬜⬛⬜⬛⬛⏹   👆👆👆 125
⏹⬛⬛⬛⬜⬜⬛⏹  ⬜🟥🟥⬜
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 126
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜🟪🟪⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟪🟪
⏹⬜⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬜⬛⬛⬛⬜⏹   LEVEL =>6
⏹⬜⬜⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   LINES =>66
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬜⏹   SCORE =>78000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜⬜🟩
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 16000 to 17999
Average Score:  149275
Max Score:      866000
Median Score:   124000


100%|██████████| 20000/20000 [33:10<00:00, 10.05it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 374
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬛⬜⬜⬜⬜⬜⏹  ⬜⬜🟪🟪
⏹⬛⬜⬜⬜⬜⬜⏹  
⏹⬛⬜⬜⬛⬛⬛⏹   👆👆👆 375
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜⬜🟩
⏹⬜⬛⬛⬛⬜⬛⏹  ⬜🟩🟩🟩
⏹⬜⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   👆👆👆 376
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬜⬛⬜⏹  
⏹⬛⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>23
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   LINES =>232
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   SCORE =>294000
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬜⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 18000 to 19999
Average Score:  150485
Max Score:      889000
Median Score:   125000
Report:





q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_1.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_2.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_3.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_4.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_5.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_6.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_7.py'に保存しました。
q_tableを'/content/drive/My Drive/Q_TETRIS_2/q_table_module_b_8.py'に保存しました。
5/100回目の学習を開始します


 10%|█         | 2002/20000 [03:29<22:29, 13.34it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 184
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟦⬜
⏹⬜⬜⬜⬜⬛⬜⏹  ⬜🟦🟦🟦
⏹⬜⬜⬜⬛⬛⬜⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   👆👆👆 185
⏹⬜⬜⬛⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬜⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 186
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜⬜🟦⬜
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜🟦🟦🟦
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>10
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬛⬛⬛⬛⬜⏹   LINES =>105
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬜⬛⬛⬛⏹   SCORE =>135000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬛⬛⬛⬜⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 0 to 1999
Average Score:  155443
Max Score:      876000
Median Score:   128000


 20%|██        | 4000/20000 [06:56<23:14, 11.47it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 229
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟨⬜⬜
⏹⬜⬛⬛⬛⬜⬜⏹  ⬜🟨🟨🟨
⏹⬜⬛⬛⬛⬜⬜⏹  
⏹⬜⬛⬛⬜⬜⬜⏹   👆👆👆 230
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟥🟥⬜
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 231
⏹⬜⬛⬜⬛⬛⬜⏹  🟧🟧🟧🟧
⏹⬛⬛⬜⬛⬛⬜⏹  ⬜⬜⬜⬜
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬜⬜⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LEVEL =>13
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>135
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   SCORE =>179000
⏹⬛⬜⬛⬛⬛⬛⏹  
⏹⬛⬜⬛⬛⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬜⬛⬛⏹  ⬜⬜🟫🟫
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬛⬛⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 2000 to 3999
Average Score:  154065
Max Score:      798000
Median Score:   128000


 30%|███       | 6001/20000 [10:20<18:05, 12.90it/s]


⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 89
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟫🟫
⏹⬜⬜⬜⬜⬜⬛⏹  ⬜🟫🟫⬜
⏹⬜⬛⬛⬜⬛⬛⏹  
⏹⬜⬛⬛⬜⬛⬛⏹   👆👆👆 90
⏹⬛⬛⬜⬛⬛⬛⏹  ⬜⬜🟫🟫
⏹⬜⬛⬛⬛⬛⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬛⬜⬛⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   👆👆👆 91
⏹⬛⬛⬛⬛⬜⬛⏹  ⬜⬜⬜🟩
⏹⬛⬜⬛⬛⬛⬛⏹  ⬜🟩🟩🟩
⏹⬜⬜⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹  
⏹⬜⬛⬛⬛⬛⬛⏹   LEVEL =>4
⏹⬜⬛⬛⬛⬜⬛⏹  
⏹⬛⬛⬜⬛⬛⬛⏹   LINES =>42
⏹⬛⬜⬜⬜⬛⬛⏹  
⏹⬛⬜⬛⬜⬛⬛⏹   SCORE =>48000
⏹⬛⬛⬜⬛⬛⬛⏹  
⏹⬛⬛⬛⬜⬛⬛⏹   -HOLD-
⏹⬛⬛⬛⬛⬛⬜⏹  ⬜⬜🟫🟫
⏹⬛⬜⬜⬛⬜⬛⏹  ⬜🟫🟫⬜
⏹⬛⬛⬜⬛⬛⬛⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Episode: from 4000 to 5999
Average Score:  151547
Max Score:     1041000
Median Score:   124000


 38%|███▊      | 7563/20000 [12:57<13:41, 15.13it/s]

# 付録

## Q学習
学習中、現状確認のために一定の確率でゲームオーバー時の盤面が出力される

In [None]:
# Train a brand-new Q agent on a game created with display turned off.
q_agent = QAgent(eps=0.1, lr=0.2)
# Empirically, starting with a fairly large epsilon like this works better.
q_agent.epsilon = 0.25
tetris = Tetris(False)
env = Env(agent=q_agent, tetris=tetris)
record = env.train(episodes=10000)

# Plot the per-episode scores collected during training.
score_trace = go.Scatter(y=record)
fig = go.Figure(data=score_trace)
fig.show()

## 手動で遊んでみる
※ 非常にユーザビリティが悪いため、やらない方がよい(笑)


In [None]:
# Play one episode by hand: moves are typed at the prompt, display enabled.
env = Env(agent=HumanAgent(), tetris=Tetris(True))
env.execute()

Enter your move: [0, 5]0
Invalid move. Valid moves are: [0, 5]

⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 2
⏹⬜🟧🟧🟧🟧⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 3
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 4
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LEVEL =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LINES =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   SCORE =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   -HOLD-
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Enter your move: [5, 0]5
Invalid move. Valid moves are: [5, 0]

⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 3
⏹⬜⬜🟥🟥⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 4
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜🟩
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟩🟩🟩
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 5
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟪🟪⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜🟪🟪
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LEVEL =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   LINES =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   SCORE =>0
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   -HOLD-
⏹⬜⬜⬜⬜⬜⬜⏹  🟧🟧🟧🟧
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜⬜⬜
⏹⬜⬜⬜⬜⬜⬜⏹   ------
⏹⏹⏹⏹⏹⏹⏹⏹
Enter your move: [0, 7]7
Invalid move. Valid moves are: [0, 7]

⏹⏹⏹⏹⏹⏹⏹⏹     NEXT 3
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜🟥🟥⬜
⏹⬜⬜⬜⬜⬜⬜⏹  
⏹⬜⬜⬜⬜⬜⬜⏹   👆👆👆 4
⏹⬜⬜⬜⬜⬜⬜⏹  ⬜⬜

KeyboardInterrupt: Interrupted by user

# 🗑️trashbox🗑️

In [None]:
# Replace the agent with a freshly constructed one (lr=0.1, eps=0.1) while
# carrying over the Q-table of the previous agent.
q_table = q_agent.q_table
q_agent = QAgent(lr=0.1, eps=0.1)
q_agent.q_table = q_table

In [None]:
# Re-key the Q-table: (state sequence, block sequence, action name) becomes
# (int, int, action code). Each sequence is folded into a single integer by
# treating its elements as base-10 digits.
_q_table = q_agent.q_table
new_dict = {}
convert_dict = {"down": 0, "right": 2, "left": 1, "turn-left": 3, "turn-right": 4, "hold": 5}
for (state, block, action), value in _q_table.items():
    new_state = 0
    for digit in state:
        new_state = 10 * new_state + digit

    new_block = 0
    for digit in block:
        if digit < -3:
            # Block element below -3: report it and skip the whole entry.
            print("error!")
            break
        new_block = 10 * new_block + digit
    else:
        # Reached only when the block loop finished without hitting `break`.
        new_dict[(new_state, new_block, convert_dict[action])] = value

In [None]:
# Print the machine's uptime in hours: the first field of /proc/uptime
# (seconds since boot) divided by 60 twice.
!cat /proc/uptime | awk '{print $1 /60 /60 "hours"}'

0.0940222hours


In [None]:
# Rebuild the agent's Q-table without the entries whose action code is 5
# (presumably "hold", per the name-to-code mapping used elsewhere in this
# notebook -- confirm).
_q_table = q_agent.q_table
new_dict = {
    (s, b, a): q_value
    for (s, b, a), q_value in _q_table.items()
    if a != 5
}
q_agent.q_table = new_dict

In [None]:
# Compress the Q-table for saving.
# For every (state, block) pair keep only the best and second-best action,
# then pack the two action codes into one integer: best * 10 + second.

new_dict = {}
for (state_code, block_code, action), q_value in q_agent.q_table.items():
    slot = state_code * 10000 + block_code
    current = new_dict.get(slot)
    if current is None:
        # First action seen for this slot; 6 / -9999 mark "no runner-up yet".
        new_dict[slot] = (action, q_value, 6, -9999)
        continue
    top_action, top_q, second_action, second_q = current
    if q_value > top_q:
        # New best: the previous best becomes the runner-up.
        new_dict[slot] = (action, q_value, top_action, top_q)
    elif q_value > second_q:
        new_dict[slot] = (top_action, top_q, action, q_value)

# Replace each (best, q, second, q) tuple with its two-digit action code.
new_dict = {
    slot: top_action * 10 + second_action
    for slot, (top_action, _, second_action, _) in new_dict.items()
}

# Output as JSON
# with open("/content/drive/My Drive/Q_TETRIS/data.js", "w") as f:
#     json.dump(f"""const data = {new_dict};
# module.exports = data;""", f)

# Row form of the same data, one {"id", "action"} record per slot.
new_list = [{"id": slot, "action": code} for slot, code in new_dict.items()]




In [None]:
# Write the compressed table to Drive as JSON, with entries in ascending key order.
sorted_data = {slot: new_dict[slot] for slot in sorted(new_dict)}
with open("/content/drive/My Drive/Q_TETRIS/q_table.json", "w") as f:
    f.write(json.dumps(sorted_data))

In [None]:
# Insert one hand-written row into the "Q_Values" table and print the response.
response= supabase.table("Q_Values").insert([{"id": 1, "action": 0}]).execute()

print(response)

data=[{'id': 1, 'action': 0}] count=None


In [None]:
# Upload the rows of new_list to the "Q_Values" table in batches.
# Sending one request per row was cut off by the server partway through
# (RemoteProtocolError: ConnectionTerminated), so rows are grouped and each
# request carries a list of rows instead.
batch_size = 1000
for start in range(0, len(new_list), batch_size):
    supabase.table("Q_Values").insert(new_list[start:start + batch_size]).execute()

RemoteProtocolError: <ConnectionTerminated error_code:ErrorCodes.NO_ERROR, last_stream_id:19999, additional_data:None>

In [None]:
# Export the table as a CommonJS module (data.js).
# The JavaScript source is written as plain text. Passing it through
# json.dump would serialise the whole module as one quoted JSON string with
# an escaped newline, which is not loadable JavaScript.
with open("/content/drive/My Drive/Q_TETRIS/data.js", "w") as f:
    f.write(f"""const data = {new_dict};
module.exports = data;""")

In [None]:
# Install the Supabase client library and create a client for the project.
!pip install supabase
from supabase import create_client, Client

# NOTE(review): the project URL and API key are hard-coded in the notebook.
# The token looks like an "anon" role key -- confirm it is meant to be public,
# otherwise load it from a secret store instead of committing it.
url = "https://ctvoissecavwoqupidfs.supabase.co"
key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImN0dm9pc3NlY2F2d29xdXBpZGZzIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjM5ODMxNDIsImV4cCI6MjAzOTU1OTE0Mn0.PL5rF6kgBYZV8IYld5SJouQ7RjPl4-M3hwLvOsu1OGk"
supabase: Client  = create_client(url, key)

# Upload new_list to the "Q_Values" table in at most 100 batches.
# length is the batch size; 100 batches of that size always cover the list.
length = len(new_list)//100 + 1

for i in tqdm.tqdm(range(100)):
    # Slice ends are exclusive and clamp to the list length, so no min() is
    # needed; capping the end at len(new_list)-1 would drop the final row.
    batch = new_list[length*i: length*(i+1)]
    if not batch:
        # Short lists run out of rows before 100 batches; skip empty requests.
        continue
    supabase.table("Q_Values").insert(batch).execute()

Collecting supabase
  Downloading supabase-2.7.3-py3-none-any.whl.metadata (10 kB)
Collecting gotrue<3.0,>=1.3 (from supabase)
  Downloading gotrue-2.7.0-py3-none-any.whl.metadata (6.0 kB)
Collecting httpx<0.28,>=0.24 (from supabase)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting postgrest<0.17.0,>=0.14 (from supabase)
  Downloading postgrest-0.16.11-py3-none-any.whl.metadata (5.1 kB)
Collecting realtime<3.0.0,>=2.0.0 (from supabase)
  Downloading realtime-2.0.2-py3-none-any.whl.metadata (6.7 kB)
Collecting storage3<0.8.0,>=0.5.3 (from supabase)
  Downloading storage3-0.7.7-py3-none-any.whl.metadata (1.9 kB)
Collecting supafunc<0.6.0,>=0.3.1 (from supabase)
  Downloading supafunc-0.5.1-py3-none-any.whl.metadata (1.2 kB)
Collecting httpcore==1.* (from httpx<0.28,>=0.24->supabase)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<0.28,>=0.24->supabase)
  Downloading h11-0.14.0-py3-none-any.whl.me

100%|██████████| 100/100 [03:14<00:00,  1.95s/it]


In [None]:
# Upload new_list to the "Q_Values" table in at most 100 batches.
# length is the batch size; 100 batches of that size always cover the list.
length = len(new_list)//100 + 1
for i in tqdm.tqdm(range(100)):
    # Slice ends are exclusive and clamp to the list length, so no min() is
    # needed; capping the end at len(new_list)-1 would drop the final row.
    batch = new_list[length*i: length*(i+1)]
    if not batch:
        # Short lists run out of rows before 100 batches; skip empty requests.
        continue
    supabase.table("Q_Values").insert(batch).execute()

100%|██████████| 100/100 [03:13<00:00,  1.93s/it]


In [None]:
# Show the number of entries in new_list.
print(len(new_list))

2834034


In [None]:
# Prune Q-table entries based on the action code and decimal digits of the
# block code, then install the pruned table on the agent. The unpruned table
# stays reachable through _q_table.
# NOTE(review): action codes 1-4 presumably mean left / right / turn-left /
# turn-right (per the action-name mapping used in this notebook), and the
# digits of the block code presumably encode piece type, rotation and
# position -- confirm against the state encoder.
_q_table = q_agent.q_table
new_dict = {}
for (state_code, block_code, action), q_value in _q_table.items():
    last_digit = block_code % 10
    lead_is_1_or_3 = block_code // 1000 in (1, 3)
    # Hundreds digit of the block code after adding 50.
    rounded_hundreds = ((block_code + 50) // 100) % 10

    if action == 1:
        dropped = last_digit >= (3 if lead_is_1_or_3 else 4)
    elif action == 2:
        dropped = last_digit <= (1 if lead_is_1_or_3 else 2)
    elif action == 3:
        dropped = rounded_hundreds in (1, 2)
    elif action == 4:
        dropped = rounded_hundreds in (3, 2)
    else:
        dropped = False

    # Independently of the action, drop entries whose tens digit is 5, 6 or 7.
    if dropped or block_code // 10 % 10 in (5, 6, 7):
        continue
    new_dict[(state_code, block_code, action)] = q_value
q_agent.q_table = new_dict

In [None]:
# Point the agent back at the table held in _q_table (the filtering cells
# store the pre-filter table under that name).
q_agent.q_table = _q_table

In [None]:
from google.colab import drive
import importlib.util

# Mount Google Drive
drive.mount('/content/drive')

q_table = {}

# The Q-table is stored as five compiled modules, each exposing a q_table
# dict. Load them one by one and merge everything into a single dict.
for part in tqdm.tqdm(range(1, 6)):
  pyc_file_path = f'/content/drive/My Drive/Q_TETRIS_3/__pycache__/q_table_module_{part}.cpython-310.pyc'
  spec = importlib.util.spec_from_file_location(f"q_table_module_{part}", pyc_file_path)
  module = importlib.util.module_from_spec(spec)
  spec.loader.exec_module(module)
  q_table.update(module.q_table)
  # Drop the module reference before loading the next part.
  del module

# Build a fresh agent around the merged table.
q_agent = QAgent(lr=0.1, eps=0.1)
q_agent.q_table = q_table

print("正常に読み込みが完了しました!")

# # Path of the .pyc file
# pyc_file_path = '/content/drive/My Drive/Q_TETRIS_3/__pycache__/q_table_module_1.cpython-310.pyc'

# # Load the .pyc file
# spec = importlib.util.spec_from_file_location("your_module", pyc_file_path)
# module = importlib.util.module_from_spec(spec)
# spec.loader.exec_module(module)

# # Use the module's functions and classes
# module.your_function()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


100%|██████████| 5/5 [00:21<00:00,  4.37s/it]

正常に読み込みが完了しました!



