<a href="https://colab.research.google.com/github/smerapala/1223-dashboard/blob/master/2048.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import copy
import math
import time

from IPython.display import clear_output
import torch
import torch.nn as nn
import torch.nn.functional as F

# Create new tensors on the GPU when one is available. The guard keeps the
# notebook runnable on a CPU-only runtime, where requesting the CUDA tensor
# type unconditionally fails.
if torch.cuda.is_available():
  torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [None]:
class GameBoard():
  """A 2048 board: a grid of tiles plus the running score.

  Attributes:
    rows, columns: board dimensions (4 x 4 by default).
    score: sum of the values of every tile created by a merge so far.
    rep: the board as a list of rows; 0 marks an empty cell.
  """

  def __init__(self, rows=4, columns=4):
    """Create an empty board and place two starting tiles on distinct cells.

    Args:
      rows: number of rows.
      columns: number of columns.

    Raises:
      ValueError: if the board cannot hold the two starting tiles.
    """
    if rows < 1 or columns < 1 or rows * columns < 2:
      raise ValueError("the board needs at least two cells for the starting tiles")
    self.columns = columns
    self.rows = rows
    self.score = 0
    self.rep = [[0 for c in range(self.columns)] for r in range(self.rows)]
    # new_block() only ever fills an empty cell, so the two tiles cannot collide.
    self.new_block()
    self.new_block()

  def __str__(self):
    """Return the board as text: one line per row, every value followed by a tab."""
    return "".join(
      "".join(f"{value}\t" for value in row) + "\n"
      for row in self.rep
    )

  def combine(self, first_row, first_col, second_row, second_col):
    """Merge the second cell into the first.

    The first cell is doubled, its new value is added to the score and the
    second cell is emptied. The shift methods no longer go through this
    method; it is kept for callers that merge cells directly.
    """
    self.rep[first_row][first_col] *= 2
    self.score += self.rep[first_row][first_col]
    self.rep[second_row][second_col] = 0

  def new_block(self):
    """Put a 2 or a 4 (equally likely) on a uniformly chosen empty cell.

    Does nothing when the board is full, instead of searching forever for an
    empty cell.
    """
    empty_cells = [
      (r, c)
      for r in range(self.rows)
      for c in range(self.columns)
      if self.rep[r][c] == 0
    ]
    if not empty_cells:
      return
    r, c = random.choice(empty_cells)
    self.rep[r][c] = random.randrange(2, 5, 2)

  def has_lost(self):
    """Return True when no move is possible.

    A move is possible as soon as any cell is empty or any two horizontally
    or vertically adjacent cells hold the same value.
    """
    for r in range(self.rows):
      for c in range(self.columns):
        value = self.rep[r][c]
        if value == 0:
          return False
        if c + 1 < self.columns and value == self.rep[r][c + 1]:
          return False
        if r + 1 < self.rows and value == self.rep[r + 1][c]:
          return False
    return True

  def _collapse(self, line):
    """Slide and merge one line of values towards its first element.

    Args:
      line: cell values ordered from the edge the tiles move towards.

    Returns:
      A new list of the same length. Equal neighbours are merged once per
      move, pairing up from the destination edge, and every merged value is
      added to the score.
    """
    tiles = [value for value in line if value != 0]
    merged = []
    i = 0
    while i < len(tiles):
      if i + 1 < len(tiles) and tiles[i] == tiles[i + 1]:
        merged.append(tiles[i] * 2)
        self.score += tiles[i] * 2
        i += 2  # both tiles are consumed, so the new tile cannot merge again
      else:
        merged.append(tiles[i])
        i += 1
    return merged + [0] * (len(line) - len(merged))

  def _shift(self, lines):
    """Collapse every line of cells and spawn a tile if the board changed.

    Args:
      lines: one list of (row, column) coordinates per line, each ordered
        from the edge the tiles move towards.
    """
    changed = False
    for cells in lines:
      before = [self.rep[r][c] for r, c in cells]
      after = self._collapse(before)
      if after != before:
        changed = True
        for (r, c), value in zip(cells, after):
          self.rep[r][c] = value
    # A move that leaves the board untouched is not a real move: no new tile.
    if changed:
      self.new_block()

  def up_shift(self):
    """Slide and merge every column towards the top row."""
    self._shift([[(r, c) for r in range(self.rows)] for c in range(self.columns)])

  def down_shift(self):
    """Slide and merge every column towards the bottom row."""
    self._shift([[(r, c) for r in reversed(range(self.rows))] for c in range(self.columns)])

  def left_shift(self):
    """Slide and merge every row towards the first column."""
    self._shift([[(r, c) for c in range(self.columns)] for r in range(self.rows)])

  def right_shift(self):
    """Slide and merge every row towards the last column."""
    self._shift([[(r, c) for c in reversed(range(self.columns))] for r in range(self.rows)])

  def play_human(self):
    """Play interactively on stdin until an empty line is entered or the game is lost.

    Accepts u/d/l/r or up/down/left/right (case-insensitive). Anything else
    is reported and ignored rather than being treated as a right shift.
    """
    moves = {
      "u": self.up_shift, "up": self.up_shift,
      "d": self.down_shift, "down": self.down_shift,
      "l": self.left_shift, "left": self.left_shift,
      "r": self.right_shift, "right": self.right_shift,
    }
    direction = input("Direction: ")
    while direction != "":
      move = moves.get(direction.strip().lower())
      if move is None:
        print("Unknown direction: use u, d, l or r (empty input quits)")
      else:
        move()
      print(self)
      if self.has_lost():
        print("you suck")
        break
      direction = input("Direction: ")
      clear_output()

  def play_network(self, network, print_output=False):
    """Let a network play this board and return the final score.

    Args:
      network: object whose ``forward(game)`` returns a value supporting
        ``.argmax().item()``; the resulting index selects the move in the
        order up, right, down, left.
      print_output: when True, print the board after every move, wait two
        seconds and clear the cell output.

    Returns:
      The score when the game is lost or when the chosen move leaves the
      board unchanged. In the latter case the network would pick the same
      move again, so the game stops.
    """
    moves = [self.up_shift, self.right_shift, self.down_shift, self.left_shift]
    while not self.has_lost():
      old = [row[:] for row in self.rep]
      direction = network.forward(self).argmax().item()
      moves[direction]()
      if print_output:
        print(self)
        time.sleep(2)
        clear_output()
      if old == self.rep:
        break
    return self.score

In [None]:
# Start a fresh board and play it interactively: play_human() keeps prompting
# for a direction until an empty line is entered or the game is lost.
game = GameBoard()
game.play_human()

0	2	0	0	
0	0	0	0	
0	0	0	4	
0	2	4	4	

Direction: up


In [None]:
class Harry(nn.Module):
    """Two-layer perceptron that scores the four possible moves for a board.

    Each cell is fed in as 8 binary inputs, so the first layer takes
    rows * columns * 8 inputs and has one hidden unit per cell; the second
    layer maps the hidden units to 4 outputs.
    """

    def __init__(self, game):
        """Size the layers from ``game.rows`` and ``game.columns``."""
        super().__init__()
        cell_count = game.rows * game.columns
        self.input = nn.Linear(cell_count * 8, cell_count)
        self.output = nn.Linear(cell_count, 4)

    def forward(self, game):
        """Return the 4 move scores for the current contents of ``game.rep``."""
        bits = []
        for row in game.rep:
            for number in row:
                # Truncated log2(number + 1): 0 for an empty cell, k for a 2**k tile.
                exponent = int(math.log(number + 1, 2))
                # The exponent is written as an 8-digit binary string, one input per digit.
                bits.extend(int(digit) for digit in format(exponent, "08b"))
        game_input = torch.tensor(bits, dtype=torch.float)
        hidden = F.relu(self.input(game_input))
        return self.output(hidden)

In [None]:
def breed_granual(father, mother):
  """Breed a child network by mixing the parents one scalar at a time.

  Every individual weight and bias of the child is taken from the father or
  from the mother with equal probability, independently of all others.

  Args:
    father: first parent network.
    mother: second parent network with the same architecture as ``father``.

  Returns:
    A new network of the father's type; neither parent is modified.
  """
  # A deep copy yields a child with the right architecture (and device) for
  # any network type; every parameter is overwritten below.
  child = copy.deepcopy(father)
  # Parameters require grad, and autograd rejects in-place writes to them
  # unless gradient tracking is switched off.
  with torch.no_grad():
    for child_param, f_param, m_param in zip(
        child.parameters(), father.parameters(), mother.parameters()):
      from_father = torch.rand_like(child_param) < 0.5
      child_param.copy_(torch.where(from_father, f_param, m_param))
  return child

def breed_genetics(father, mother):
  """Breed a child network by mixing the parents one node at a time.

  For weight matrices, each row (all incoming weights of one node) is taken
  whole from the father or from the mother with equal probability. Biases
  are chosen one value at a time, independently of the weight rows.

  Args:
    father: first parent network.
    mother: second parent network with the same architecture as ``father``.

  Returns:
    A new network of the father's type; neither parent is modified.
  """
  # A deep copy yields a child with the right architecture (and device) for
  # any network type; every parameter is overwritten below.
  child = copy.deepcopy(father)
  # Parameters require grad, and autograd rejects in-place writes to them
  # unless gradient tracking is switched off.
  with torch.no_grad():
    for child_param, f_param, m_param in zip(
        child.parameters(), father.parameters(), mother.parameters()):
      if child_param.dim() >= 2:
        # One draw per row, broadcast across the rest of the row.
        mask_shape = [child_param.shape[0]] + [1] * (child_param.dim() - 1)
        from_father = torch.rand(mask_shape, device=child_param.device) < 0.5
      else:
        from_father = torch.rand_like(child_param) < 0.5
      child_param.copy_(torch.where(from_father, f_param, m_param))
  return child

Open question for breeding: should the child inherit its parents' weights one weight at a time (`breed_granual`) or one row of weights at a time (`breed_genetics`)?

In [None]:
# Evolutionary training: every epoch each network plays several games, the
# top scorers survive unchanged, and the rest of the population is replaced
# by children bred from those survivors.
num_networks = 20        # population size
num_games_network = 10   # games played per network per epoch
num_winners = 2          # top scorers carried over to the next epoch
num_losers = 3           # non-winners sampled each epoch (see NOTE below)
num_epochs = 50
breed = breed_granual

epoch_scores = []
networks = [Harry(GameBoard()) for i in range(num_networks)]

# Report progress about every tenth of the run; max() keeps the modulo below
# from dividing by zero when num_epochs < 10.
progress_interval = max(1, num_epochs // 10)

for epoch_index in range(num_epochs):
  # Fitness of a network = total score over its games this epoch.
  scores = [sum(GameBoard().play_network(network) for i in range(num_games_network)) for network in networks]

  # Indices sorted by ascending score; the last num_winners are the best.
  winning_network_indices = sorted(range(len(scores)), key=lambda i: scores[i])[-num_winners:]
  remaining_networks = set(range(num_networks)).difference(winning_network_indices)
  # random.sample needs a sequence: sampling straight from a set raises
  # TypeError on Python 3.11+.
  losing_network_indices = random.sample(sorted(remaining_networks), num_losers)

  winning_networks = [networks[index] for index in winning_network_indices]
  # NOTE(review): the sampled losers are collected but never used for
  # breeding or survival; confirm whether they were meant to be.
  losing_networks = [networks[index] for index in losing_network_indices]
  winning_network_scores = [scores[index] for index in winning_network_indices]

  # Copy the list: aliasing it would append the children to winning_networks
  # too, so later children would be bred from other freshly bred children.
  networks = list(winning_networks)

  for i in range(num_networks-num_winners):
    new_network = breed(random.choice(winning_networks), random.choice(winning_networks))
    networks.append(new_network)

  epoch_scores.append(sum(winning_network_scores))

  if (epoch_index+1) % progress_interval == 0:
    print(round((epoch_index+1)/num_epochs, 3))

0.1
0.2


KeyboardInterrupt: ignored

In [None]:
# Display the training history: one entry per completed epoch, each the sum
# of that epoch's winning networks' scores.
epoch_scores

In [None]:
# Watch one network from the current population play a single game; with
# print_output=True the board is printed after every move.
# NOTE(review): index 4 lies beyond the first num_winners (2) slots, so this
# looks like one of the bred children rather than a surviving winner; confirm
# that this is the network intended for the demo.
test_network = networks[4]
GameBoard().play_network(test_network, print_output=True)